Thrill  0.1
triangles_run.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/triangles/triangles_run.cpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Alexander Noe <[email protected]>
7  *
8  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
9  ******************************************************************************/
10 
13 
14 #include <thrill/api/cache.hpp>
15 #include <thrill/api/generate.hpp>
17 #include <thrill/api/size.hpp>
18 #include <thrill/common/logger.hpp>
19 #include <tlx/cmdline_parser.hpp>
20 
21 #include <algorithm>
22 #include <cmath>
23 #include <functional>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 
28 using namespace thrill; // NOLINT
30 
31 using Node = size_t; // identifier of a graph vertex
32 using Edge = std::pair<Node, Node>; // edge as a pair of endpoint vertices; the FlatMaps in this file emit it with first < second
33 
34 static size_t CountTrianglesPerLine(
35  api::Context& ctx,
36  const std::vector<std::string>& input_path) {
37  auto edges = ReadLines(ctx, input_path).template FlatMap<Edge>(
38  [](const std::string& input, auto emit) {
39  // parse "source\ttarget\ttarget...\n" lines
40  char* endptr;
41  unsigned long src = std::strtoul(input.c_str(), &endptr, 10);
42  // die_unless(endptr && *endptr == ' ' &&
43  // "Could not parse src tgt line");
44  while (*endptr != 0) {
45  unsigned long tgt = std::strtoul(endptr + 1, &endptr, 10);
46 
47  if (src < tgt) {
48  emit(std::make_pair(src, tgt));
49  }
50  else {
51  // do not emit when src >= tgt (will be emitted when on other
52  // side of edge)
53  }
54  }
55  }).Keep();
56 
58 }
59 
61  api::Context& ctx,
62  const ZipfGraphGen& base_graph_gen,
63  const size_t& num_vertices) {
64 
65  auto edge_lists = Generate(
66  ctx, num_vertices,
67  [graph_gen = ZipfGraphGen(base_graph_gen, num_vertices),
68  rng = std::default_random_engine(std::random_device { } ())](
69  size_t index) mutable {
70  return std::make_pair(index, graph_gen.GenerateOutgoing(rng));
71  });
72 
73  auto edges = edge_lists.template FlatMap<Edge>(
74  [](std::pair<Node, std::vector<Node> > neighbors, auto emit) {
75  for (auto neighbor : neighbors.second) {
76  if (neighbors.first > neighbor) {
77  emit(std::make_pair(neighbor, neighbors.first));
78  }
79  else {
80  if (neighbors.first < neighbor) {
81  // emit(std::make_pair(neighbors.first, neighbor));
82  }
83  // self-loop: do not emit
84  }
85  }
86  }).Keep().Cache().Execute();
87 
88  ctx.net.Barrier();
90 
91  const bool use_detection = true;
92 
93  size_t triangles = examples::triangles::CountTriangles<use_detection>(edges);
94 
95  ctx.net.Barrier();
96 
97  if (ctx.my_rank() == 0) {
98  if (use_detection) {
99  LOG1 << "RESULT " << "benchmark=triangles " << "detection=ON"
100  << " vertices=" << num_vertices
101  << " time=" << timer
102  << " traffic=" << ctx.net_manager().Traffic()
103  << " hosts=" << ctx.num_hosts();
104  }
105  else {
106  LOG1 << "RESULT " << "benchmark=triangles " << "detection=OFF"
107  << " vertices=" << num_vertices
108  << " time=" << timer
109  << " traffic=" << ctx.net_manager().Traffic()
110  << " hosts=" << ctx.num_hosts();
111  }
112  }
113 
114  return triangles;
115 }
116 
117 int main(int argc, char* argv[]) {
118 
119  tlx::CmdlineParser clp;
120 
121  bool generate = false;
122  clp.add_bool('g', "generate", generate,
123  "generate graph data, set input = #pages");
124 
125  size_t num_vertices;
126 
127  clp.add_size_t('n', "vertices", num_vertices, "Number of vertices");
128 
129  // Graph Generator
130  ZipfGraphGen gg(1);
131 
132  clp.add_double(0, "size_mean", gg.size_mean,
133  "generated: mean of number of outgoing links, "
134  "default: " + std::to_string(gg.size_mean));
135 
136  clp.add_double(0, "size_var", gg.size_var,
137  "generated: variance of number of outgoing links, "
138  "default: " + std::to_string(gg.size_var));
139 
140  clp.add_double(0, "link_scale", gg.link_zipf_scale,
141  "generated: Zipf scale parameter for outgoing links, "
142  "default: " + std::to_string(gg.link_zipf_scale));
143 
144  clp.add_double(0, "link_exponent", gg.link_zipf_exponent,
145  "generated: Zipf exponent parameter for outgoing links, "
146  "default: " + std::to_string(gg.link_zipf_exponent));
147 
148  std::vector<std::string> input_path;
149  clp.add_param_stringlist("input", input_path,
150  "input file pattern(s)");
151 
152  if (!clp.process(argc, argv)) {
153  return -1;
154  }
155 
156  die_unless(!generate || input_path.size() == 1);
157 
158  clp.print_result();
159 
160  return api::Run(
161  [&](api::Context& ctx) {
162  ctx.enable_consume();
163 
164  size_t triangles;
165  if (generate) {
166  triangles = CountTrianglesGenerated(
167  ctx, gg, num_vertices);
168  }
169  else {
170  triangles = CountTrianglesPerLine(
171  ctx, input_path);
172  }
173 
174  return triangles;
175  });
176 }
177 
178 /******************************************************************************/
net::FlowControlChannel & net
Definition: context.hpp:446
auto Generate(Context &ctx, size_t size, const GenerateFunction &generate_function)
Generate is a Source-DOp, which creates a DIA of given size using a generator function.
Definition: generate.hpp:87
#define die_unless(X)
Definition: die.hpp:27
std::pair< Node, Node > Edge
Definition: triangles.hpp:21
size_t num_hosts() const
Returns the total number of hosts.
Definition: context.hpp:233
#define LOG1
Definition: logger.hpp:28
int Run(const std::function< void(Context &)> &job_startpoint)
Runs the given job startpoint with a Context instance.
Definition: context.cpp:947
void Barrier()
A trivial global barrier.
void add_size_t(char key, const std::string &longkey, size_t &dest, const std::string &desc)
add size_t option -key, --longkey with description and store to dest
size_t Node
Definition: triangles.hpp:20
The Context of a job is a unique instance per worker which holds references to all underlying parts o...
Definition: context.hpp:221
void enable_consume(bool consume=true)
Sets consume-mode flag such that DIA contents may be consumed during PushData().
Definition: context.hpp:388
DIA< std::string > ReadLines(Context &ctx, const std::string &filepath)
ReadLines is a DOp, which reads a file from the file system and creates an ordered DIA according to a...
Definition: read_lines.hpp:454
static by_string to_string(int val)
convert to string
double size_mean
Gaussian mean and variance of content length.
void print_result(std::ostream &os)
print nicely formatted result of processing
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Definition: allocator.hpp:220
Command line parser which automatically fills variables and prints nice usage messages.
net::Traffic Traffic() const
calculate overall traffic for final stats
Definition: group.cpp:67
size_t my_rank() const
Global rank of this worker among all other workers in the system.
Definition: context.hpp:243
void add_double(char key, const std::string &longkey, double &dest, const std::string &desc)
add double option -key, --longkey with description and store to dest
size_t CountTriangles(const DIA< Edge, Stack > &edges)
Definition: triangles.hpp:46
void add_param_stringlist(const std::string &name, std::vector< std::string > &dest, const std::string &desc)
void add_bool(char key, const std::string &longkey, bool &dest, const std::string &desc)
net::Manager & net_manager()
Definition: context.hpp:334
static size_t CountTrianglesGenerated(api::Context &ctx, const ZipfGraphGen &base_graph_gen, const size_t &num_vertices)
int main(int argc, char *argv[])
bool process(int argc, const char *const *argv, std::ostream &os)
static size_t CountTrianglesPerLine(api::Context &ctx, const std::vector< std::string > &input_path)