Thrill  0.1
zipf_graph_gen.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/page_rank/zipf_graph_gen.cpp
3  *
4  * A simple graph generator for the PageRank benchmark inspired by HiBench's
5  * generator. The number of outgoing links of each page is Gaussian distributed,
6  * by default with mean 50 and variance 10, and the link targets themselves
7  * follow a Zipf-Mandelbrot distribution with very small scale parameter, such
8  * that the pages with low id numbers have a slightly higher probability than
9  * the rest.
10  *
11  * Part of Project Thrill - http://project-thrill.org
12  *
13  * Copyright (C) 2016 Timo Bingmann <tb@panthema.net>
14  *
15  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
16  ******************************************************************************/
17 
19 
20 #include <thrill/common/logger.hpp>
21 #include <tlx/cmdline_parser.hpp>
22 
23 #include <iostream>
24 #include <string>
25 #include <vector>
26 
27 using namespace thrill; // NOLINT
28 using namespace examples::page_rank; // NOLINT
29 
30 int main(int argc, char* argv[]) {
31 
33 
34  // Graph Generator
35  ZipfGraphGen gg(1);
36 
37  uint64_t pages;
38  clp.add_param_bytes("pages", pages, "number of pages");
39 
40  bool group = false;
41  clp.add_bool('g', "group", group, "group outgoing links");
42 
43  clp.add_double('m', "size_mean", gg.size_mean,
44  "mean of number of outgoing links, default: "
46 
47  clp.add_double(0, "size_var", gg.size_var,
48  "variance of number of outgoing links, default: "
49  + std::to_string(gg.size_var));
50 
51  clp.add_double(0, "link_scale", gg.link_zipf_scale,
52  "Zipf scale parameter for outgoing links, default: "
54 
55  clp.add_double(0, "link_exponent", gg.link_zipf_exponent,
56  "Zipf exponent parameter for outgoing links, default: "
58 
59  if (!clp.process(argc, argv)) {
60  return -1;
61  }
62 
63  // reinitialize graph generator with parameters from the command line.
64  gg.Initialize(pages);
65 
66  //! underlying random number generator
67  std::default_random_engine rng(std::random_device { } ());
68 
69  for (size_t p = 0; p < pages; ++p)
70  {
71  std::vector<size_t> result = gg.GenerateOutgoing(rng);
72  if (group) {
73  for (size_t i = 0; i < result.size(); ++i) {
74  if (i != 0) std::cout << ' ';
75  std::cout << result[i];
76  }
77  std::cout << '\n';
78  }
79  else {
80  for (const size_t& out : result) {
81  std::cout << p << '\t' << out << '\n';
82  }
83  }
84  }
85 
86  return 0;
87 }
88 
89 /******************************************************************************/
std::vector< size_t > GenerateOutgoing(Generator &rng)
void add_param_bytes(const std::string &name, uint32_t &dest, const std::string &desc)
static by_string to_string(int val)
convert to string
double size_mean
Gaussian mean and variance of content length.
void Initialize(uint64_t _pages)
reinitialize the random generator if parameters were changed.
Command line parser which automatically fills variables and prints nice usage messages.
int main(int argc, char *argv[])
void add_double(char key, const std::string &longkey, double &dest, const std::string &desc)
add double option -key, --longkey with description and store to dest
void add_bool(char key, const std::string &longkey, bool &dest, const std::string &desc)
bool process(int argc, const char *const *argv, std::ostream &os)