Thrill  0.1
zipf_graph_gen.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/page_rank/zipf_graph_gen.hpp
3  *
4  * A simple graph generator for the PageRank benchmark inspired by HiBench's
5  * generator. The number of outgoing links of each page is Gaussian distributed,
6  * by default with mean 50 and variance 10, and the link targets themselves
7  * follow a Zipf-Mandelbrot distribution with very small scale parameter, such
8  * that the pages with low id numbers have a slightly higher probability than
9  * the rest.
10  *
11  * Part of Project Thrill - http://project-thrill.org
12  *
13  * Copyright (C) 2016 Timo Bingmann <[email protected]>
14  *
15  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
16  ******************************************************************************/
17 
18 #pragma once
19 #ifndef THRILL_EXAMPLES_PAGE_RANK_ZIPF_GRAPH_GEN_HEADER
20 #define THRILL_EXAMPLES_PAGE_RANK_ZIPF_GRAPH_GEN_HEADER
21 
22 #include <thrill/common/string.hpp>
24 
25 #include <algorithm>
26 #include <string>
27 #include <vector>
28 
29 namespace examples {
30 namespace page_rank {
31 
33 {
34 public:
36 
37  //! number of pages in graph
38  uint64_t pages;
39 
40  //! Gaussian mean and variance of content length
41  double size_mean = 50;
42  double size_var = 10;
43 
44  //! Zipf distribution scale and exponent for generating outgoing links over
45  //! the page number universe.
46  double link_zipf_scale = 0.3;
47  double link_zipf_exponent = 0.5;
48 
49  explicit ZipfGraphGen(uint64_t _pages)
50  : pages(_pages)
51  { Initialize(); }
52 
53  ZipfGraphGen(const ZipfGraphGen& base, uint64_t _pages)
54  : pages(_pages),
55  size_mean(base.size_mean), size_var(base.size_var),
56  link_zipf_scale(base.link_zipf_scale),
57  link_zipf_exponent(base.link_zipf_exponent)
58  { Initialize(); }
59 
60  //! reinitialize the random generator if parameters were changed.
61  void Initialize(uint64_t _pages) {
62  pages = _pages;
63 
64  content_length_dist_ = std::normal_distribution<double>(
66 
68  pages, link_zipf_scale, link_zipf_exponent);
69  }
70 
71  //! reinitialize the random generator if parameters were changed.
72  void Initialize() {
73  return Initialize(pages);
74  }
75 
76  template <typename Generator>
77  std::vector<size_t> GenerateOutgoing(Generator& rng) {
78  double dsize = content_length_dist_(rng);
79  if (dsize < 0) dsize = 0;
80 
81  size_t size = static_cast<size_t>(std::round(dsize));
82 
83  std::vector<size_t> result;
84  result.reserve(size);
85 
86  for (size_t i = 0; i < size; ++i) {
87  result.emplace_back(link_zipf_(rng) - 1);
88  }
89  std::sort(result.begin(), result.end());
90  return result;
91  }
92 
93 private:
94  //! Gaussian random variable for content length of a page
95  std::normal_distribution<double> content_length_dist_;
96 
97  //! Zipf random variable for outgoing links.
99 };
100 
101 } // namespace page_rank
102 } // namespace examples
103 
104 #endif // !THRILL_EXAMPLES_PAGE_RANK_ZIPF_GRAPH_GEN_HEADER
105 
106 /******************************************************************************/
std::vector< size_t > GenerateOutgoing(Generator &rng)
Definition: bfs.hpp:21
thrill::common::ZipfDistribution ZipfDistribution
A class for producing random integers distributed according to the Zipf-Mandelbrot probability mass f...
double size_mean
Gaussian mean and variance of content length.
void Initialize(uint64_t _pages)
reinitialize the random generator if parameters were changed.
std::normal_distribution< double > content_length_dist_
Gaussian random variable for content length of a page.
uint64_t pages
number of pages in graph
ZipfGraphGen(const ZipfGraphGen &base, uint64_t _pages)
void Initialize()
reinitialize the random generator if parameters were changed.
ZipfDistribution link_zipf_
Zipf random variable for outgoing links.