Thrill  0.1
zipf_distribution.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/common/zipf_distribution.hpp
3  *
4  * A Zipf distribution random generator using std::discrete_distribution and a
5  * computed probability table.
6  *
7  * Code borrowed from
8  * https://github.com/gkohri/discreteRNG/blob/master/zipf-mandelbrot.h
9  *
10  * Copyright (c) 2015, G.A. Kohring
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  *
17  * * Redistributions of source code must retain the above copyright
18  * notice, this list of conditions and the following disclaimer.
19  *
20  * * Redistributions in binary form must reproduce the above copyright
21  * notice, this list of conditions and the following disclaimer in the
22  * documentation and/or other materials provided with the distribution.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  *
36  * Part of Project Thrill - http://project-thrill.org
37  *
38  * Copyright (C) 2016 Timo Bingmann <[email protected]>
39  *
40  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
41  ******************************************************************************/
42 
43 #pragma once
44 #ifndef THRILL_COMMON_ZIPF_DISTRIBUTION_HEADER
45 #define THRILL_COMMON_ZIPF_DISTRIBUTION_HEADER
46 
47 #include <thrill/common/logger.hpp>
48 
49 #include <random>
50 #include <vector>
51 
52 namespace thrill {
53 namespace common {
54 
55 /*!
56  * A class for producing random integers distributed according to the
57  * Zipf-Mandelbrot probability mass function:
58  *
59  * p(k;N,q,s) = 1/( H(N,q,s)*(k+q)^s )
60  *
61  * where
62  * H(N,q,s) = sum_(n=1)^(N) 1/(n+q)^s
63  *
64  * and s > 1, q >= 0, N > 1.
65  *
66  * When q = 0 this becomes the mass function for Zipf's Law
67  * When N -> infinity this becomes the Hurwitz Zeta mass function
68  * When N -> infinity and q = 0, this becomes the Riemann Zeta mass function
69  */
71 {
72 public:
73  //! create uninitialized object. be careful.
74  ZipfDistribution() = default;
75 
76  /*!
77  * Creates a new Zipf-Mandelbrot distribution given s, q, N
78  *
79  * p(k;N,q,s) = 1/( H(N,q,s)*(k+q)^s )
80  *
81  * where
82  * H(N,q,s) = sum_(n=1)^(N) 1/(n+q)^s
83  *
84  * and s > 1, q >= 0, N > 1
85  *
86  * Only N and s needs to be specified. The default for q is 0.
87  */
88  ZipfDistribution(const size_t N, const double s, const double q = 0)
89  : N_(N), s_(s), q_(q),
90  dist_(make_dist(N, s, q)) { }
91 
92  //! pick next random number in the range [1,num)
93  template <typename Engine>
94  size_t operator () (Engine& eng) { return dist_(eng) + 1; }
95 
96  //! deliver population size
97  size_t N() const { return N_; }
98 
99  //! parameter of distribution
100  double q() const { return q_; }
101 
102  //! parameter of distribution
103  double s() const { return s_; }
104 
105  //! minimum value of distribution
106  size_t min() const { return 1; }
107 
108  //! maximum value (inclusive) of distribution
109  size_t max() const { return N_; }
110 
111 private:
112  size_t N_;
113  double s_;
114  double q_;
115 
116  using dist_type = std::discrete_distribution<size_t>;
118 
119  static dist_type make_dist(size_t N, double s, double q) {
120  if (!(s > 0.0)) {
121  LOG1 << "s (" << s << ") must be greater than 0.0.";
122  abort();
123  }
124 
125  std::vector<double> probs(N);
126 
127  double p_sum = 0.0;
128  for (size_t k = 1; k < N + 1; ++k) {
129  double prob = 1.0 / std::pow(static_cast<double>(k) + q, s);
130  p_sum += prob;
131  probs[k - 1] = prob;
132  }
133 
134  double p_norm = 1.0 / p_sum;
135  for (size_t i = 0; i < N; ++i) {
136  probs[i] *= p_norm;
137  }
138 
139  return dist_type(probs.begin(), probs.end());
140  }
141 };
142 
143 } // namespace common
144 } // namespace thrill
145 
146 #endif // !THRILL_COMMON_ZIPF_DISTRIBUTION_HEADER
147 
148 /******************************************************************************/
size_t max() const
maximum value (inclusive) of distribution
#define LOG1
Definition: logger.hpp:28
ZipfDistribution()=default
create uninitialized object. be careful.
std::discrete_distribution< size_t > dist_type
size_t min() const
minimum value of distribution
A class for producing random integers distributed according to the Zipf-Mandelbrot probability mass f...
double s() const
parameter of distribution
size_t N() const
deliver population size
static dist_type make_dist(size_t N, double s, double q)
ZipfDistribution(const size_t N, const double s, const double q=0)
Creates a new Zipf-Mandelbrot distribution given s, q, N.
size_t operator()(Engine &eng)
pick next random number in the range [1,num)
double q() const
parameter of distribution