Thrill  0.1
aggregate.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/math/aggregate.hpp
3  *
4  * Part of tlx - http://panthema.net/tlx
5  *
6  * Copyright (C) 2015-2018 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the Boost Software License, Version 1.0
9  ******************************************************************************/
10 
11 #ifndef TLX_MATH_AGGREGATE_HEADER
12 #define TLX_MATH_AGGREGATE_HEADER
13 
14 #include <tlx/define/likely.hpp>
15 
16 #include <algorithm>
17 #include <cmath>
18 #include <limits>
19 
20 namespace tlx {
21 
22 //! \addtogroup tlx_math
23 //! \{
24 
/*!
 * Calculate running aggregate statistics: feed it with values, and it will keep
 * the minimum, the maximum, the average, the number of values, and the
 * standard deviation of the values.
 *
 * Only a constant amount of state is stored (count, mean, count * variance,
 * min, max); the individual values are not retained. Two aggregates can be
 * merged with operator + or operator +=.
 *
 * \tparam Type_ type of the aggregated values. It must be convertible to
 * double, comparable via std::min / std::max, and usable with
 * std::numeric_limits.
 */
template <typename Type_>
class Aggregate
{
public:
    using Type = Type_;

    //! default constructor: an empty aggregate containing no values
    Aggregate() = default;

    //! initializing constructor
    //! \param count number of values represented
    //! \param mean mean of those values
    //! \param nvar count * variance of those values (sum of squared
    //! deviations from the mean)
    //! \param min smallest value
    //! \param max largest value
    Aggregate(size_t count, const double& mean, const double& nvar,
              const Type& min, const Type& max) noexcept
        : count_(count), mean_(mean), nvar_(nvar),
          min_(min), max_(max) { }

    //! add a value to the running aggregation
    //! \return *this, so that calls can be chained
    Aggregate& add(const Type& value) noexcept {
        count_++;
        min_ = std::min(min_, value);
        max_ = std::max(max_, value);
        // Single-pass numerically stable mean and standard deviation
        // calculation as described in Donald Knuth: The Art of Computer
        // Programming, Volume 2, Chapter 4.2.2, Equations 15 & 16
        double delta = value - mean_;
        mean_ += delta / count_;
        // uses the *updated* mean on purpose (Knuth's Equation 16)
        nvar_ += delta * (value - mean_);
        return *this;
    }

    //! return number of values aggregated
    size_t count() const noexcept { return count_; }

    //! return sum over all values aggregated. The sum is reconstructed as
    //! count * mean in double precision and then converted to Type.
    // can't make noexcept since Type_'s conversion is allowed to throw
    const Type sum() const { return static_cast<Type>(count_ * mean_); }

    //! return sum over all values aggregated (alias of sum())
    const Type total() const { return sum(); }

    //! return the average over all values aggregated
    double average() const noexcept { return mean_; }

    //! return the average over all values aggregated (alias of average())
    double avg() const noexcept { return average(); }

    //! return the average over all values aggregated (alias of average())
    double mean() const noexcept { return average(); }

    //! return minimum over all values aggregated
    const Type& min() const noexcept { return min_; }

    //! return maximum over all values aggregated
    const Type& max() const noexcept { return max_; }

    //! return maximum - minimum over all values aggregated
    Type span() const noexcept { return max_ - min_; }

    //! return the variance of all values aggregated.
    //! ddof = delta degrees of freedom
    //! Set to 0 if you have the entire distribution
    //! Set to 1 if you have a sample (to correct for bias)
    //! Returns 0.0 if fewer than two values were aggregated or if ddof is
    //! not smaller than the number of values.
    double variance(size_t ddof = 1) const {
        // count_ - ddof is unsigned: guard against division by zero
        // (ddof == count_) and wrap-around (ddof > count_).
        if (count_ <= 1 || count_ <= ddof) return 0.0;
        return nvar_ / static_cast<double>(count_ - ddof);
    }

    //! return the variance of all values aggregated (alias of variance()).
    //! ddof = delta degrees of freedom
    //! Set to 0 if you have the entire distribution
    //! Set to 1 if you have a sample (to correct for bias)
    double var(size_t ddof = 1) const {
        return variance(ddof);
    }

    //! return the standard deviation of all values aggregated.
    //! ddof = delta degrees of freedom
    //! Set to 0 if you have the entire distribution
    //! Set to 1 if you have a sample (to correct for bias)
    double standard_deviation(size_t ddof = 1) const {
        return std::sqrt(variance(ddof));
    }

    //! return the standard deviation of all values aggregated (alias of
    //! standard_deviation()).
    //! ddof = delta degrees of freedom
    //! Set to 0 if you have the entire distribution
    //! Set to 1 if you have a sample (to correct for bias)
    double stdev(size_t ddof = 1) const { return standard_deviation(ddof); }

    //! operator + to combine two Aggregate<>
    Aggregate operator + (const Aggregate& a) const noexcept {
        return Aggregate(
            // count
            count_ + a.count_,
            // mean
            combine_means(a),
            // merging variance is a bit complicated
            combine_variance(a),
            // min, max
            std::min(min_, a.min_), std::max(max_, a.max_));
    }

    //! operator += to combine two Aggregate<>
    Aggregate& operator += (const Aggregate& a) noexcept {
        // combine_means() and combine_variance() both read the current
        // mean_ and count_ of *this, so evaluate them before any member is
        // overwritten; otherwise the variance is merged against the wrong
        // mean.
        const double merged_mean = combine_means(a);
        const double merged_nvar = combine_variance(a);
        const size_t merged_count = count_ + a.count_;

        min_ = std::min(min_, a.min_);
        max_ = std::max(max_, a.max_);
        mean_ = merged_mean;
        nvar_ = merged_nvar;
        count_ = merged_count;
        return *this;
    }

    //! serialization method for cereal.
    template <typename Archive>
    void serialize(Archive& archive) {
        archive(count_, mean_, nvar_, min_, max_);
    }

private:
    //! combine means, check if either count is zero. fix problems with NaN
    double combine_means(const Aggregate& a) const noexcept {
        if (count_ == 0)
            return a.mean_;
        if (a.count_ == 0)
            return mean_;
        return (mean_ * count_ + a.mean_ * a.count_) / (count_ + a.count_);
    }

    //! T. Chan et al 1979, "Updating Formulae and a Pairwise Algorithm for
    //! Computing Sample Variances"
    double combine_variance(const Aggregate& other) const noexcept {
        // With an empty side the cross term is zero; returning early also
        // avoids 0/0 = NaN when both sides are empty.
        if (count_ == 0 || other.count_ == 0)
            return nvar_ + other.nvar_;
        const double delta = mean_ - other.mean_;
        // convert before multiplying so the product of the two counts
        // cannot wrap around in size_t
        const double n_this = static_cast<double>(count_);
        const double n_other = static_cast<double>(other.count_);
        return nvar_ + other.nvar_ + (delta * delta) *
               (n_this * n_other) / (n_this + n_other);
    }

    //! number of values aggregated
    size_t count_ = 0;

    //! mean of values
    double mean_ = 0.0;

    //! approximate count * variance; stddev = sqrt(nvar / (count-1))
    double nvar_ = 0.0;

    //! minimum value; starts at the largest representable value so that the
    //! first add() replaces it
    Type min_ = std::numeric_limits<Type>::max();

    //! maximum value; starts at the lowest representable value so that the
    //! first add() replaces it
    Type max_ = std::numeric_limits<Type>::lowest();
};
180 
181 //! \}
182 
183 } // namespace tlx
184 
185 #endif // !TLX_MATH_AGGREGATE_HEADER
186 
187 /******************************************************************************/
double combine_variance(const Aggregate &other) const noexcept
Definition: aggregate.hpp:159
static uint_pair max()
return an uint_pair instance containing the largest value possible
Definition: uint_types.hpp:226
Aggregate & add(const Type &value) noexcept
add a value to the running aggregation
Definition: aggregate.hpp:46
Type span() const noexcept
return maximum - minimum over all values aggregated
Definition: aggregate.hpp:85
static constexpr double delta
Definition: select.hpp:35
double stdev(size_t ddof=1) const
Definition: aggregate.hpp:116
Aggregate & operator+=(const Aggregate &a) noexcept
operator += to combine two Aggregate<>
Definition: aggregate.hpp:132
double var(size_t ddof=1) const
Definition: aggregate.hpp:100
double mean() const noexcept
return the average over all values aggregated
Definition: aggregate.hpp:76
Aggregate()=default
default constructor
const Type sum() const
return sum over all values aggregated
Definition: aggregate.hpp:64
double standard_deviation(size_t ddof=1) const
Definition: aggregate.hpp:108
void serialize(Archive &archive)
serialization method for cereal.
Definition: aggregate.hpp:143
int value
Definition: gen_data.py:41
double mean_
mean of values
Definition: aggregate.hpp:169
Aggregate(size_t count, const double &mean, const double &nvar, const Type &min, const Type &max) noexcept
initializing constructor
Definition: aggregate.hpp:40
Type min_
minimum value
Definition: aggregate.hpp:175
const Type & min() const noexcept
return minimum over all values aggregated
Definition: aggregate.hpp:79
Type max_
maximum value
Definition: aggregate.hpp:178
double avg() const noexcept
return the average over all values aggregated
Definition: aggregate.hpp:73
static uint_pair min()
return an uint_pair instance containing the smallest value possible
Definition: uint_types.hpp:217
const Type total() const
return sum over all values aggregated
Definition: aggregate.hpp:67
size_t count_
number of values aggregated
Definition: aggregate.hpp:166
double average() const noexcept
return the average over all values aggregated
Definition: aggregate.hpp:70
double variance(size_t ddof=1) const
Definition: aggregate.hpp:91
const Type & max() const noexcept
return maximum over all values aggregated
Definition: aggregate.hpp:82
size_t count() const noexcept
return number of values aggregated
Definition: aggregate.hpp:60
double combine_means(const Aggregate &a) const noexcept
combine means, check if either count is zero. fix problems with NaN
Definition: aggregate.hpp:149
Aggregate operator+(const Aggregate &a) const noexcept
operator + to combine two Aggregate<>
Definition: aggregate.hpp:119
double nvar_
approximate count * variance; stddev = sqrt(nvar / (count-1))
Definition: aggregate.hpp:172
Calculate running aggregate statistics: feed it with values, and it will keep the minimum...
Definition: aggregate.hpp:31