Thrill  0.1
polynomial_regression.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/math/polynomial_regression.hpp
3  *
4  * Part of tlx - http://panthema.net/tlx
5  *
6  * Copyright (C) 2018 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the Boost Software License, Version 1.0
9  ******************************************************************************/
10 
11 #ifndef TLX_MATH_POLYNOMIAL_REGRESSION_HEADER
12 #define TLX_MATH_POLYNOMIAL_REGRESSION_HEADER
13 
14 #include <cmath>
15 #include <cstdlib>
16 #include <vector>
17 
18 namespace tlx {
19 
20 //! \addtogroup tlx_math
21 //! \{
22 
23 /*!
24  * Calculate the regression polynomial \f$ a_0+a_1x^1+a_2x^2+\cdots+a_nx^n \f$
25  * from a list of 2D points.
26  * See also https://en.wikipedia.org/wiki/Polynomial_regression
27  *
28  * If WithStore is false, then the sums are aggregated directly such that the
29  * class retains O(1) size independent of the number of points. if WithStore is
30  * true then the points are stored in a vector and can be retrieved.
31  */
32 template <typename Type = double, bool WithStore = false>
34 {
35 public:
36  //! start new polynomial regression calculation
37  PolynomialRegression(size_t order)
38  : order_(order), size_(0), X_(2 * order + 1, 0), Y_(order + 1, 0)
39  { }
40 
41  //! number of points
42  size_t size() const {
43  return size_;
44  }
45 
46  //! 2D point
47  struct Point {
48  Type x, y;
49  };
50 
51  //! add point. this invalidates cached coefficients until next evaluate()
52  PolynomialRegression& add(const Type& x, const Type& y) {
53  X_[0] += 1.0;
54  if (1 < 2 * order_ + 1)
55  X_[1] += x;
56  if (2 < 2 * order_ + 1)
57  X_[2] += x * x;
58  for (size_t i = 3; i < 2 * order_ + 1; ++i)
59  X_[i] += Type(std::pow(x, i));
60 
61  Y_[0] += y;
62  if (1 < order_ + 1)
63  Y_[1] += x * y;
64  if (2 < order_ + 1)
65  Y_[2] += x * x * y;
66  for (size_t i = 3; i < order_ + 1; ++i)
67  Y_[i] += Type(std::pow(x, i)) * y;
68 
69  if (WithStore)
70  points_.emplace_back(Point { x, y });
71 
72  ++size_;
73  coefficients_.clear();
74  return *this;
75  }
76 
77  //! return a point. Only available if WithStore is true.
78  const Point& point(size_t i) {
79  return points_[i];
80  }
81 
82  //! polynomial stored as the coefficients of
83  //! \f$ a_0+a_1 x^1+a_2 x^2+\cdots+a_n x^n \f$
84  struct Coefficients : public std::vector<Type> {
85  //! evaluate polynomial at x using Horner schema
86  Type evaluate(const Type& x) const {
87  Type result = 0.0;
88  for (size_t i = this->size(); i != 0; ) {
89  result = result * x + this->operator [] (--i);
90  }
91  return result;
92  }
93  };
94 
95  //! get r^2. Only available if WithStore is true.
97  if (size_ == 0 || !WithStore)
98  return NAN;
99 
100  //! mean value of y
101  Type y_mean = Y_[0] / size_;
102 
103  // the sum of the squares of the differences between the measured y
104  // values and the mean y value
105  Type ss_total = 0.0;
106 
107  // the sum of the squares of the difference between the measured y
108  // values and the values of y predicted by the polynomial
109  Type ss_error = 0.0;
110 
111  for (const Point& p : points_) {
112  ss_total += (p.y - y_mean) * (p.y - y_mean);
113  Type y = evaluate(p.x);
114  ss_error += (p.y - y) * (p.y - y);
115  }
116 
117  // 1 - (residual sum of squares / total sum of squares)
118  return 1.0 - ss_error / ss_total;
119  }
120 
121  //! returns value of y predicted by the polynomial for a given value of x
122  Type evaluate(const Type& x) {
123  return coefficients().evaluate(x);
124  }
125 
126  //! return coefficients vector
128  if (coefficients_.empty()) {
130  }
131  return coefficients_;
132  }
133 
134 protected:
135  //! polynomial order
136  size_t order_;
137 
138  //! list of stored points if WithStore, else empty.
139  std::vector<Point> points_;
140 
141  //! number of points added
142  size_t size_;
143 
144  //! X_ = vector that stores values of sigma(x_i^2n)
145  std::vector<Type> X_;
146 
147  //! Y_ = vector to store values of sigma(x_i^order * y_i)
148  std::vector<Type> Y_;
149 
150  //! cached coefficients
152 
153  //! polynomial regression by inverting a Vandermonde matrix.
155 
156  coefficients_.clear();
157 
158  if (size_ == 0)
159  return;
160 
161  size_t np1 = order_ + 1, np2 = order_ + 2;
162 
163  // B = normal augmented matrix that stores the equations.
164  std::vector<Type> B(np1 * np2, 0);
165 
166  for (size_t i = 0; i <= order_; ++i) {
167  for (size_t j = 0; j <= order_; ++j) {
168  B[i * np2 + j] = X_[i + j];
169  }
170  }
171 
172  // load values of Y_ as last column of B
173  for (size_t i = 0; i <= order_; ++i) {
174  B[i * np2 + np1] = Y_[i];
175  }
176 
177  // pivotization of the B matrix.
178  for (size_t i = 0; i < order_ + 1; ++i) {
179  for (size_t k = i + 1; k < order_ + 1; ++k) {
180  if (B[i * np2 + i] < B[k * np2 + i]) {
181  for (size_t j = 0; j <= order_ + 1; ++j) {
182  std::swap(B[i * np2 + j], B[k * np2 + j]);
183  }
184  }
185  }
186  }
187 
188  // perform Gaussian elimination.
189  for (size_t i = 0; i < order_; ++i) {
190  for (size_t k = i + 1; k < order_ + 1; ++k) {
191  Type t = B[k * np2 + i] / B[i * np2 + i];
192  for (size_t j = 0; j <= order_ + 1; ++j) {
193  B[k * np2 + j] -= t * B[i * np2 + j];
194  }
195  }
196  }
197 
198  // back substitution to calculate final coefficients.
199  coefficients_.resize(np1);
200  for (size_t i = order_ + 1; i != 0; ) {
201  --i;
202  coefficients_[i] = B[i * np2 + order_ + 1];
203  for (size_t j = 0; j < order_ + 1; ++j) {
204  if (j != i)
205  coefficients_[i] -= B[i * np2 + j] * coefficients_[j];
206  }
207  coefficients_[i] /= B[i * np2 + i];
208  }
209  }
210 };
211 
212 //! \}
213 
214 } // namespace tlx
215 
216 #endif // !TLX_MATH_POLYNOMIAL_REGRESSION_HEADER
217 
218 /******************************************************************************/
Calculate the regression polynomial from a list of 2D points.
size_t size() const
number of points
const Point & point(size_t i)
return a point. Only available if WithStore is true.
Type
VFS object type.
Definition: file_io.hpp:52
Type evaluate(const Type &x)
returns value of y predicted by the polynomial for a given value of x
size_t size_
number of points added
size_t order_
polynomial order
Type evaluate(const Type &x) const
evaluate polynomial at x using Horner schema
Coefficients coefficients_
cached coefficients
std::vector< Point > points_
list of stored points if WithStore, else empty.
void swap(CountingPtr< A, D > &a1, CountingPtr< A, D > &a2) noexcept
const Coefficients & coefficients()
return coefficients vector
std::vector< Type > Y_
Y_ = vector to store values of sigma(x_i^order * y_i)
std::vector< Type > X_
X_ = vector that stores values of sigma(x_i^2n)
void fit_coefficients()
polynomial regression by inverting a Vandermonde matrix.
std::vector< T, Allocator< T > > vector
vector with Manager tracking
Definition: allocator.hpp:228
PolynomialRegression(size_t order)
start new polynomial regression calculation
Type r_square()
get r^2. Only available if WithStore is true.
PolynomialRegression & add(const Type &x, const Type &y)
add point. this invalidates cached coefficients until next evaluate()