13 #ifndef THRILL_EXAMPLES_STOCHASTIC_GRADIENT_DESCENT_STOCHASTIC_GRADIENT_DESCENT_HEADER 14 #define THRILL_EXAMPLES_STOCHASTIC_GRADIENT_DESCENT_STOCHASTIC_GRADIENT_DESCENT_HEADER 22 #include <cereal/types/vector.hpp> 29 namespace stochastic_gradient_descent {
39 template <
typename Vector>
44 template <
typename Archive>
50 template <
typename Vector>
51 std::ostream& operator << (std::ostream& os, const DataPoint<Vector>& p) {
52 return os <<
"data: " << p.data <<
", label: " << p.label;
56 template <
typename Vector>
65 template <
typename Archive>
72 template <
typename Vector>
77 template <
typename Archive>
85 template <
typename Vector>
91 auto diff = data.
dot(weights) -
label;
92 auto loss = 0.5 * diff * diff;
98 template <
typename Vector>
103 size_t num_iterations,
double mini_batch_fraction,
104 double step_size,
double tolerance)
105 : num_iterations(num_iterations),
106 mini_batch_fraction(mini_batch_fraction),
107 step_size(step_size), tolerance(tolerance)
112 const Vector& initial_weights) {
113 auto weights = initial_weights;
114 bool converged =
false;
116 while (!converged && i <= num_iterations) {
117 LOG1 <<
"weights: " << weights;
118 auto old_weights = weights;
119 auto sample = input_points.BernoulliSample(mini_batch_fraction);
143 auto weight_gradient_sum = sum_result.grad;
145 LOG1 <<
"n: " << sum_result.count;
146 LOG1 <<
"grad: " << weight_gradient_sum.weights;
147 LOG1 <<
"loss: " << weight_gradient_sum.loss;
152 (step_size / sqrt(i))
153 * weight_gradient_sum.weights / sum_result.count;
155 converged = is_converged(old_weights, weights, tolerance);
157 LOG1 <<
"iterations: " << i;
175 #endif // !THRILL_EXAMPLES_STOCHASTIC_GRADIENT_DESCENT_STOCHASTIC_GRADIENT_DESCENT_HEADER
static Vector Make(size_t D_)
DIA is the interface between the user and the Thrill framework.
static uint_pair max()
Returns a uint_pair instance containing the largest possible value.
A variable-length D-dimensional point with double precision.
bool is_converged(Vector &old, Vector &curr, double tolerance)
GradientResult< Vector > grad
void serialize(Archive &ar)
Type dot(const Vector &b) const
Model for one point consisting of a d-dimensional position and a label.
void serialize(Archive &ar)
auto gradient(const bool &y, const std::array< T, dim > &x, const std::array< T, dim > &w)
static GradientResult< Vector > Compute(const Vector &data, double label, const Vector &weights)
Type Distance(const Vector &b) const
StochasticGradientDescent(size_t num_iterations, double mini_batch_fraction, double step_size, double tolerance)
double mini_batch_fraction
void serialize(Archive &ar)
Vector optimize(const DIA< DataPoint< Vector > > &input_points, const Vector &initial_weights)
Do the actual computation.