13 #ifndef THRILL_EXAMPLES_LOGISTIC_REGRESSION_LOGISTIC_REGRESSION_HEADER 14 #define THRILL_EXAMPLES_LOGISTIC_REGRESSION_LOGISTIC_REGRESSION_HEADER 29 #define LOGM LOGC(debug && ctx.my_rank() == 0) 32 namespace logistic_regression {
// Compile-time switch for debug logging in this header: the LOGM macro is
// gated on `debug && ctx.my_rank() == 0`.
35 static constexpr
bool debug =
true;
// Evaluates 1 / (1 + exp(-x)) for the argument x.
// NOTE(review): the enclosing function header is not visible in this listing;
// presumably this is the body of sigmoid() used by the prediction code -- confirm.
39 return 1.0 / (1.0 + exp(-x));
// Distance measure between two weight vectors of `dim` components.
//
// Visits every index i in [0, dim), forms weights[i] - new_weights[i], and
// finally returns std::sqrt(sum).
//
// NOTE(review): the first signature line and the declaration/update of `sum`
// are not visible in this listing; presumably sum accumulates diff * diff,
// which would make the result the Euclidean distance -- confirm.
42 template <
typename T,
size_t dim>
44 const std::array<T, dim>& new_weights) {
46 for (
size_t i = 0; i <
dim; ++i) {
// Per-component difference between the old and the new weight.
47 T diff = weights[i] - new_weights[i];
50 return std::sqrt(sum);
// Builds a `dim`-component gradient array from one sample.
//
// \param y label of the sample
// \param x feature vector of the sample
// \param w current weight vector
//
// NOTE(review): the loop body and the return statement are not visible in
// this listing; presumably each grad[i] is derived from the dot product, y
// and x[i], and grad is returned -- confirm.
// NOTE(review): y is a bool taken by const reference; passing it by value
// would be the usual choice for such a small type.
53 template <
typename T,
size_t dim>
54 auto gradient(
const bool& y,
const std::array<T, dim>&
x,
55 const std::array<T, dim>& w) {
// Output array; its elements are not initialized at this point.
56 std::array<T, dim> grad;
// Dot product of w and x, accumulated in T starting from T{0.0}.
57 T dot_product = std::inner_product(w.begin(), w.end(), x.begin(),
T { 0.0 });
59 for (
size_t i = 0; i <
dim; ++i) {
// Trains logistic-regression weights by repeated gradient steps.
//
// Each pass maps every (label, features) pair to
// gradient(label, features, weights), adds the resulting arrays
// component-wise, and then computes each new weight as a - gamma * b, where a
// is the current weight and b the matching summed-gradient component. The
// loop runs while iter < max_iterations and leaves early once norm < epsilon.
//
// \param max_iterations upper bound on the number of passes
// \param gamma          factor applied to the summed gradient (default 0.002)
// \param epsilon        early-exit threshold compared against norm (default 0.0001)
// \return std::make_tuple(weights, norm, iter)
//
// NOTE(review): the function header and the declarations/updates of `iter`,
// `norm` and `grad` are not visible in this listing; presumably norm is
// calc_norm of the old and new weights and iter counts passes -- confirm.
65 template <
typename T,
size_t dim,
typename InStack,
66 typename Element = std::array<T, dim> >
68 size_t max_iterations,
double gamma = 0.002,
69 double epsilon = 0.0001) {
71 Element weights, new_weights;
// NOTE(review): only indices 0..2 are zeroed, which hard-codes dim == 3.
// With Element = std::array<T, dim> (for arithmetic T), a larger dim would
// leave the remaining entries unset and a smaller dim would index past the
// end; something like weights.fill(0) would cover every dim -- confirm which
// dims callers actually use.
72 weights[0] = weights[1] = weights[2] = 0;
76 while (iter < max_iterations) {
// Per-sample gradient against the current weights (captured by reference).
79 .Map([&weights](
const std::pair<bool, Element>& elem) -> Element {
80 return gradient(elem.first, elem.second, weights);
// Reduction step: component-wise addition of two gradient arrays.
82 .Sum([](
const Element& a,
const Element& b) -> Element {
84 std::transform(a.begin(), a.end(), b.begin(),
85 result.begin(), std::plus<T>());
// Weight update: new weight = old weight - gamma * gradient component.
89 std::transform(weights.begin(), weights.end(), grad.begin(),
91 [&gamma](
const T& a,
const T& b) ->
T 92 {
return a - gamma * b; });
95 weights = new_weights;
// Stop as soon as norm falls below the epsilon threshold.
98 if (norm < epsilon)
break;
101 return std::make_tuple(weights, norm, iter);
// Evaluates a weight vector against labelled data.
//
// Classifies every element as sigmoid(dot(weights, features)) > 0.5, pairs
// that prediction with elem.first, and returns
// std::make_tuple(expected_true, true_trues, expected_false, true_falses).
//
// \param weights weight vector used for every prediction
//
// NOTE(review): the function header, the body of the first Filter predicate
// and the assignments of true_trues / true_falses are not visible in this
// listing; presumably they are the counts of the two filtered streams below
// -- confirm.
// NOTE(review): the lambdas here take std::pair<T, Element>, whereas
// logit_train's Map lambda takes std::pair<bool, Element>. If the data
// elements are std::pair<bool, Element>, binding them to
// const std::pair<T, Element>& would go through a converting temporary for
// every element -- confirm and align the types if so.
104 template <
typename T,
size_t dim,
typename InStack,
105 typename Element = std::array<T, dim> >
107 const Element& weights) {
108 size_t expected_true =
110 .Filter([](
const std::pair<T, Element>& elem) ->
bool {
// Everything not counted in expected_true is counted as expected_false.
115 size_t expected_false = data.Keep().Size() - expected_true;
// Prediction.first is elem.first (the label); .second is the model's output.
117 using Prediction = std::pair<bool, bool>;
118 auto classification =
120 .Map([&weights](
const std::pair<T, Element>& elem) -> Prediction {
121 const Element& coords = elem.second;
// Dot product of the weights and the element's coordinates.
122 T predicted_y = std::inner_product(
123 weights.begin(), weights.end(), coords.begin(),
T { 0.0 });
// Classify as true when the sigmoid of the dot product exceeds 0.5.
125 bool prediction = (
sigmoid(predicted_y) > 0.5);
126 return Prediction { elem.first, prediction };
// Keeps predictions where label and prediction are both true.
131 classification.Keep()
132 .Filter([](
const Prediction& p) {
return p.first && p.second; })
// Keeps predictions where label and prediction are both false.
137 .Filter([](
const Prediction& p) {
return !p.first && !p.second; })
140 return std::make_tuple(expected_true, true_trues,
141 expected_false, true_falses);
147 #endif // !THRILL_EXAMPLES_LOGISTIC_REGRESSION_LOGISTIC_REGRESSION_HEADER
DIA is the interface between the user and the Thrill framework.
auto logit_test(const DIA< std::pair< bool, Element >, InStack > &data, const Element &weights)
static constexpr bool debug
auto gradient(const bool &y, const std::array< T, dim > &x, const std::array< T, dim > &w)
T calc_norm(const std::array< T, dim > &weights, const std::array< T, dim > &new_weights)
std::array< T, dim > Element
auto logit_train(const DIA< std::pair< bool, Element >, InStack > &data, size_t max_iterations, double gamma=0.002, double epsilon=0.0001)