// Squared Euclidean distance between this point and b:
// (x - b.x)^2 + (y - b.y)^2. No square root is taken.
31 double DistanceSquare(
const Point& b)
const {
32 return (x - b.x) * (x - b.x) + (y - b.y) * (y - b.y);
// Component-wise sum of this point and b, returned as a new Point.
// NOTE(review): the enclosing signature is not part of this excerpt --
// presumably Point::operator+ (used later as `a.point + b.point`); confirm.
35 return Point { x + b.x, y + b.y };
// Scales the point down by s: both coordinates are divided by s and the
// result is returned as a new Point. s is not checked against zero here.
37 Point operator / (
double s)
const {
38 return Point { x / s, y / s };
// Writes p to os in the form "(x,y)" and returns the result of the
// chained stream insertions.
// NOTE(review): the enclosing signature is not part of this fragment --
// presumably the ostream operator<< for Point; confirm.
44 return os <<
'(' << p.x <<
',' << p.y <<
')';
// Record tying a point to a cluster. The members used in this excerpt are
// cluster_id, point and count; their declarations are not shown here.
48 struct ClosestCenter {
// Writes cc to os in the form "(cluster_id:point:count)". The point is
// written with whatever operator<< is in scope for its type.
54 std::ostream&
operator << (std::ostream& os,
const ClosestCenter& cc) {
55 return os <<
'(' << cc.cluster_id
56 <<
':' << cc.point <<
':' << cc.count <<
')';
// Random source: the engine is seeded from std::random_device, and
// coordinates are drawn from a uniform distribution built with 0.0 and 1000.0.
62 std::default_random_engine rng(std::random_device { } ());
63 std::uniform_real_distribution<double> dist(0.0, 1000.0);
// Each generated point takes two successive draws, one per coordinate.
// NOTE(review): the enclosing generator callable is not part of this excerpt.
70 return Point { dist(rng), dist(rng) };
// Print the generated points under the label "points".
75 points.Print(
"points");
// Fixed number of refinement rounds: the loop body runs 10 times.
81 for (
size_t iter = 0; iter < 10; ++iter)
// Copy the current centers into a plain std::vector for use in the lambda below.
84 std::vector<Point> local_centers = centers.
AllGather();
// Classification lambda: captures local_centers by value and, for a point p,
// picks the index of a center by squared distance. Center 0 is the initial
// candidate, so local_centers[0] is read unconditionally.
90 [local_centers](
const Point& p) {
91 double min_dist = p.DistanceSquare(local_centers[0]);
92 size_t cluster_id = 0;
// Scan the remaining centers, starting at index 1.
94 for (
size_t i = 1; i < local_centers.size(); ++i) {
95 double dist = p.DistanceSquare(local_centers[i]);
// NOTE(review): listing line 96 is missing from this excerpt; presumably this
// update is guarded by a `dist < min_dist` check -- confirm against the full file.
97 min_dist = dist, cluster_id = i;
// Emit the point tagged with the chosen cluster and a count of 1.
99 return ClosestCenter { cluster_id, p, 1 };
// Key extractor: records are grouped by their cluster_id.
104 [](
const ClosestCenter& cc) {
return cc.cluster_id; },
// Combiner for two records: keeps a's cluster_id, adds the points
// component-wise and adds the counts.
106 [](
const ClosestCenter& a,
const ClosestCenter& b) {
107 return ClosestCenter {
108 a.cluster_id, a.point + b.point, a.count + b.count
// Divide each combined point by its combined count, i.e. turn the
// per-cluster sum into a mean.
111 .Map([](
const ClosestCenter& cc) {
112 return cc.point / cc.count;
// Print this round's recomputed centers under the label "new_centers".
115 new_centers.Print(
"new_centers");
// Print the centers under the label "final centers".
121 centers.
Print(
"final centers");
DIA is the interface between the user and the Thrill framework.
std::ostream & operator<<(std::ostream &os, const Point &p)
make ostream-able for Print()
auto Process(thrill::Context &ctx)
our main processing method
std::vector< ValueType > AllGather() const
Returns the whole DIA in an std::vector on each worker.
auto Generate(Context &ctx, size_t size, const GenerateFunction &generate_function)
Generate is a Source-DOp, which creates a DIA of given size using a generator function.
int Run(const std::function< void(Context &)> &job_startpoint)
Runs the given job startpoint with a Context instance.
thrill::common::Vector< D, double > Point
Compile-Time Fixed-Dimensional Points.
The Context of a job is a unique instance per worker which holds references to all underlying parts of Thrill.
auto Sample(size_t sample_size) const
Select up to sample_size items uniformly at random and return a new DIA<T>.
void Print(const std::string &name=std::string()) const
Print is an Action, which collects all data of the DIA at the worker 0 and prints using ostream serialization.
DIA< ValueType > Collapse() const
Create a CollapseNode which is mainly used to collapse the LOp chain into a DIA<T> with an empty stack.