25 #include <boost/spirit/include/qi.hpp> 38 double DistanceSquare(
const Point& b)
const {
39 return (x - b.x) * (x - b.x) + (y - b.y) * (y - b.y);
42 return Point { x + b.x, y + b.y };
44 Point operator / (
double s)
const {
45 return Point { x / s, y / s };
51 return os <<
'(' << p.x <<
',' << p.y <<
')';
55 struct ClosestCenter {
61 std::ostream&
operator << (std::ostream& os,
const ClosestCenter& cc) {
62 return os <<
'(' << cc.cluster_id
63 <<
':' << cc.point <<
':' << cc.count <<
')';
75 for (
size_t iter = 0; iter < 10; ++iter)
78 std::vector<Point> local_centers = centers.
AllGather();
84 [local_centers](
const Point& p) {
85 double min_dist = p.DistanceSquare(local_centers[0]);
86 size_t cluster_id = 0;
88 for (
size_t i = 1; i < local_centers.size(); ++i) {
89 double dist = p.DistanceSquare(local_centers[i]);
91 min_dist = dist, cluster_id = i;
93 return ClosestCenter { cluster_id, p, 1 };
98 [](
const ClosestCenter& cc) {
return cc.cluster_id; },
100 [](
const ClosestCenter& a,
const ClosestCenter& b) {
101 return ClosestCenter {
102 a.cluster_id, a.point + b.point, a.count + b.count
105 .Map([](
const ClosestCenter& cc) {
106 return cc.point / cc.count;
124 centers.
Print(
"final centers");
129 std::default_random_engine rng(std::random_device { } ());
130 std::uniform_real_distribution<double> dist(0.0, 1000.0);
137 return Point { dist(rng), dist(rng) };
140 return points.Cache().Execute();
147 namespace qi = boost::spirit::qi;
156 std::string::const_iterator begin = input.begin(), end = input.end();
160 qi::double_ >> qi::double_,
165 die(
"Could not parse point coordinates: " << input);
168 return points.Cache();
172 int main(
int argc,
char* argv[]) {
183 std::cerr <<
"Usage: " << argv[0]
184 <<
" [points] [output]" << std::endl;
std::ostream & operator<<(std::ostream &os, const Point &p)
make ostream-able for Print()
DIA is the interface between the user and the Thrill framework.
thrill::DIA< Point > GeneratePoints(thrill::Context &ctx)
std::vector< ValueType > AllGather() const
Returns the whole DIA in an std::vector on each worker.
auto Generate(Context &ctx, size_t size, const GenerateFunction &generate_function)
Generate is a Source-DOp, which creates a DIA of given size using a generator function.
int Run(const std::function< void(Context &)> &job_startpoint)
Runs the given job startpoint with a Context instance.
thrill::common::Vector< D, double > Point
Compile-Time Fixed-Dimensional Points.
#define die(msg)
Instead of calling std::terminate(), throw the output message via an exception.
void Process(const thrill::DIA< Point > &points, const char *output)
our main processing method
The Context of a job is a unique instance per worker which holds references to all underlying parts o...
thrill::DIA< Point > LoadPoints(thrill::Context &ctx, const char *path)
[step6 LoadPoints]
DIA< std::string > ReadLines(Context &ctx, const std::string &filepath)
ReadLines is a DOp, which reads a file from the file system and creates an ordered DIA according to a...
static by_string to_string(int val)
convert to string
auto Sample(size_t sample_size) const
Select up to sample_size items uniformly at random and return a new DIA<T>.
void Print(const std::string &name=std::string()) const
Print is an Action which collects all data of the DIA at worker 0 and prints it using ostream seria...
int main(int argc, char *argv[])
[step6 LoadPoints]
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
auto Map(const MapFunction &map_function) const
Map applies map_function : $A \to B$ to each item of a DIA and delivers a new DIA containing the returned values...
DIA< ValueType > Collapse() const
Create a CollapseNode which is mainly used to collapse the LOp chain into a DIA<T> with an empty stac...