This C++ snippet shows our (unoptimized) working example of Word Count in Thrill.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
using namespace thrill;

/*
 * Word Count job body, invoked with a Thrill Context.
 *
 * Steps, in the order they appear below:
 *   1. ReadLines is called on "wordcount.in".
 *   2. FlatMap splits every line on ' ' (via common::split) and emits a
 *      WordCountPair(word, 1) for each non-empty token.
 *   3. ReduceBy uses the word (pair.first) as key and adds the counters
 *      (pair.second) of pairs sharing a key.
 *   4. Map formats each pair as "<word>: <count>" and the result is handed to
 *      WriteLinesMany with the path "wordcount_<rank>.out", where <rank> is
 *      ctx.my_rank().
 *
 * Always returns 0.
 */
size_t WordCountExample(Context& ctx) {
    auto input_lines = ReadLines(ctx, "wordcount.in");

    /* flat-map function: one (word, 1) pair per non-empty token */
    const auto emit_words = [](const std::string& text, auto emit) -> void {
        for (const std::string& token : common::split(text, ' ')) {
            if (token.empty())
                continue;  /* consecutive separators yield empty tokens */
            emit(WordCountPair(token, 1));
        }
    };
    auto pairs = input_lines.template FlatMap<WordCountPair>(emit_words);

    /* reduction key: the word string */
    const auto word_of = [](const WordCountPair& entry) -> std::string {
        return entry.first;
    };
    /* associative reduction operator: keep the word, add the counters */
    const auto add_counts =
        [](const WordCountPair& lhs, const WordCountPair& rhs) -> WordCountPair {
            return WordCountPair(lhs.first, lhs.second + rhs.second);
        };
    auto counts = pairs.ReduceBy(word_of, add_counts);

    /* output path carries this context's rank */
    const std::string out_path =
        "wordcount_" + std::to_string(ctx.my_rank()) + ".out";

    /* render each pair as "<word>: <count>" */
    const auto to_line = [](const WordCountPair& entry) -> std::string {
        return entry.first + ": " + std::to_string(entry.second);
    };
    counts.Map(to_line).WriteLinesMany(out_path);

    return 0;
}

/*
 * Program entry point: passes WordCountExample to api::Run and returns
 * whatever api::Run reports. The command-line arguments are not used.
 */
int main(int argc, char* argv[]) {
    const auto run_status = api::Run(WordCountExample);
    return run_status;
}