This C++ snippet shows our (unoptimized) working example of Word Count in Thrill.

using namespace thrill;

// Word Count job: reads "wordcount.in" line by line, splits each line at
// single spaces, sums up a counter per distinct word and writes one
// "<word>: <count>" line per word to an output file whose name contains the
// rank reported by ctx.my_rank().
//
// ctx is the context handed to ReadLines and queried for the rank.
// Always returns 0.
size_t WordCountExample(Context& ctx) {

    auto input_lines = ReadLines(ctx, "wordcount.in");

    /* turn every line into a sequence of (word, 1) pairs */
    auto tokens = input_lines.template FlatMap<WordCountPair>(
        [](const std::string& text, auto emit) -> void {
            for (const std::string& token : common::split(text, ' ')) {
                /* skip empty tokens */
                if (token.size() == 0)
                    continue;
                emit(WordCountPair(token, 1));
            }
        });

    /* merge all pairs that carry the same word */
    auto counts = tokens.ReduceBy(
        [](const WordCountPair& entry) -> std::string {
            /* pairs are grouped by their word */
            return entry.first;
        },
        [](const WordCountPair& lhs, const WordCountPair& rhs) -> WordCountPair {
            /* keep the left word, add up both counters */
            const auto total = lhs.second + rhs.second;
            return WordCountPair(lhs.first, total);
        });

    /* output file name: "wordcount_" + rank + ".out" */
    const std::string out_path =
        "wordcount_" + std::to_string(ctx.my_rank()) + ".out";

    /* format each pair as "<word>: <count>" and write the lines out */
    counts.Map(
        [](const WordCountPair& entry) {
            return entry.first + ": " + std::to_string(entry.second);
        })
    .WriteLinesMany(out_path);

    return 0;
}

// Program entry point: hands WordCountExample to api::Run and returns the
// value api::Run yields as the exit status. argc and argv are not used.
int main(int argc, char* argv[]) {
    const auto status = api::Run(WordCountExample);
    return status;
}