Thrill  0.1
word_count_simple.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/word_count/word_count_simple.cpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
9  ******************************************************************************/
10 
15 
16 #include <iostream>
17 #include <string>
18 #include <utility>
19 
21  std::string input, std::string output) {
22  using Pair = std::pair<std::string, size_t>;
23  auto word_pairs =
24  ReadLines(ctx, input)
25  .template FlatMap<Pair>(
26  // flatmap lambda: split and emit each word
27  [](const std::string& line, auto emit) {
28  tlx::split_view(' ', line, [&](tlx::string_view sv) {
29  emit(Pair(sv.to_string(), 1));
30  });
31  });
32  word_pairs.ReduceByKey(
33  // key extractor: the word string
34  [](const Pair& p) { return p.first; },
35  // commutative reduction: add counters
36  [](const Pair& a, const Pair& b) {
37  return Pair(a.first, a.second + b.second);
38  })
39  .Map([](const Pair& p) {
40  return p.first + ": "
41  + std::to_string(p.second);
42  })
43  .WriteLines(output);
44 }
45 
46 int main(int argc, char* argv[]) {
47  if (argc != 3) {
48  std::cout << "Usage: " << argv[0] << " <input> <output>" << std::endl;
49  return -1;
50  }
51 
52  return thrill::Run(
53  [&](thrill::Context& ctx) { WordCount(ctx, argv[1], argv[2]); });
54 }
55 
56 /******************************************************************************/
void WordCount(thrill::Context &ctx, std::string input, std::string output)
int Run(const std::function< void(Context &)> &job_startpoint)
Runs the given job startpoint with a Context instance.
Definition: context.cpp:947
The Context of a job is a unique instance per worker which holds references to all underlying parts o...
Definition: context.hpp:221
DIA< std::string > ReadLines(Context &ctx, const std::string &filepath)
ReadLines is a DOp, which reads a file from the file system and creates an ordered DIA according to a...
Definition: read_lines.hpp:454
static by_string to_string(int val)
convert to string
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Definition: allocator.hpp:220
StringView is a reference to a part of a string, consisting of only a char pointer and a length...
Definition: string_view.hpp:32
std::string to_string() const
Returns the data of this StringView as a std::string.
static void split_view(char sep, const std::string &str, Functor &&callback, std::string::size_type limit=std::string::npos)
Split the given string at each separator character into distinct substrings, and call the given callb...
Definition: split_view.hpp:38
int main(int argc, char *argv[])