Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
word_count_simple.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/word_count/word_count_simple.cpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
9  ******************************************************************************/
10 
15 
16 #include <iostream>
17 #include <string>
18 #include <utility>
19 
22 
23 void WordCount(thrill::Context& ctx,
24  std::string input, std::string output) {
25  using Pair = std::pair<std::string, size_t>;
26  auto word_pairs =
27  ReadLines(ctx, input)
28  .template FlatMap<Pair>(
29  // flatmap lambda: split and emit each word
30  [](const std::string& line, auto emit) {
31  SplitView(line, ' ', [&](StringView sv) {
32  emit(Pair(sv.ToString(), 1));
33  });
34  });
35  word_pairs.ReduceByKey(
36  // key extractor: the word string
37  [](const Pair& p) { return p.first; },
38  // commutative reduction: add counters
39  [](const Pair& a, const Pair& b) {
40  return Pair(a.first, a.second + b.second);
41  })
42  .Map([](const Pair& p) {
43  return p.first + ": "
44  + std::to_string(p.second);
45  })
46  .WriteLines(output);
47 }
48 
49 int main(int argc, char* argv[]) {
50  if (argc != 3) {
51  std::cout << "Usage: " << argv[0] << " <input> <output>" << std::endl;
52  return -1;
53  }
54 
55  return thrill::Run(
56  [&](thrill::Context& ctx) { WordCount(ctx, argv[1], argv[2]); });
57 }
58 
59 /******************************************************************************/
void WordCount(thrill::Context &ctx, std::string input, std::string output)
int Run(const std::function< void(Context &)> &job_startpoint)
Runs the given job startpoint with a Context instance.
Definition: context.cpp:887
static void SplitView(const std::string &str, char sep, F &&callback, std::string::size_type limit=std::string::npos)
Split the given string at each separator character into distinct substrings, and call the given callback for each substring.
DIA< std::string > ReadLines(Context &ctx, const std::string &filepath)
ReadLines is a DOp, which reads a file from the file system and creates an ordered DIA according to a...
Definition: read_lines.hpp:452
static by_string to_string(int val)
convert to string
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Definition: allocator.hpp:220
std::string ToString() const
Returns the data of this StringView as an std::string.
int main(int argc, char *argv[])
StringView is a reference to a part of a string, consisting of only a char pointer and a length...
Definition: string_view.hpp:31