Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
word_count.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/word_count/word_count.hpp
3  *
4  * This file contains the WordCount core example. See word_count_run.cpp for how
5  * to run it on different inputs.
6  *
7  * Part of Project Thrill - http://project-thrill.org
8  *
9  * Copyright (C) 2015 Alexander Noe <[email protected]>
10  * Copyright (C) 2016 Timo Bingmann <[email protected]>
11  *
12  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
13  ******************************************************************************/
14 
15 #pragma once
16 #ifndef THRILL_EXAMPLES_WORD_COUNT_WORD_COUNT_HEADER
17 #define THRILL_EXAMPLES_WORD_COUNT_WORD_COUNT_HEADER
18 
19 #include <thrill/api/reduce_by_key.hpp>
20 #include <thrill/common/string_view.hpp>
21 
22 #include <string>
23 #include <utility>
24 
25 namespace examples {
26 namespace word_count {
27 
28 using namespace thrill; // NOLINT
29 
30 using WordCountPair = std::pair<std::string, size_t>; //!< (word, occurrence count)
31 
32 //! The most basic WordCount user program: reads a DIA of std::string lines,
33 //! splits each line at ' ', and returns a DIA of (word, count) WordCountPairs.
34 template <typename InputStack>
35 auto WordCount(const DIA<std::string, InputStack>& input) {
36 
37  auto word_pairs = input.template FlatMap<WordCountPair>(
38  [](const std::string& line, auto emit) -> void {
39  /* map lambda: emit (word, 1) for each non-empty piece of the line */
40  common::SplitView(
41  line, ' ', [&](const common::StringView& sv) {
42  if (sv.size() == 0) return; // skip empty pieces
43  emit(WordCountPair(sv.ToString(), 1));
44  });
45  });
46 
47  return word_pairs.ReduceByKey(
48  [](const WordCountPair& in) -> std::string {
49  /* reduction key: the word string */
50  return in.first;
51  },
52  [](const WordCountPair& a, const WordCountPair& b) -> WordCountPair {
53  /* associative reduction operator: add counters */
54  return WordCountPair(a.first, a.second + b.second);
55  });
56 }
57 
58 /******************************************************************************/
59 
60 using HashWord = std::pair<size_t, std::string>; //!< (hash of the word, word)
61 using HashWordCount = std::pair<HashWord, size_t>; //!< (HashWord, occurrence count)
62 
63 struct HashWordHasher { // hash functor for HashWord keys
64  size_t operator () (const HashWord& w) const {
65  // return first which is the hash of the word
66  return w.first;
67  }
68 };
69 
70 //! The second WordCount user program: reads a DIA containing std::string lines,
71 //! splits them into words, pairs each word with its std::hash value and reduces
72 //! by that (hash, word) pair. Returns a DIA containing WordCountPairs.
73 template <typename InputStack>
74 auto HashWordCountExample(const DIA<std::string, InputStack>& input) {
75 
76  std::hash<std::string> string_hasher;
77 
78  auto r =
79  input
80  .template FlatMap<std::string>(
81  [](const std::string& line, auto emit) {
82  /* map lambda: emit each non-empty word of the line */
83  common::SplitView(
84  line, ' ', [&](const common::StringView& sv) {
85  if (sv.size() == 0) return;
86  emit(sv.ToString());
87  });
88  })
89  .Map([string_hasher](const std::string& word) { // copy the hasher: r is returned, so the lambda may outlive this scope
90  return HashWordCount(HashWord(string_hasher(word), word), 1);
91  })
92  .ReduceByKey(
93  [](const HashWordCount& in) {
94  /* reduction key: the (hash, word) pair */
95  return in.first;
96  },
97  [](const HashWordCount& a, const HashWordCount& b) {
98  /* associative reduction operator: add counters */
99  return HashWordCount(a.first, a.second + b.second);
100  },
101  api::DefaultReduceConfig(), HashWordHasher()) // hash keys by their stored hash value
102  .Map([](const HashWordCount& in) {
103  return WordCountPair(in.first.second, in.second);
104  });
105  return r;
106 }
107 
108 } // namespace word_count
109 } // namespace examples
110 
111 #endif // !THRILL_EXAMPLES_WORD_COUNT_WORD_COUNT_HEADER
112 
113 /******************************************************************************/