38 const std::vector<std::string>& input_filelist,
const std::string& output) {
43 auto lines =
ReadLines(ctx, input_filelist);
59 <<
" benchmark=wordcount" 60 <<
" files=" << input_filelist.size()
70 const std::vector<std::string>& input_filelist,
const std::string& output) {
75 auto lines =
ReadLines(ctx, input_filelist);
91 <<
" benchmark=wordcount_hash" 92 <<
" files=" << input_filelist.size()
108 std::default_random_engine rng(std::random_device { } ());
126 word_pairs.Execute();
134 std::default_random_engine rng(std::random_device { } ());
152 word_pairs.Execute();
158 int main(
int argc,
char* argv[]) {
164 "output file pattern");
166 std::vector<std::string> input;
168 "input file pattern(s)");
170 bool generate =
false;
171 clp.
add_bool(
'g',
"generate", generate,
172 "generate random words, first file pattern " 173 "specifies approximately how many.");
175 bool hash_words =
false;
176 clp.
add_bool(
'H',
"hash_words", hash_words,
177 "explicitly calculate hash values for words " 178 "to accelerate reduction.");
180 if (!clp.
process(argc, argv)) {
190 if (!common::from_str<size_t>(input[0], num_words))
191 die(
"For generated word data, set input to the number of words.");
net::FlowControlChannel & net
static void RunHashWordCount(api::Context &ctx, const std::vector< std::string > &input_filelist, const std::string &output)
auto Generate(Context &ctx, size_t size, const GenerateFunction &generate_function)
Generate is a Source-DOp, which creates a DIA of given size using a generator function.
static void RunWordCount(api::Context &ctx, const std::vector< std::string > &input_filelist, const std::string &output)
void WordCount(thrill::Context &ctx, std::string input, std::string output)
size_t num_hosts() const
Returns the total number of hosts.
std::pair< std::string, size_t > WordCountPair
int Run(const std::function< void(Context &)> &job_startpoint)
Runs the given job startpoint with a Context instance.
void Barrier()
A trivial global barrier.
int main(int argc, char *argv[])
auto HashWordCountExample(const DIA< std::string, InputStack > &input)
std::string RandomTextWriterGenerate(size_t num_words, RandomGenerator &rng)
#define die(msg)
Instead of calling std::terminate(), throw the output message via an exception.
The Context of a job is a unique instance per worker which holds references to all underlying parts of Thrill.
void enable_consume(bool consume=true)
Sets consume-mode flag such that DIA contents may be consumed during PushData().
DIA< std::string > ReadLines(Context &ctx, const std::string &filepath)
ReadLines is a DOp, which reads a file from the file system and creates an ordered DIA according to a...
static by_string to_string(int val)
convert to string
void print_result(std::ostream &os)
print nicely formatted result of processing
static void RunWordCountGenerated(api::Context &ctx, size_t num_words, const std::string &output)
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Command line parser which automatically fills variables and prints nice usage messages.
static void RunHashWordCountGenerated(api::Context &ctx, size_t num_words, const std::string &output)
void add_string(char key, const std::string &longkey, std::string &dest, const std::string &desc)
add string option -key, --longkey and store to dest
net::Traffic Traffic() const
calculate overall traffic for final stats
size_t my_rank() const
Global rank of this worker among all other workers in the system.
void add_param_stringlist(const std::string &name, std::vector< std::string > &dest, const std::string &desc)
void add_bool(char key, const std::string &longkey, bool &dest, const std::string &desc)
net::Manager & net_manager()
bool process(int argc, const char *const *argv, std::ostream &os)