Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
wavelet_tree2.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/suffix_sorting/wavelet_tree2.cpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  * Copyright (C) 2016 Simon Gog <[email protected]>
8  *
9  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
10  ******************************************************************************/
11 
12 #include <thrill/api/collapse.hpp>
13 #include <thrill/api/dia.hpp>
14 #include <thrill/api/generate.hpp>
15 #include <thrill/api/max.hpp>
16 #include <thrill/api/print.hpp>
18 #include <thrill/api/sort.hpp>
19 #include <thrill/api/window.hpp>
21 #include <thrill/common/logger.hpp>
22 #include <tlx/cmdline_parser.hpp>
24 
25 #include <algorithm>
26 #include <limits>
27 #include <random>
28 #include <stdexcept>
29 #include <string>
30 #include <tuple>
31 #include <utility>
32 #include <vector>
33 
34 static constexpr bool debug = true;
35 
36 using namespace thrill; // NOLINT
37 
38 template <typename InputDIA>
39 auto ConstructWaveletTree(const InputDIA& input_dia) {
40 
41  uint64_t max_value = input_dia.Max();
42  sLOG << "max_value" << max_value;
43 
44  uint64_t level = tlx::integer_log2_ceil(max_value);
45  uint64_t mask = (~uint64_t(0)) << level;
46  uint64_t maskbit = uint64_t(1) << level;
47 
48  using PairBI = std::pair<uint8_t, uint64_t>;
49 
50  auto wt = input_dia.template FlatMap<PairBI>(
51  [level](const uint64_t& x, auto emit) {
52  for (size_t i = 0; i <= level; ++i) {
53  emit(std::make_pair(i, x));
54  }
55  });
56  auto wt2 = wt.Sort([mask](const PairBI& a, const PairBI& b) {
57  if (a.first != b.first) {
58  return a.first < b.first;
59  }
60  else {
61  return (a.second & (mask >> a.first)) < (b.second & (mask >> a.first));
62  }
63  });
64 
65  if (debug)
66  wt2.Map([](const PairBI& x) {
67  return std::to_string(x.first) + " " + std::to_string(x.second);
68  }).Print("wt");
69 
70  auto binary_wt = wt2.Window(
71  DisjointTag, 64,
72  [maskbit](size_t, const std::vector<PairBI>& v) {
73  uint64_t x = 0;
74  for (size_t i = 0; i < v.size(); ++i) {
75  uint64_t b = (v[i].second & ((maskbit) >> v[i].first)) != 0;
76  x |= (b << i);
77  }
78  return x;
79  });
80 
81  if (debug)
82  binary_wt.Print("BINARY_WT");
83 
84  binary_wt.WriteBinary("BINARY_WT");
85 }
86 
87 int main(int argc, char* argv[]) {
89 
90  cp.set_author("Timo Bingmann <[email protected]>");
91  cp.set_author("Simon Gog <[email protected]>");
92 
93  std::string input_path;
94 
95  cp.add_opt_param_string("input", input_path,
96  "Path to input file.");
97 
98  if (!cp.process(argc, argv))
99  return -1;
100 
101  return Run(
102  [&](Context& ctx) {
103  if (input_path.size()) {
104  auto input_dia = ReadBinary<uint64_t>(ctx, input_path);
105  ConstructWaveletTree(input_dia);
106  }
107  else {
108  std::default_random_engine rng(std::random_device { } ());
109  auto input_dia =
110  Generate(ctx, 32,
111  [&](size_t) { return uint64_t(rng() % 32); });
112  ConstructWaveletTree(input_dia);
113  }
114  });
115 }
116 
117 /******************************************************************************/
auto Generate(Context &ctx, size_t size, const GenerateFunction &generate_function)
Generate is a Source-DOp, which creates a DIA of given size using a generator function.
Definition: generate.hpp:85
int main(int argc, char *argv[])
int Run(const std::function< void(Context &)> &job_startpoint)
Runs the given job startpoint with a Context instance.
Definition: context.cpp:887
void add_opt_param_string(const std::string &name, std::string &dest, const std::string &desc)
add optional string parameter [name] with description and store to dest
static by_string to_string(int val)
convert to string
#define sLOG
Default logging method: output if the local debug variable is true.
Definition: logger.hpp:184
list x
Definition: gen_data.py:39
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Definition: allocator.hpp:220
static constexpr bool debug
Command line parser which automatically fills variables and prints nice usage messages.
unsigned integer_log2_ceil(int i)
calculate the log2 ceiling of an integer type (by repeated bit shifts)
void set_author(const std::string &author)
Set author of program, will be wrapped.
const struct DisjointTag DisjointTag
global const DisjointTag instance
Definition: dia.hpp:67
bool process(int argc, const char *const *argv, std::ostream &os)
auto ConstructWaveletTree(const InputDIA &input_dia)