Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
wavelet_tree.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/suffix_sorting/wavelet_tree.cpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
9  ******************************************************************************/
10 
11 #include <thrill/api/collapse.hpp>
12 #include <thrill/api/dia.hpp>
13 #include <thrill/api/generate.hpp>
14 #include <thrill/api/max.hpp>
15 #include <thrill/api/print.hpp>
17 #include <thrill/api/sort.hpp>
19 #include <thrill/common/logger.hpp>
20 #include <tlx/cmdline_parser.hpp>
22 
23 #include <algorithm>
24 #include <limits>
25 #include <random>
26 #include <stdexcept>
27 #include <string>
28 #include <tuple>
29 #include <utility>
30 #include <vector>
31 
32 namespace examples {
33 namespace suffix_sorting {
34 
35 static constexpr bool debug = false;
36 
37 using namespace thrill; // NOLINT
38 
39 template <typename InputDIA>
40 auto ConstructWaveletTree(const InputDIA& input_dia) {
41 
42  uint64_t max_value = input_dia.Max();
43  sLOG << "max_value" << max_value;
44 
45  uint64_t level = tlx::integer_log2_ceil(max_value);
46  uint64_t mask = (~uint64_t(0)) << level;
47  uint64_t maskbit = uint64_t(1) << level;
48 
49  DIA<uint64_t> wt = input_dia.Collapse();
50  if (debug) wt.Print("wt");
51 
52  while (mask != (~uint64_t(0))) {
53 
54  // switch to next level
55  --level;
56  mask = (mask >> 1) | 0x8000000000000000llu;
57  maskbit >>= 1;
58 
59  sLOG << "maskbit" << maskbit << "mask" << std::hex << mask;
60 
61  wt.Map(
62  [maskbit](const uint64_t& a) {
63  return (a & maskbit) != 0;
64  })
65  .WriteBinary(common::str_sprintf("wt-%04u-", unsigned(level)));
66 
67  wt = wt.Sort(
68  [mask](const uint64_t& a, const uint64_t& b) {
69  return (a & mask) < (b & mask);
70  });
71 
72  if (debug) wt.Print("wt");
73  }
74 }
75 
76 } // namespace suffix_sorting
77 } // namespace examples
78 
79 int main(int argc, char* argv[]) {
80 
81  using namespace thrill; // NOLINT
82  using namespace examples::suffix_sorting;
83 
85 
86  cp.set_author("Timo Bingmann <[email protected]>");
87 
88  std::string input_path;
89 
90  cp.add_opt_param_string("input", input_path,
91  "Path to input file.");
92 
93  if (!cp.process(argc, argv))
94  return -1;
95 
96  return Run(
97  [&](Context& ctx) {
98  if (input_path.size()) {
99  auto input_dia = ReadBinary<uint64_t>(ctx, input_path);
100  ConstructWaveletTree(input_dia);
101  }
102  else {
103  std::default_random_engine rng(std::random_device { } ());
104  auto input_dia =
105  Generate(ctx, 32,
106  [&](size_t) { return uint64_t(rng() % 32); });
107  ConstructWaveletTree(input_dia);
108  }
109  });
110 }
111 
112 /******************************************************************************/
#define sLOG
Default logging method: output if the local debug variable is true.
Definition: logger.hpp:34
auto Generate(Context &ctx, size_t size, const GenerateFunction &generate_function)
Generate is a Source-DOp, which creates a DIA of given size using a generator function.
Definition: generate.hpp:85
int Run(const std::function< void(Context &)> &job_startpoint)
Runs the given job startpoint with a Context instance.
Definition: context.cpp:863
String str_sprintf(const char *fmt,...) TLX_ATTRIBUTE_FORMAT_PRINTF(1
Helper for using sprintf to format into std::string and also to_string converters.
int main(int argc, char *argv[])
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Definition: allocator.hpp:220
static constexpr bool debug
Command line parser which automatically fills variables and prints nice usage messages.
auto ConstructWaveletTree(const InputDIA &input_dia, const std::string &output_path)
unsigned integer_log2_ceil(int i)
calculate the log2 ceiling of an integer type (by repeated bit shifts)
void set_author(const std::string &author)
Set author of program, will be wrapped.
auto ConstructWaveletTree(const InputDIA &input_dia)