Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
construct_wt.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/suffix_sorting/construct_wt.hpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  * Copyright (C) 2016 Simon Gog <[email protected]>
8  *
9  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
10  ******************************************************************************/
11 
12 #pragma once
13 #ifndef THRILL_EXAMPLES_SUFFIX_SORTING_CONSTRUCT_WT_HEADER
14 #define THRILL_EXAMPLES_SUFFIX_SORTING_CONSTRUCT_WT_HEADER
15 
16 #include <thrill/api/collapse.hpp>
17 #include <thrill/api/max.hpp>
18 #include <thrill/api/print.hpp>
19 #include <thrill/api/sort.hpp>
20 #include <thrill/api/window.hpp>
22 #include <thrill/common/logger.hpp>
24 
25 #include <algorithm>
26 #include <limits>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 namespace examples {
32 namespace suffix_sorting {
33 
34 template <typename InputDIA>
36  const InputDIA& input_dia, const std::string& output_path) {
37  static constexpr bool debug = false;
38 
39  using namespace thrill; // NOLINT
40 
41  uint64_t max_value = input_dia.Keep().Max();
42  sLOG << "max_value" << max_value;
43 
44  uint64_t level = tlx::integer_log2_ceil(max_value);
45  uint64_t mask = (~uint64_t(0)) << level;
46  uint64_t maskbit = uint64_t(1) << level;
47 
48  DIA<uint8_t> wt = input_dia.Collapse();
49 // if (debug) wt.Print("wt");
50 
51  while (mask != (~uint64_t(0))) {
52  // switch to next level
53  --level;
54  mask = (mask >> 1) | 0x8000000000000000llu;
55  maskbit >>= 1;
56 
57  sLOG << "maskbit" << maskbit << "mask" << std::hex << mask;
58 
59  wt.Keep().Window(
60  DisjointTag, 64,
61  [maskbit](size_t, const std::vector<uint8_t>& v) {
62  uint64_t x = 0;
63  for (size_t i = 0; i < v.size(); ++i) {
64  if (v[i] & maskbit) {
65  x |= uint64_t(1) << i;
66  }
67  }
68  return x;
69  })
70  .WriteBinary(
71  output_path + common::str_sprintf("-lvl%02u-", unsigned(level)));
72 
73  wt = wt.Sort(
74  [mask](const uint8_t& a, const uint8_t& b) {
75  return (a & mask) < (b & mask);
76  });
77 
78  if (debug) wt.Print("wt");
79  }
80 }
81 
82 } // namespace suffix_sorting
83 } // namespace examples
84 
85 #endif // !THRILL_EXAMPLES_SUFFIX_SORTING_CONSTRUCT_WT_HEADER
86 
87 /******************************************************************************/
String str_sprintf(const char *fmt,...) TLX_ATTRIBUTE_FORMAT_PRINTF(1
Helper for using sprintf to format into std::string and also to_string converters.
#define sLOG
Default logging method: output if the local debug variable is true.
Definition: logger.hpp:184
list x
Definition: gen_data.py:39
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Definition: allocator.hpp:220
static constexpr bool debug
auto ConstructWaveletTree(const InputDIA &input_dia, const std::string &output_path)
unsigned integer_log2_ceil(int i)
calculate the log2 ceiling of an integer type (by repeated bit shifts)
const struct DisjointTag DisjointTag
global const DisjointTag instance
Definition: dia.hpp:67