Thrill  0.1
check_sa.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/suffix_sorting/check_sa.hpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
9  ******************************************************************************/
10 
11 #pragma once
12 #ifndef THRILL_EXAMPLES_SUFFIX_SORTING_CHECK_SA_HEADER
13 #define THRILL_EXAMPLES_SUFFIX_SORTING_CHECK_SA_HEADER
14 
15 #include <thrill/api/generate.hpp>
16 #include <thrill/api/max.hpp>
17 #include <thrill/api/size.hpp>
18 #include <thrill/api/sort.hpp>
19 #include <thrill/api/window.hpp>
20 #include <thrill/api/zip.hpp>
21 
22 #include <utility>
23 
24 namespace examples {
25 namespace suffix_sorting {
26 
27 template <typename InputDIA, typename SuffixArrayDIA>
28 bool CheckSA(const InputDIA& input, const SuffixArrayDIA& suffix_array) {
29 
30  using namespace thrill; // NOLINT
32 
33  using Char = typename InputDIA::ValueType;
34  using Index = typename SuffixArrayDIA::ValueType;
35 
36  //! A pair (rank, index)
37  struct IndexRank {
38  Index index;
39  Index rank;
41 
42  struct Index3 {
43  Index index;
44  Index next;
45  Char ch;
47 
48  uint64_t input_size = input.Keep().Size();
49 
50  auto isa_pair =
51  suffix_array
52  // build tuples with index: (SA[i]) -> (i, SA[i]),
53  .ZipWithIndex([](const Index& sa, const size_t& i) {
54  return IndexRank { sa, Index(i) };
55  })
56  // take (i, SA[i]) and sort to (ISA[i], i)
57  .Sort([](const IndexRank& a, const IndexRank& b) {
58  return a.index < b.index;
59  });
60 
61  // Zip (ISA[i], i) with [0,n) and check that the second component was a
62  // permutation of [0,n)
63  Index perm_check =
64  isa_pair.Keep()
65  .ZipWithIndex([](const IndexRank& ir, const size_t& index) -> Index {
66  return ir.index == Index(index) ? 0 : 1;
67  })
68  // sum over all boolean values.
69  .Max();
70 
71  if (perm_check != Index(0)) {
72  LOG1 << "Error: suffix array is not a permutation of 0..n-1.";
73  return false;
74  }
75 
76  using IndexPair = std::pair<Index, Index>;
77 
78  auto order_check =
79  isa_pair
80  // build (ISA[i], ISA[i+1], T[i])
81  .template FlatWindow<IndexPair>(
82  2, [input_size](size_t index, const RingBuffer<IndexRank>& rb, auto emit) {
83  emit(IndexPair { rb[0].rank, rb[1].rank });
84  if (index == input_size - 2) {
85  // emit sentinel at end
86  emit(IndexPair { rb[1].rank, input_size });
87  }
88  })
89  .Zip(input,
90  [](const std::pair<Index, Index>& isa_pair, const Char& ch) {
91  return Index3 { isa_pair.first, isa_pair.second, ch };
92  })
93  // and sort to (i, ISA[SA[i]+1], T[SA[i]])
94  .Sort([](const Index3& a, const Index3& b) {
95  return a.index < b.index;
96  });
97 
98  char order_check_sum =
99  order_check
100  // check that no pair violates the order
101  .Window(
102  2,
103  [input_size](size_t index, const RingBuffer<Index3>& rb) -> char {
104 
105  if (rb[0].ch > rb[1].ch) {
106  // simple check of first character of suffix failed.
107  LOG1 << "Error: suffix array position "
108  << index << " ordered incorrectly.";
109  return 1;
110  }
111  else if (rb[0].ch == rb[1].ch) {
112  if (rb[1].next == Index(input_size)) {
113  // last suffix of string must be first among those with
114  // same first character
115  LOG1 << "Error: suffix array position "
116  << index << " ordered incorrectly.";
117  return 1;
118  }
119  if (rb[0].next != Index(input_size) && rb[0].next > rb[1].next) {
120  // positions SA[i] and SA[i-1] has same first character
121  // but their suffixes are ordered incorrectly: the
122  // suffix position of SA[i] is given by ISA[SA[i]]
123  LOG1 << "Error: suffix array position "
124  << index << " ordered incorrectly.";
125  return 1;
126  }
127  }
128  // else (rb[0].ch < rb[1].ch) -> okay.
129  return 0;
130  })
131  .Max();
132 
133  return (order_check_sum == 0);
134 }
135 
136 } // namespace suffix_sorting
137 } // namespace examples
138 
139 #endif // !THRILL_EXAMPLES_SUFFIX_SORTING_CHECK_SA_HEADER
140 
141 /******************************************************************************/
tlx::RingBuffer< Type, Allocator > RingBuffer
Definition: ring_buffer.hpp:21
A ring (circular) buffer of static (non-growing) size.
Definition: ring_buffer.hpp:36
#define LOG1
Definition: logger.hpp:28
Definition: bfs.hpp:21
auto Zip(const ZipFunction &zip_function, const DIA< FirstDIAType, FirstDIAStack > &first_dia, const DIAs &... dias)
Zips two DIAs of equal size in style of functional programming by applying zip_function to the i-th e...
Definition: zip.hpp:426
bool CheckSA(const InputDIA &input, const SuffixArrayDIA &suffix_array)
Definition: check_sa.hpp:28
struct examples::suffix_sorting::IndexRank TLX_ATTRIBUTE_PACKED