45 namespace suffix_sorting {
49 template <
typename AlphabetType>
54 return std::tie(ch[0], ch[1], ch[2])
55 == std::tie(b.ch[0], b.ch[1], b.ch[2]);
59 return std::tie(ch[0], ch[1], ch[2])
60 < std::tie(b.ch[0], b.ch[1], b.ch[2]);
63 friend std::ostream&
operator << (std::ostream& os,
const Chars& chars) {
64 return os <<
'[' << chars.ch[0] <<
',' << chars.ch[1]
65 <<
',' << chars.ch[2] <<
']';
68 static Chars Lowest() {
71 std::numeric_limits<AlphabetType>::lowest(),
72 std::numeric_limits<AlphabetType>::lowest(),
73 std::numeric_limits<AlphabetType>::lowest()
80 template <
typename Index,
typename AlphabetType>
83 Chars<AlphabetType> chars;
85 AlphabetType at_radix(
size_t depth)
const {
return chars.ch[depth]; }
87 friend std::ostream&
operator << (std::ostream& os,
const IndexChars& tc) {
88 return os <<
'[' << tc.index <<
'|' << tc.chars <<
']';
93 template <
typename Index>
98 friend std::ostream&
operator << (std::ostream& os,
const IndexRank& tr) {
99 return os <<
'(' << tr.index <<
'|' << tr.rank <<
')';
104 template <
typename Index,
typename AlphabetType>
105 struct StringFragmentMod0 {
110 friend std::ostream&
operator << (std::ostream& os,
111 const StringFragmentMod0& sf) {
112 return os <<
"i=" << sf.index
113 <<
" t0=" << sf.t0 <<
" t1=" << sf.t1
114 <<
" r1=" << sf.r1 <<
" r2=" << sf.r2;
119 template <
typename Index,
typename AlphabetType>
120 struct StringFragmentMod1 {
125 friend std::ostream&
operator << (std::ostream& os,
126 const StringFragmentMod1& sf) {
127 return os <<
"i=" << sf.index
128 <<
" t0=" << sf.t0 <<
" r0=" << sf.r0 <<
" r1=" << sf.r1;
133 template <
typename Index,
typename AlphabetType>
134 struct StringFragmentMod2 {
139 friend std::ostream&
operator << (std::ostream& os,
140 const StringFragmentMod2& sf) {
141 return os <<
"i=" << sf.index
142 <<
" t0=" << sf.t0 <<
" r0=" << sf.r0
143 <<
" t1=" << sf.t1 <<
" r2=" << sf.r2;
148 template <
typename Index,
typename AlphabetType>
149 struct StringFragment {
154 AlphabetType text[2];
160 StringFragmentMod0<Index, AlphabetType> mod0;
161 StringFragmentMod1<Index, AlphabetType> mod1;
162 StringFragmentMod2<Index, AlphabetType> mod2;
165 StringFragment() =
default;
168 explicit StringFragment(
169 const StringFragmentMod0<Index, AlphabetType>& _mod0) : mod0(_mod0) { }
172 explicit StringFragment(
173 const StringFragmentMod1<Index, AlphabetType>& _mod1) : mod1(_mod1) { }
176 explicit StringFragment(
177 const StringFragmentMod2<Index, AlphabetType>& _mod2) : mod2(_mod2) { }
179 friend std::ostream&
operator << (std::ostream& os,
180 const StringFragment& tc) {
182 if (tc.index % 3 == 0)
183 return os <<
"0|" << tc.mod0 <<
']';
184 else if (tc.index % 3 == 1)
185 return os <<
"1|" << tc.mod1 <<
']';
186 else if (tc.index % 3 == 2)
187 return os <<
"2|" << tc.mod2 <<
']';
191 AlphabetType at_radix(
size_t depth)
const {
return common.text[depth]; }
192 Index sort_rank()
const {
return common.ranks[0]; }
198 { 1, 0, 0 }, { 1, 0, 1 }, { 2, 1, 1 }
201 { 1, 1, 0 }, { 0, 0, 0 }, { 0, 0, 0 }
204 { 2, 1, 1 }, { 0, 0, 0 }, { 0, 0, 0 }
208 template <
typename StringFragment>
209 struct FragmentComparator {
211 bool operator () (
const StringFragment& a,
const StringFragment& b)
const {
213 unsigned ai = a.index % 3, bi = b.index % 3;
215 const size_t*
params = fragment_comparator_params[ai][bi];
217 for (
size_t d = 0; d < params[0]; ++d)
219 if (a.common.text[d] == b.common.text[d])
continue;
220 return (a.common.text[d] < b.common.text[d]);
223 return a.common.ranks[params[1]] < b.common.ranks[params[2]];
227 template <
typename Index,
typename Char>
228 struct CharsRanks12 {
234 std::ostream& os,
const CharsRanks12& c) {
235 return os <<
"(ch=" << c.chars
236 <<
" r1=" << c.rank1 <<
" r2=" << c.rank2 <<
")";
240 template <
typename Index,
typename Char>
241 struct IndexCR12Pair {
243 CharsRanks12<Index, Char> cr0;
244 CharsRanks12<Index, Char> cr1;
247 template <
typename Type,
size_t MaxDepth>
248 class RadixSortFragment
251 explicit RadixSortFragment(
size_t K) : K_(K) { }
252 template <
typename CompareFunction>
254 typename std::vector<Type>::iterator begin,
255 typename std::vector<Type>::iterator end,
256 const CompareFunction& cmp)
const {
258 thrill::common::radix_sort_CI<MaxDepth>(
259 begin, end, K_, cmp, [](
auto begin,
auto end,
auto) {
261 std::sort(begin, end, [](
const Type& a,
const Type& b) {
262 return a.sort_rank() < b.sort_rank();
267 std::sort(begin, end, cmp);
280 template <
typename Index,
typename InputDIA>
284 using Char =
typename InputDIA::ValueType;
285 using IndexChars = dc3_local::IndexChars<Index, Char>;
286 using IndexRank = dc3_local::IndexRank<Index>;
287 using Chars = dc3_local::Chars<Char>;
289 Context& ctx = input_dia.context();
291 auto triple_unsorted =
294 .template FlatWindow<IndexChars>(
298 emit(IndexChars { Index(index), {
299 { rb[0], rb[1], rb[2] }
305 if (index % 3 != 0) {
308 { rb.size() >= 1 ? rb[0] : Char(),
309 rb.size() >= 2 ? rb[1] : Char(), Char() }
313 if (index + 1 == input_size && input_size % 3 == 1) {
317 emit(IndexChars { Index(input_size), Chars::Lowest() });
322 triple_unsorted.Keep().Print(
"triple_unsorted");
327 .Sort([](
const IndexChars& a,
const IndexChars& b) {
328 return a.chars < b.chars;
332 triple_sorted.Keep().Print(
"triple_sorted");
335 auto triple_index_sorted =
337 .Map([](
const IndexChars& tc) {
return tc.index; })
340 auto triple_prenames =
342 .template FlatWindow<Index>(
344 assert(rb.size() == 2);
347 if (index == 0) emit(0);
350 emit(rb[0].chars == rb[1].chars ? 0 : 1);
354 triple_prenames.Keep().Print(
"triple_prenames");
356 auto triple_lexname_sums = triple_prenames.PrefixSum();
359 triple_lexname_sums.Keep().Print(
"triple_lexname_sums");
362 const Index max_lexname = triple_lexname_sums.Keep().Max();
365 const Index size_subp = Index((input_size / 3) * 2 + (input_size % 3 != 0));
368 const Index size_mod1 = Index(input_size / 3 + (input_size % 3 != 0));
371 sLOG1 <<
"max_lexname=" << max_lexname
372 <<
" size_subp=" << size_subp
373 <<
" size_mod1=" << size_mod1;
378 if (max_lexname + Index(1) != size_subp) {
388 [](
const Index& triple_index,
const Index& rank) {
393 triple_ranks.Keep().Print(
"triple_ranks");
397 auto triple_ranks_sorted =
400 if (a.index % 3 == b.index % 3)
401 return a.index < b.index;
403 return a.index % 3 < b.index % 3;
407 triple_ranks_sorted.Keep().Print(
"triple_ranks_sorted");
417 string_mod12.
Keep().Print(
"string_mod12");
419 using RecStringFragment = dc3_local::StringFragment<Index, Index>;
422 string_mod12, size_subp, max_lexname + Index(1));
428 suffix_array_rec.
Keep().Print(
"suffix_array_rec");
432 .
ZipWithIndex([](
const RecStringFragment& sa,
const size_t& i) {
439 return a.index % size_mod1 < b.index % size_mod1 || (
440 a.index % size_mod1 == b.index % size_mod1 &&
449 [size_mod1](
size_t,
const std::vector<IndexRank>& ir) {
451 die_unless(ir[1].index >= size_mod1 || ir[1].rank == Index(0));
455 ranks_mod12.
Keep().Print(
"ranks_mod12");
460 sLOG1 <<
"*** recursion finished ***";
463 triple_index_sorted.Keep().Print(
"triple_index_sorted");
474 return a.index / 3 < b.index / 3 || (
475 a.index / 3 == b.index / 3 &&
484 [](
size_t,
const std::vector<IndexRank>& ir) {
486 die_unless(ir[1].index % 3 != 1 || ir[1].rank == Index(0));
490 ranks_mod12.
Keep().Print(
"ranks_mod12");
499 using StringFragmentMod0 = dc3_local::StringFragmentMod0<Index, Char>;
500 using StringFragmentMod1 = dc3_local::StringFragmentMod1<Index, Char>;
501 using StringFragmentMod2 = dc3_local::StringFragmentMod2<Index, Char>;
503 using CharsRanks12 = dc3_local::CharsRanks12<Index, Char>;
504 using IndexCR12Pair = dc3_local::IndexCR12Pair<Index, Char>;
506 auto zip_triple_pairs1 =
511 [](
const std::array<Char, 3>& ch,
const std::array<IndexRank, 2>& mod12) {
512 return CharsRanks12 {
514 { ch[0], ch[1], ch[2] }
515 }, mod12[0].rank, mod12[1].rank
518 std::make_tuple(std::numeric_limits<Char>::lowest(),
IndexRank { 0, 0 }),
519 input_dia, ranks_mod12);
522 zip_triple_pairs1.Keep().Print(
"zip_triple_pairs1");
524 auto zip_triple_pairs =
526 .template FlatWindow<IndexCR12Pair>(
527 2, [size_mod1](
size_t index,
529 emit(IndexCR12Pair { Index(3 * index), rb[0], rb[1] });
530 if (index + 2 == size_mod1) {
532 emit(IndexCR12Pair { Index(3 * (index + 1)), rb[1],
533 CharsRanks12 { Chars::Lowest(), 0, 0 }
538 auto fragments_mod0 =
540 .Map([](
const IndexCR12Pair& ip) {
541 return StringFragmentMod0 {
543 ip.cr0.rank1, ip.cr0.rank2,
544 ip.cr0.chars.ch[0], ip.cr0.chars.ch[1]
547 .Filter([input_size](
const StringFragmentMod0& mod0) {
548 return mod0.index < Index(input_size);
551 auto fragments_mod1 =
553 .Map([](
const IndexCR12Pair& ip) {
554 return StringFragmentMod1 {
556 ip.cr0.rank1, ip.cr0.rank2,
560 .Filter([input_size](
const StringFragmentMod1& mod1) {
561 return mod1.index < Index(input_size);
564 auto fragments_mod2 =
566 .Map([](
const IndexCR12Pair& ip) {
567 return StringFragmentMod2 {
569 ip.cr0.rank2, ip.cr1.rank1,
570 ip.cr0.chars.ch[2], ip.cr1.chars.ch[0]
573 .Filter([input_size](
const StringFragmentMod2& mod2) {
574 return mod2.index < Index(input_size);
578 fragments_mod0.Keep().Print(
"fragments_mod0");
579 fragments_mod1.Keep().Print(
"fragments_mod1");
580 fragments_mod2.Keep().Print(
"fragments_mod2");
585 using StringFragment = dc3_local::StringFragment<Index, Char>;
587 auto string_fragments_mod0 =
589 .Map([](
const StringFragmentMod0& mod0)
590 {
return StringFragment(mod0); });
592 auto string_fragments_mod1 =
594 .Map([](
const StringFragmentMod1& mod1)
595 {
return StringFragment(mod1); });
597 auto string_fragments_mod2 =
599 .Map([](
const StringFragmentMod2& mod2)
600 {
return StringFragment(mod2); });
603 Union(string_fragments_mod0,
604 string_fragments_mod1,
605 string_fragments_mod2)
606 .Sort(dc3_local::FragmentComparator<StringFragment>())
612 std::vector<Char> input_vec = input_dia.Keep().AllGather();
613 std::vector<Index> vec =
615 .Map([](
const StringFragment& a) {
return a.index; })
619 for (
const Index& index : vec)
621 std::cout << std::setw(5) << index <<
" =";
622 for (Index i = index;
623 i < index + Index(64) && i < Index(input_size); ++i) {
624 std::cout <<
' ' << input_vec[i];
634 return suffix_array.Collapse();
637 template <
typename Index,
typename InputDIA>
640 using Char =
typename InputDIA::ValueType;
641 using StringFragment = dc3_local::StringFragment<Index, Char>;
643 return DC3Recursive<Index>(input_dia, input_size,
K)
644 .Map([](
const StringFragment& a) {
return a.index; })
651 #if !THRILL_ON_TRAVIS
template DIA< uint32_t > DC3< uint32_t >(const DIA< uint8_t > &input_dia, size_t input_size, size_t K)
tlx::RingBuffer< Type, Allocator > RingBuffer
static constexpr size_t fragment_comparator_params[3][3][3]
tag structure for ZipWindow()
DIA< Index > DC3(const InputDIA &input_dia, size_t input_size, size_t K)
DIA is the interface between the user and the Thrill framework.
auto Union(const FirstDIA &first_dia, const DIAs &... dias)
Union is a LOp, which creates the union of all items from any number of DIAs as a single DIA...
A ring (circular) buffer of static (non-growing) size.
auto ZipWindow(const std::array< size_t, 1+sizeof ...(DIAs)> &window_size, const ZipFunction &zip_function, const DIA< FirstDIAType, FirstDIAStack > &first_dia, const DIAs &... dias)
Zips two DIAs of equal size in style of functional programming by applying zip_function to the i-th f...
The Context of a job is a unique instance per worker which holds references to all underlying parts o...
struct examples::suffix_sorting::dc3_local::Chars TLX_ATTRIBUTE_PACKED
static by_string to_string(int val)
convert to string
DIA< dc3_local::StringFragment< Index, typename InputDIA::ValueType > > DC3Recursive(const InputDIA &input_dia, size_t input_size, size_t K)
auto Map(const MapFunction &map_function) const
Map applies map_function : to each item of a DIA and delivers a new DIA contains the returned values...
auto ZipWithIndex(const ZipFunction &zip_function) const
Zips each item of a DIA with its zero-based array index.
bool operator==(const uint_pair &b) const
equality checking operator
size_t my_rank() const
Global rank of this worker among all other workers in the system.
std::ostream & operator<<(std::ostream &os, const Status &s)
static const uint64_t K[80]
template DIA< uint64_t > DC3< uint64_t >(const DIA< uint8_t > &input_dia, size_t input_size, size_t K)
auto Zip(const SecondDIA &second_dia, const ZipFunction &zip_function) const
Zips two DIAs of equal size in style of functional programming by applying zip_function to the i-th e...
tag structure for Window() and FlatWindow()
const DIA & Keep(size_t increase=1) const
Mark the referenced DIANode for keeping, which makes children not consume the data when executing...
bool operator<(const uint_pair &b) const
less-than comparison operator
SortAlgorithm class for use with api::Sort() which calls radix_sort_CI() if K is small enough...