Thrill  0.1
radix_sort.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/sort/strings/radix_sort.hpp
3  *
4  * Out-of-place and in-place radix sort for strings. This is an internal
5  * implementation header, see tlx/sort/strings.hpp for public front-end
6  * functions.
7  *
8  * These are explicit stack-based most-significant-digit radix sort
9  * implementations. All implementations were written by Timo Bingmann and are
10  * based on work by Juha Kärkkäinen and Tommi Rantala. "Engineering Radix Sort
11  * for Strings." International Symposium on String Processing and Information
12  * Retrieval (SPIRE). Springer, 2008.
13  *
14  * Part of tlx - http://panthema.net/tlx
15  *
16  * Copyright (C) 2015-2019 Timo Bingmann <[email protected]>
17  *
18  * All rights reserved. Published under the Boost Software License, Version 1.0
19  ******************************************************************************/
20 
21 #ifndef TLX_SORT_STRINGS_RADIX_SORT_HEADER
22 #define TLX_SORT_STRINGS_RADIX_SORT_HEADER
23 
25 #include <tlx/define/likely.hpp>
28 
29 #include <stack>
30 #include <utility>
31 #include <vector>
32 
33 namespace tlx {
34 
35 //! \addtogroup tlx_sort
36 //! \{
37 
38 namespace sort_strings_detail {
39 
40 /******************************************************************************/
41 
42 static const size_t g_inssort_threshold = 32;
43 
44 /******************************************************************************/
45 // Out-of-place 8-bit radix-sort WITHOUT character caching.
46 
47 template <typename StringShadowPtr>
48 struct RadixStep_CE0 {
50  size_t idx, pos, bkt_size[256];
51 
53  typedef typename StringSet::Iterator Iterator;
54 
55  RadixStep_CE0(const StringShadowPtr& in_strptr, size_t depth)
56  : strptr(in_strptr) {
57 
58  const StringSet& ss = strptr.active();
59 
60  // count character occurrences
61  std::fill(bkt_size, bkt_size + 256, 0);
62  for (Iterator i = ss.begin(); i != ss.end(); ++i)
63  ++bkt_size[ss.get_uint8(ss[i], depth)];
64 
65  // prefix sum
66  Iterator bkt_index[256];
67  bkt_index[0] = strptr.shadow().begin();
68  for (size_t i = 1; i < 256; ++i)
69  bkt_index[i] = bkt_index[i - 1] + bkt_size[i - 1];
70 
71  // distribute
72  for (Iterator i = ss.begin(); i != ss.end(); ++i)
73  *(bkt_index[ss.get_uint8(ss[i], depth)]++) = std::move(ss[i]);
74 
75  // will increment to 1 on first process
76  idx = 0;
77  pos = bkt_size[0];
78 
79  // copy back finished strings in zeroth bucket
80  strptr.flip(0, pos).copy_back();
81 
82  // store lcps
83  if (strptr.with_lcp) {
84  size_t size = ss.size();
85 
86  // set lcps of zero-terminated strings
87  for (size_t i = 1; i < pos; ++i)
88  strptr.set_lcp(i, depth);
89 
90  // set lcps between non-empty bucket boundaries
91  size_t bkt = bkt_size[0], i = 1;
92  if (bkt > 0 && bkt < size)
93  strptr.set_lcp(bkt, depth);
94  while (i < 256) {
95  while (i < 256 && bkt_size[i] == 0)
96  ++i;
97  bkt += bkt_size[i];
98  if (bkt >= size)
99  break;
100  strptr.set_lcp(bkt, depth);
101  ++i;
102  }
103  }
104  }
105 };
106 
107 /*
108  * Out-of-place 8-bit radix-sort WITHOUT character caching.
109  */
110 template <typename StringShadowPtr>
111 static inline void
112 radixsort_CE0_loop(const StringShadowPtr& strptr, size_t depth, size_t memory) {
113 
114  typedef RadixStep_CE0<StringShadowPtr> RadixStep;
115 
116  std::stack<RadixStep, std::vector<RadixStep> > radixstack;
117  radixstack.emplace(strptr, depth);
118 
119  while (radixstack.size())
120  {
121  while (radixstack.top().idx < 255)
122  {
123  RadixStep& rs = radixstack.top();
124 
125  // process the bucket rs.idx
126  size_t bkt_size = rs.bkt_size[++rs.idx];
127 
128  if (bkt_size == 0) {
129  // done
130  }
131  else if (bkt_size < g_inssort_threshold) {
133  rs.strptr.flip(rs.pos, bkt_size).copy_back(),
134  depth + radixstack.size(),
135  memory - sizeof(RadixStep) * radixstack.size());
136  rs.pos += bkt_size;
137  }
138  else if (
139  TLX_UNLIKELY(
140  memory != 0 &&
141  memory < sizeof(RadixStep) * (radixstack.size() + 1)))
142  {
144  rs.strptr.flip(rs.pos, bkt_size).copy_back(),
145  depth + radixstack.size(),
146  memory - sizeof(RadixStep) * radixstack.size());
147  rs.pos += bkt_size;
148  }
149  else
150  {
151  rs.pos += bkt_size;
152  radixstack.emplace(rs.strptr.flip(rs.pos - bkt_size, bkt_size),
153  depth + radixstack.size());
154  // cannot add here, because rs may have invalidated
155  }
156  }
157  radixstack.pop();
158  }
159 }
160 
161 /*!
162  * Adapter method to allocate shadow array if needed.
163  */
164 template <typename StringPtr>
165 static inline void
166 radixsort_CE0(const StringPtr& strptr, size_t depth, size_t memory) {
167 
168  typedef typename StringPtr::StringSet StringSet;
169  const StringSet& ss = strptr.active();
170  if (ss.size() < g_inssort_threshold)
171  return insertion_sort(strptr, depth, memory);
172 
174 
175  // try to estimate the amount of memory used
176  size_t memory_use =
177  2 * sizeof(size_t) + sizeof(StringSet)
178  + ss.size() * sizeof(typename StringSet::String);
179  size_t memory_slack = 3 * sizeof(RadixStep);
180 
181  if (memory != 0 && memory < memory_use + memory_slack + 1)
182  return multikey_quicksort(strptr, depth, memory);
183 
184  typename StringSet::Container shadow = ss.allocate(ss.size());
186  strptr.add_shadow(StringSet(shadow)), depth, memory - memory_use);
187  StringSet::deallocate(shadow);
188 }
189 
190 /******************************************************************************/
191 // Out-of-place 8-bit radix-sort with character caching.
192 
193 template <typename StringShadowPtr>
196  size_t idx, pos, bkt_size[256];
197 
199  typedef typename StringSet::Iterator Iterator;
200 
201  RadixStep_CE2(const StringShadowPtr& in_strptr, size_t depth,
202  uint8_t* charcache) : strptr(in_strptr) {
203 
204  const StringSet& ss = strptr.active();
205  const size_t n = ss.size();
206 
207  // read characters and count character occurrences
208  std::fill(bkt_size, bkt_size + 256, 0);
209  uint8_t* cc = charcache;
210  for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
211  *cc = ss.get_uint8(ss[i], depth);
212  for (cc = charcache; cc != charcache + n; ++cc)
213  ++bkt_size[static_cast<uint8_t>(*cc)];
214 
215  // prefix sum
216  Iterator bkt_index[256];
217  bkt_index[0] = strptr.shadow().begin();
218  for (size_t i = 1; i < 256; ++i)
219  bkt_index[i] = bkt_index[i - 1] + bkt_size[i - 1];
220 
221  // distribute
222  cc = charcache;
223  for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
224  *(bkt_index[static_cast<uint8_t>(*cc)]++) = std::move(ss[i]);
225 
226  idx = 0; // will increment to 1 on first process
227  pos = bkt_size[0];
228 
229  // copy back finished strings in zeroth bucket
230  strptr.flip(0, pos).copy_back();
231 
232  // store lcps
233  if (strptr.with_lcp) {
234  size_t size = ss.size();
235 
236  // set lcps of zero-terminated strings
237  for (size_t i = 1; i < pos; ++i)
238  strptr.set_lcp(i, depth);
239 
240  // set lcps between non-empty bucket boundaries
241  size_t bkt = bkt_size[0], i = 1;
242  if (bkt > 0 && bkt < size)
243  strptr.set_lcp(bkt, depth);
244  while (i < 256) {
245  while (i < 256 && bkt_size[i] == 0)
246  ++i;
247  bkt += bkt_size[i];
248  if (bkt >= size)
249  break;
250  strptr.set_lcp(bkt, depth);
251  ++i;
252  }
253  }
254  }
255 };
256 
257 template <typename StringPtr>
258 static inline void
259 radixsort_CI3(const StringPtr& strptr, uint16_t* charcache,
260  size_t depth, size_t memory);
261 
262 /*
263  * Out-of-place 8-bit radix-sort with character caching.
264  */
265 template <typename StringShadowPtr>
266 static inline void
268  uint8_t* charcache, size_t depth, size_t memory) {
269 
270  typedef RadixStep_CE2<StringShadowPtr> RadixStep;
271 
272  std::stack<RadixStep, std::vector<RadixStep> > radixstack;
273  radixstack.emplace(strptr, depth, charcache);
274 
275  while (TLX_LIKELY(!radixstack.empty()))
276  {
277  while (TLX_LIKELY(radixstack.top().idx < 255))
278  {
279  RadixStep& rs = radixstack.top();
280 
281  // process the bucket rs.idx
282  size_t bkt_size = rs.bkt_size[++rs.idx];
283 
284  if (TLX_UNLIKELY(bkt_size == 0)) {
285  // done
286  }
287  else if (bkt_size < g_inssort_threshold) {
289  rs.strptr.flip(rs.pos, bkt_size).copy_back(),
290  depth + radixstack.size(),
291  memory - sizeof(RadixStep) * radixstack.size());
292  rs.pos += bkt_size;
293  }
294  else if (
295  TLX_UNLIKELY(
296  memory != 0 &&
297  memory < sizeof(RadixStep) * (radixstack.size() + 1)))
298  {
300  rs.strptr.flip(rs.pos, bkt_size).copy_back(),
301  depth + radixstack.size(),
302  memory - sizeof(RadixStep) * radixstack.size());
303  rs.pos += bkt_size;
304  }
305  else
306  {
307  // have to increment first, as rs may be invalidated
308  rs.pos += bkt_size;
309  radixstack.emplace(rs.strptr.flip(rs.pos - bkt_size, bkt_size),
310  depth + radixstack.size(), charcache);
311  }
312  }
313  radixstack.pop();
314  }
315 }
316 
317 template <typename StringPtr>
318 static inline void
319 radixsort_CE2(const StringPtr& strptr, size_t depth, size_t memory) {
320 
321  typedef typename StringPtr::StringSet StringSet;
322  const StringSet& ss = strptr.active();
323  if (ss.size() < g_inssort_threshold)
324  return insertion_sort(strptr, depth, memory);
325 
327 
328  // try to estimate the amount of memory used
329  size_t memory_use =
330  2 * sizeof(size_t) + sizeof(StringSet)
331  + ss.size() * sizeof(uint8_t)
332  + ss.size() * sizeof(typename StringSet::String);
333  size_t memory_slack = 3 * sizeof(RadixStep);
334 
335  if (memory != 0 && memory < memory_use + memory_slack + 1)
336  return radixsort_CI3(strptr, depth, memory);
337 
338  typename StringSet::Container shadow = ss.allocate(ss.size());
339  uint8_t* charcache = new uint8_t[ss.size()];
340 
341  radixsort_CE2_loop(strptr.add_shadow(StringSet(shadow)),
342  charcache, depth, memory - memory_use);
343 
344  delete[] charcache;
345  StringSet::deallocate(shadow);
346 }
347 
348 /******************************************************************************/
349 // Out-of-place adaptive radix-sort with character caching
350 
351 template <typename StringShadowPtr>
353  enum { RADIX = 0x10000 };
354 
356  size_t idx, pos, bkt_size[RADIX];
357 
359  typedef typename StringSet::Iterator Iterator;
360 
361  RadixStep_CE3(const StringShadowPtr& in_strptr, size_t depth,
362  uint16_t* charcache) : strptr(in_strptr) {
363 
364  const StringSet& ss = strptr.active();
365  const size_t n = ss.size();
366 
367  // read characters and count character occurrences
368  std::fill(bkt_size, bkt_size + RADIX, 0);
369  uint16_t* cc = charcache;
370  for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
371  *cc = ss.get_uint16(ss[i], depth);
372  for (cc = charcache; cc != charcache + n; ++cc)
373  ++bkt_size[static_cast<uint16_t>(*cc)];
374 
375  // prefix sum
376  simple_vector<Iterator> bkt_index(RADIX);
377  bkt_index[0] = strptr.shadow().begin();
378  for (size_t i = 1; i < RADIX; ++i)
379  bkt_index[i] = bkt_index[i - 1] + bkt_size[i - 1];
380 
381  // store lcps
382  if (strptr.with_lcp) {
383  // set lcps of zero-terminated strings
384  for (size_t i = 1; i < bkt_size[0]; ++i)
385  strptr.set_lcp(i, depth);
386 
387  // set lcps between non-empty bucket boundaries
388  size_t first = get_next_non_empty_bkt_index(0);
389  size_t bkt = bkt_index[first] + bkt_size[first] - bkt_index[0];
390 
391  size_t second = get_next_non_empty_bkt_index(first + 1);
392  while (second < RADIX)
393  {
394  size_t partial_equal = (first >> 8) == (second >> 8);
395  strptr.set_lcp(bkt, depth + partial_equal);
396  bkt += bkt_size[second];
397  first = second;
398  second = get_next_non_empty_bkt_index(second + 1);
399  }
400  }
401 
402  // distribute
403  cc = charcache;
404  for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
405  *(bkt_index[static_cast<uint16_t>(*cc)]++) = std::move(ss[i]);
406 
407  // will increment to 1 on first process
408  idx = 0;
409  pos = bkt_size[0];
410 
411  // copy back finished strings in zeroth bucket
412  strptr.flip(0, pos).copy_back();
413  }
414 
415  size_t get_next_non_empty_bkt_index(size_t start) {
416  if (start >= RADIX)
417  return RADIX;
418  while (start < RADIX && bkt_size[start] == 0)
419  ++start;
420  return start;
421  }
422 };
423 
424 /*
425  * Out-of-place adaptive radix-sort with character caching which starts with
426  * 16-bit radix sort and then switching to 8-bit for smaller string sets.
427  */
428 template <typename StringShadowPtr>
429 static inline void
431  uint16_t* charcache, size_t depth, size_t memory) {
432 
433  enum { RADIX = 0x10000 };
434 
435  typedef RadixStep_CE3<StringShadowPtr> RadixStep;
436 
437  std::stack<RadixStep, std::vector<RadixStep> > radixstack;
438  radixstack.emplace(strptr, depth, charcache);
439 
440  while (TLX_LIKELY(!radixstack.empty()))
441  {
442  while (TLX_LIKELY(radixstack.top().idx < RADIX - 1))
443  {
444  RadixStep& rs = radixstack.top();
445 
446  // process the bucket rs.idx
447  size_t bkt_size = rs.bkt_size[++rs.idx];
448 
449  if (TLX_UNLIKELY(bkt_size == 0)) {
450  // done
451  }
452  else if (TLX_UNLIKELY((rs.idx & 0xFF) == 0)) {
453  // zero-termination
454  rs.strptr.flip(rs.pos, bkt_size).copy_back();
455  for (size_t i = rs.pos + 1; i < rs.pos + bkt_size; ++i)
456  rs.strptr.set_lcp(i, depth + 2 * radixstack.size() - 1);
457  rs.pos += bkt_size;
458  }
459  else if (TLX_UNLIKELY(bkt_size < g_inssort_threshold))
460  {
462  rs.strptr.flip(rs.pos, bkt_size).copy_back(),
463  depth + 2 * radixstack.size(),
464  memory - sizeof(RadixStep) * radixstack.size());
465  rs.pos += bkt_size;
466  }
467  else if (bkt_size < RADIX)
468  {
470  rs.strptr.flip(rs.pos, bkt_size),
471  reinterpret_cast<uint8_t*>(charcache),
472  depth + 2 * radixstack.size(),
473  memory - sizeof(RadixStep) * radixstack.size());
474  rs.pos += bkt_size;
475  }
476  else if (
477  TLX_UNLIKELY(
478  memory != 0 &&
479  memory < sizeof(RadixStep) * (radixstack.size() + 1)))
480  {
482  rs.strptr.flip(rs.pos, bkt_size).copy_back(),
483  depth + 2 * radixstack.size(),
484  memory - sizeof(RadixStep) * radixstack.size());
485  rs.pos += bkt_size;
486  }
487  else
488  {
489  // have to increment first, as rs may be invalidated
490  rs.pos += bkt_size;
491  radixstack.emplace(
492  rs.strptr.flip(rs.pos - bkt_size, bkt_size),
493  depth + 2 * radixstack.size(), charcache);
494  }
495  }
496  radixstack.pop();
497  }
498 }
499 
500 template <typename StringPtr>
501 static inline void
502 radixsort_CE3(const StringPtr& strptr, size_t depth, size_t memory) {
503  enum { RADIX = 0x10000 };
504  typedef typename StringPtr::StringSet StringSet;
505 
506  const StringSet& ss = strptr.active();
507  if (ss.size() < g_inssort_threshold)
508  return insertion_sort(strptr, depth, memory);
509 
510  if (ss.size() < RADIX)
511  return radixsort_CE2(strptr, depth, memory);
512 
514 
515  // try to estimate the amount of memory used
516  size_t memory_use =
517  2 * sizeof(size_t) + sizeof(StringSet)
518  + ss.size() * sizeof(uint16_t)
519  + ss.size() * sizeof(typename StringSet::String);
520  size_t memory_slack = 3 * sizeof(RadixStep);
521 
522  if (memory != 0 && memory < memory_use + memory_slack + 1)
523  return radixsort_CE2(strptr, depth, memory);
524 
525  typename StringSet::Container shadow = ss.allocate(ss.size());
526  uint16_t* charcache = new uint16_t[ss.size()];
527 
528  radixsort_CE3_loop(strptr.add_shadow(StringSet(shadow)),
529  charcache, depth, memory - memory_use);
530 
531  delete[] charcache;
532  StringSet::deallocate(shadow);
533 }
534 
535 /******************************************************************************/
536 // In-place 8-bit radix-sort with character caching.
537 
538 template <typename StringPtr>
540  typedef typename StringPtr::StringSet StringSet;
541  typedef typename StringSet::Iterator Iterator;
542  typedef typename StringSet::String String;
543 
544  size_t idx, pos;
545  size_t bkt_size[256];
546 
547  RadixStep_CI2(const StringPtr& strptr,
548  size_t base, size_t depth, uint8_t* charcache) {
549 
550  const StringSet& ss = strptr.active();
551  size_t size = ss.size();
552 
553  // read characters and count character occurrences
554  std::fill(bkt_size, bkt_size + 256, 0);
555  uint8_t* cc = charcache;
556  for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
557  *cc = ss.get_uint8(ss[i], depth);
558  for (cc = charcache; cc != charcache + size; ++cc)
559  ++bkt_size[static_cast<uint8_t>(*cc)];
560 
561  // inclusive prefix sum
562  size_t bkt[256];
563  bkt[0] = bkt_size[0];
564  size_t last_bkt_size = bkt_size[0];
565  for (size_t i = 1; i < 256; ++i) {
566  bkt[i] = bkt[i - 1] + bkt_size[i];
567  if (bkt_size[i] != 0) last_bkt_size = bkt_size[i];
568  }
569 
570  // premute in-place
571  for (size_t i = 0, j; i < size - last_bkt_size; )
572  {
573  String perm = std::move(ss[ss.begin() + i]);
574  uint8_t permch = charcache[i];
575  while ((j = --bkt[permch]) > i)
576  {
577  std::swap(perm, ss[ss.begin() + j]);
578  std::swap(permch, charcache[j]);
579  }
580  ss[ss.begin() + i] = std::move(perm);
581  i += bkt_size[permch];
582  }
583 
584  // will increment idx to 1 in first step, bkt 0 is not sorted further
585  idx = 0;
586  pos = base + bkt_size[0];
587 
588  // store lcps
589  if (strptr.with_lcp) {
590  // set lcps of zero-terminated strings
591  for (size_t i = 1; i < bkt_size[0]; ++i)
592  strptr.set_lcp(i, depth);
593 
594  // set lcps between non-empty bucket boundaries
595  size_t lbkt = bkt_size[0], i = 1;
596  if (lbkt > 0 && lbkt < size)
597  strptr.set_lcp(lbkt, depth);
598  while (i < 256) {
599  while (i < 256 && bkt_size[i] == 0)
600  ++i;
601  lbkt += bkt_size[i];
602  if (lbkt >= size)
603  break;
604  strptr.set_lcp(lbkt, depth);
605  ++i;
606  }
607  }
608  }
609 };
610 
611 /*
612  * In-place 8-bit radix-sort with character caching.
613  */
614 template <typename StringPtr>
615 static inline void
616 radixsort_CI2(const StringPtr& strptr, uint8_t* charcache,
617  size_t depth, size_t memory) {
618 
619  typedef RadixStep_CI2<StringPtr> RadixStep;
620 
621  std::stack<RadixStep, std::vector<RadixStep> > radixstack;
622  radixstack.emplace(strptr, /* base */ 0, depth, charcache);
623 
624  while (TLX_LIKELY(!radixstack.empty()))
625  {
626  while (TLX_LIKELY(radixstack.top().idx < 255))
627  {
628  RadixStep& rs = radixstack.top();
629 
630  // process the bucket rs.idx
631  size_t bkt_size = rs.bkt_size[++rs.idx];
632 
633  if (TLX_UNLIKELY(bkt_size <= 1)) {
634  // done
635  rs.pos += bkt_size;
636  }
637  else if (bkt_size < g_inssort_threshold) {
639  strptr.sub(rs.pos, bkt_size),
640  depth + radixstack.size(),
641  memory - sizeof(RadixStep) * radixstack.size());
642  rs.pos += bkt_size;
643  }
644  else if (
645  TLX_UNLIKELY(
646  memory != 0 &&
647  memory < sizeof(RadixStep) * (radixstack.size() + 1)))
648  {
650  strptr.sub(rs.pos, bkt_size),
651  depth + radixstack.size(),
652  memory - sizeof(RadixStep) * radixstack.size());
653  rs.pos += bkt_size;
654  }
655  else
656  {
657  // have to increment first, as rs may be invalidated
658  rs.pos += bkt_size;
659  radixstack.emplace(
660  strptr.sub(rs.pos - bkt_size, bkt_size),
661  rs.pos - bkt_size, depth + radixstack.size(), charcache);
662  }
663  }
664  radixstack.pop();
665  }
666 }
667 
668 /*
669  * In-place 8-bit radix-sort with character caching.
670  */
671 template <typename StringPtr>
672 static inline void
673 radixsort_CI2(const StringPtr& strptr, size_t depth, size_t memory) {
674  typedef typename StringPtr::StringSet StringSet;
675 
676  if (strptr.size() < g_inssort_threshold)
677  return insertion_sort(strptr, depth, memory);
678 
679  typedef RadixStep_CI2<StringPtr> RadixStep;
680 
681  // try to estimate the amount of memory used
682  size_t memory_use =
683  2 * sizeof(size_t) + sizeof(StringSet)
684  + strptr.size() * sizeof(uint8_t);
685  size_t memory_slack = 3 * sizeof(RadixStep);
686 
687  if (memory != 0 && memory < memory_use + memory_slack + 1)
688  return multikey_quicksort(strptr, depth, memory);
689 
690  uint8_t* charcache = new uint8_t[strptr.size()];
691 
692  radixsort_CI2(strptr, charcache, depth, memory - memory_use);
693 
694  delete[] charcache;
695 }
696 
697 /******************************************************************************/
698 // In-place adaptive radix-sort with character caching
699 
700 template <typename StringPtr>
702  enum { RADIX = 0x10000 };
703 
704  typedef typename StringPtr::StringSet StringSet;
705  typedef typename StringSet::Iterator Iterator;
706  typedef typename StringSet::String String;
707 
708  size_t idx, pos;
709  size_t bkt_size[RADIX];
710 
711  RadixStep_CI3(const StringPtr& strptr, size_t base, size_t depth,
712  uint16_t* charcache) {
713 
714  const StringSet& ss = strptr.active();
715  const size_t n = ss.size();
716  // read characters and count character occurrences
717  std::fill(bkt_size, bkt_size + RADIX, 0);
718  uint16_t* cc = charcache;
719  for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
720  *cc = ss.get_uint16(ss[i], depth);
721  for (cc = charcache; cc != charcache + n; ++cc)
722  ++bkt_size[static_cast<uint16_t>(*cc)];
723 
724  // inclusive prefix sum
725  simple_vector<size_t> bkt_index(RADIX);
726  bkt_index[0] = bkt_size[0];
727  size_t last_bkt_size = bkt_size[0];
728  for (size_t i = 1; i < RADIX; ++i) {
729  bkt_index[i] = bkt_index[i - 1] + bkt_size[i];
730  if (bkt_size[i]) last_bkt_size = bkt_size[i];
731  }
732 
733  // store lcps
734  if (strptr.with_lcp) {
735  // set lcps of zero-terminated strings
736  for (size_t i = 1; i < bkt_size[0]; ++i)
737  strptr.set_lcp(i, depth);
738 
739  // set lcps between non-empty bucket boundaries
740  size_t first = get_next_non_empty_bkt_index(0);
741  size_t bkt = bkt_index[first];
742 
743  size_t second = get_next_non_empty_bkt_index(first + 1);
744  while (second < RADIX)
745  {
746  size_t partial_equal = (first >> 8) == (second >> 8);
747  strptr.set_lcp(bkt, depth + partial_equal);
748  bkt += bkt_size[second];
749  first = second;
750  second = get_next_non_empty_bkt_index(second + 1);
751  }
752  }
753 
754  // premute in-place
755  for (size_t i = 0, j; i < n - last_bkt_size; )
756  {
757  String perm = std::move(ss[ss.begin() + i]);
758  uint16_t permch = charcache[i];
759  while ((j = --bkt_index[permch]) > i)
760  {
761  std::swap(perm, ss[ss.begin() + j]);
762  std::swap(permch, charcache[j]);
763  }
764  ss[ss.begin() + i] = std::move(perm);
765  i += bkt_size[permch];
766  }
767 
768  // will increment to 1 on first process, bkt 0 is not sorted further
769  idx = 0;
770  pos = base + bkt_size[0];
771  }
772 
773  size_t get_next_non_empty_bkt_index(size_t start) {
774  if (start >= RADIX)
775  return RADIX;
776  while (start < RADIX && bkt_size[start] == 0)
777  ++start;
778  return start;
779  }
780 };
781 
782 /*
783  * In-place adaptive radix-sort with character caching which starts with 16-bit
784  * radix sort and then switching to 8-bit for smaller string sets.
785  */
786 template <typename StringPtr>
787 static inline void
788 radixsort_CI3(const StringPtr& strptr, uint16_t* charcache,
789  size_t depth, size_t memory) {
790  enum { RADIX = 0x10000 };
791 
792  typedef RadixStep_CI3<StringPtr> RadixStep;
793 
794  std::stack<RadixStep, std::vector<RadixStep> > radixstack;
795  radixstack.emplace(strptr, /* base */ 0, depth, charcache);
796 
797  while (TLX_LIKELY(!radixstack.empty()))
798  {
799  while (TLX_LIKELY(radixstack.top().idx < RADIX - 1))
800  {
801  RadixStep& rs = radixstack.top();
802 
803  // process the bucket rs.idx
804  size_t bkt_size = rs.bkt_size[++rs.idx];
805 
806  if (TLX_UNLIKELY(bkt_size <= 1)) {
807  // done
808  rs.pos += bkt_size;
809  }
810  else if (TLX_UNLIKELY((rs.idx & 0xFF) == 0)) {
811  // zero-termination
812  for (size_t i = rs.pos + 1; i < rs.pos + bkt_size; ++i)
813  strptr.set_lcp(i, depth + 2 * radixstack.size() - 1);
814 
815  rs.pos += bkt_size;
816  }
817  else if (TLX_UNLIKELY(bkt_size < g_inssort_threshold))
818  {
820  strptr.sub(rs.pos, bkt_size),
821  depth + 2 * radixstack.size(),
822  memory - sizeof(RadixStep) * radixstack.size());
823  rs.pos += bkt_size;
824  }
825  else if (bkt_size < RADIX)
826  {
828  strptr.sub(rs.pos, bkt_size),
829  reinterpret_cast<uint8_t*>(charcache),
830  depth + 2 * radixstack.size(),
831  memory - sizeof(RadixStep) * radixstack.size());
832  rs.pos += bkt_size;
833  }
834  else if (
835  TLX_UNLIKELY(
836  memory != 0 &&
837  memory < sizeof(RadixStep) * (radixstack.size() + 1)))
838  {
840  strptr.sub(rs.pos, bkt_size),
841  depth + 2 * radixstack.size(),
842  memory - sizeof(RadixStep) * radixstack.size());
843  rs.pos += bkt_size;
844  }
845  else
846  {
847  // have to increment first, as rs may be invalidated
848  rs.pos += bkt_size;
849  radixstack.emplace(
850  strptr.sub(rs.pos - bkt_size, bkt_size),
851  /* base */ rs.pos - bkt_size,
852  depth + 2 * radixstack.size(), charcache);
853  }
854  }
855  radixstack.pop();
856  }
857 }
858 
859 template <typename StringPtr>
860 static inline void
861 radixsort_CI3(const StringPtr& strptr, size_t depth, size_t memory) {
862  enum { RADIX = 0x10000 };
863  typedef typename StringPtr::StringSet StringSet;
864 
865  if (strptr.size() < g_inssort_threshold)
866  return insertion_sort(strptr, depth, memory);
867 
868  if (strptr.size() < RADIX)
869  return radixsort_CI2(strptr, depth, memory);
870 
871  typedef RadixStep_CI3<StringPtr> RadixStep;
872 
873  // try to estimate the amount of memory used
874  size_t memory_use =
875  2 * sizeof(size_t) + sizeof(StringSet)
876  + strptr.size() * sizeof(uint16_t);
877  size_t memory_slack = 3 * sizeof(RadixStep);
878 
879  if (memory != 0 && memory < memory_use + memory_slack + 1)
880  return radixsort_CI2(strptr, depth, memory);
881 
882  uint16_t* charcache = new uint16_t[strptr.size()];
883  radixsort_CI3(strptr, charcache, depth, memory - memory_use);
884  delete[] charcache;
885 }
886 
887 /******************************************************************************/
888 
889 } // namespace sort_strings_detail
890 
891 //! \}
892 
893 } // namespace tlx
894 
895 #endif // !TLX_SORT_STRINGS_RADIX_SORT_HEADER
896 
897 /******************************************************************************/
size_t size() const
return valid length
Definition: string_ptr.hpp:66
StringShadowPtr::StringSet StringSet
Definition: radix_sort.hpp:52
const StringSet & active() const
return currently active array
Definition: string_ptr.hpp:189
RadixStep_CI3(const StringPtr &strptr, size_t base, size_t depth, uint16_t *charcache)
Definition: radix_sort.hpp:711
RadixStep_CE0(const StringShadowPtr &in_strptr, size_t depth)
Definition: radix_sort.hpp:55
static void multikey_quicksort(const StringPtr &strptr, size_t depth, size_t memory)
Generic multikey quicksort for strings.
size_t get_next_non_empty_bkt_index(size_t start)
Definition: radix_sort.hpp:773
size_t get_next_non_empty_bkt_index(size_t start)
Definition: radix_sort.hpp:415
StringShadowPtr::StringSet StringSet
Definition: radix_sort.hpp:198
Simpler non-growing vector without initialization.
void set_lcp(size_t, const LcpType &) const
set the i-th lcp to v and check its value
Definition: string_ptr.hpp:79
#define TLX_UNLIKELY(c)
Definition: likely.hpp:24
RadixStep_CI2(const StringPtr &strptr, size_t base, size_t depth, uint8_t *charcache)
Definition: radix_sort.hpp:547
static void radixsort_CE2(const StringPtr &strptr, size_t depth, size_t memory)
Definition: radix_sort.hpp:319
#define TLX_LIKELY(c)
Definition: likely.hpp:23
static void radixsort_CE0(const StringPtr &strptr, size_t depth, size_t memory)
Adapter method to allocate shadow array if needed.
Definition: radix_sort.hpp:166
void set_lcp(size_t, const LcpType &) const
set the i-th lcp to v and check its value
Definition: string_ptr.hpp:234
StringPtr sub(size_t offset, size_t sub_size) const
Advance (both) pointers by given offset, return sub-array.
Definition: string_ptr.hpp:69
static const size_t g_inssort_threshold
Definition: radix_sort.hpp:42
StringShadowPtr flip(size_t offset, size_t sub_size) const
Definition: string_ptr.hpp:210
static void radixsort_CE0_loop(const StringShadowPtr &strptr, size_t depth, size_t memory)
Definition: radix_sort.hpp:112
static enable_if<!StringPtr::with_lcp, void >::type insertion_sort(const StringPtr &strptr, size_t depth, size_t)
static const bool with_lcp
if we want to save the LCPs
Definition: string_ptr.hpp:75
void swap(CountingPtr< A, D > &a1, CountingPtr< A, D > &a2) noexcept
RadixStep_CE2(const StringShadowPtr &in_strptr, size_t depth, uint8_t *charcache)
Definition: radix_sort.hpp:201
static void radixsort_CE3(const StringPtr &strptr, size_t depth, size_t memory)
Definition: radix_sort.hpp:502
const StringSet & active() const
return currently active array
Definition: string_ptr.hpp:63
static const bool with_lcp
if we want to save the LCPs
Definition: string_ptr.hpp:230
WithShadow add_shadow(const StringSet &shadow) const
construct objectified string and shadow pointer class
Definition: string_ptr.hpp:343
static void radixsort_CE3_loop(const StringShadowPtr &strptr, uint16_t *charcache, size_t depth, size_t memory)
Definition: radix_sort.hpp:430
size_t size() const
return valid length
Definition: string_ptr.hpp:198
static void radixsort_CE2_loop(const StringShadowPtr &strptr, uint8_t *charcache, size_t depth, size_t memory)
Definition: radix_sort.hpp:267
RadixStep_CE3(const StringShadowPtr &in_strptr, size_t depth, uint16_t *charcache)
Definition: radix_sort.hpp:361
const StringSet & shadow() const
return current shadow array
Definition: string_ptr.hpp:192
StringShadowPtr::StringSet StringSet
Definition: radix_sort.hpp:358
static void radixsort_CI2(const StringPtr &strptr, uint8_t *charcache, size_t depth, size_t memory)
Definition: radix_sort.hpp:616
static void radixsort_CI3(const StringPtr &strptr, uint16_t *charcache, size_t depth, size_t memory)
Definition: radix_sort.hpp:788
Objectified string array pointer array.
Definition: string_ptr.hpp:47