Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
multikey_quicksort.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/sort/strings/multikey_quicksort.hpp
3  *
4  * Generic multikey quicksort for strings. This is an internal implementation
5  * header, see tlx/sort/strings.hpp for public front-end functions.
6  *
7  * Based on multikey quicksort, a quick sort algorithm for arrays of character
8  * strings by Bentley and Sedgewick.
9  *
10  * J. Bentley and R. Sedgewick. "Fast Algorithms for Sorting and Searching
11  * Strings." In Proceedings of 8th Annual ACM-SIAM Symposium on Discrete
12  * Algorithms, 1997.
13  *
14  * http://www.cs.princeton.edu/~rs/strings/index.html
15  *
16  * Part of tlx - http://panthema.net/tlx
17  *
18  * Copyright (C) 2015-2018 Timo Bingmann <[email protected]>
19  *
20  * All rights reserved. Published under the Boost Software License, Version 1.0
21  ******************************************************************************/
22 
23 #ifndef TLX_SORT_STRINGS_MULTIKEY_QUICKSORT_HEADER
24 #define TLX_SORT_STRINGS_MULTIKEY_QUICKSORT_HEADER
25 
27 
28 #include <algorithm>
29 #include <cstddef>
30 #include <utility>
31 
32 namespace tlx {
33 namespace sort_strings_detail {
34 
35 template <typename StringSet>
36 static inline void vec_swap(
37  typename StringSet::Iterator a, typename StringSet::Iterator b, size_t n) {
38  while (n-- > 0)
39  std::swap(*a++, *b++);
40 }
41 
42 template <typename StringSet>
43 static inline typename StringSet::Iterator med3func(
44  const StringSet& ss,
45  typename StringSet::Iterator a, typename StringSet::Iterator b,
46  typename StringSet::Iterator c, size_t depth) {
47  typename StringSet::Char va = ss.get_char(*a, depth);
48  typename StringSet::Char vb = ss.get_char(*b, depth);
49  if (va == vb)
50  return a;
51  typename StringSet::Char vc = ss.get_char(*c, depth);
52  if (vc == va || vc == vb)
53  return c;
54  return va < vb
55  ? (vb < vc ? b : (va < vc ? c : a))
56  : (vb > vc ? b : (va < vc ? a : c));
57 }
58 
59 /*!
60  * Generic multikey quicksort for strings. Based on multikey quicksort, a quick
61  * sort algorithm for arrays of character strings by Bentley and Sedgewick. This
62  * method requires up to O(maxlcp) memory due to the recursion stack and it runs
63  * in expected time O(D + n log n) and worst-case time O(D + n^2).
64  *
65  * J. Bentley and R. Sedgewick. Fast algorithms for sorting and searching
66  * strings. In Proceedings of 8th Annual ACM-SIAM Symposium on Discrete
67  * Algorithms, 1997.
68  */
69 template <typename StringSet>
70 static inline void multikey_quicksort(
71  const StringSet& ss, size_t depth, size_t memory) {
72  typedef typename StringSet::Iterator Iterator;
73 
74  const Iterator a = ss.begin();
75  size_t n = ss.size();
76 
77  // try to estimate the amount of memory in a stack frame
78  static const size_t memory_use =
79  2 * sizeof(size_t) + sizeof(StringSet) + 5 * sizeof(Iterator);
80 
81  if (n < 32 || (memory != 0 && memory < memory_use + 1)) {
82  return insertion_sort(ss, depth, memory);
83  }
84 
85  ptrdiff_t r;
86  Iterator pa, pb, pc, pd, pn;
87 
88  {
89  Iterator pl = a;
90  Iterator pm = a + (n / 2);
91  pn = a + (n - 1);
92  if (n > 30) {
93  // on big arrays: pseudomedian of 9
94  size_t d = (n / 8);
95  pl = med3func(ss, pl, pl + d, pl + 2 * d, depth);
96  pm = med3func(ss, pm - d, pm, pm + d, depth);
97  pn = med3func(ss, pn - 2 * d, pn - d, pn, depth);
98  }
99  pm = med3func(ss, pl, pm, pn, depth);
100  std::swap(*a, *pm);
101  int pivot = ss.get_char(*a, depth);
102  pa = pb = a + 1;
103  pc = pd = a + n - 1;
104  for ( ; ; ) {
105  while (pb <= pc && (r = static_cast<int>(ss.get_char(*pb, depth)) - pivot) <= 0) {
106  if (r == 0) std::swap(*pa++, *pb);
107  pb++;
108  }
109  while (pb <= pc && (r = static_cast<int>(ss.get_char(*pc, depth)) - pivot) >= 0) {
110  if (r == 0) std::swap(*pc, *pd--);
111  pc--;
112  }
113  if (pb > pc) break;
114  std::swap(*pb++, *pc--);
115  }
116  pn = a + n;
117 
118  r = std::min(pa - a, pb - pa);
119  vec_swap<StringSet>(a, pb - r, r);
120  r = std::min(pd - pc, pn - pd - 1);
121  vec_swap<StringSet>(pb, pn - r, r);
122  }
123 
124  if ((r = pb - pa) > 1)
125  multikey_quicksort(ss.sub(a, a + r), depth, memory - memory_use);
126  if (ss.get_char(*(a + r), depth) != 0)
127  multikey_quicksort(ss.sub(a + r, a + r + (pa - a) + (pn - pd - 1)),
128  depth + 1, memory - memory_use);
129  if ((r = pd - pc) > 1)
130  multikey_quicksort(ss.sub(a + n - r, a + n),
131  depth, memory - memory_use);
132 }
133 
134 } // namespace sort_strings_detail
135 } // namespace tlx
136 
137 #endif // !TLX_SORT_STRINGS_MULTIKEY_QUICKSORT_HEADER
138 
139 /******************************************************************************/
static void vec_swap(typename StringSet::Iterator a, typename StringSet::Iterator b, size_t n)
static void insertion_sort(const StringSet &ss, size_t depth, size_t)
void swap(CountingPtr< A, D > &a1, CountingPtr< A, D > &a2) noexcept
static uint_pair min()
return an uint_pair instance containing the smallest value possible
Definition: uint_types.hpp:217
static void multikey_quicksort(const StringSet &ss, size_t depth, size_t memory)
Generic multikey quicksort for strings.
static StringSet::Iterator med3func(const StringSet &ss, typename StringSet::Iterator a, typename StringSet::Iterator b, typename StringSet::Iterator c, size_t depth)