Thrill  0.1
insertion_sort.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/sort/strings/insertion_sort.hpp
3  *
4  * Base insertion string sort. This is an internal implementation header, see
5  * tlx/sort/strings.hpp for public front-end functions.
6  *
7  * Part of tlx - http://panthema.net/tlx
8  *
9  * Copyright (C) 2015-2019 Timo Bingmann <[email protected]>
10  *
11  * All rights reserved. Published under the Boost Software License, Version 1.0
12  ******************************************************************************/
13 
14 #ifndef TLX_SORT_STRINGS_INSERTION_SORT_HEADER
15 #define TLX_SORT_STRINGS_INSERTION_SORT_HEADER
16 
17 #include <tlx/define/likely.hpp>
18 #include <tlx/meta/enable_if.hpp>
20 
21 namespace tlx {
22 
23 //! \addtogroup tlx_sort
24 //! \{
25 
26 namespace sort_strings_detail {
27 
28 /******************************************************************************/
29 
30 //! Generic insertion sort for abstract string sets. This method only requires
31 //! O(1) additional memory for sorting n strings, but runs in time O(nD).
32 template <typename StringPtr>
33 static inline
35 insertion_sort(const StringPtr& strptr, size_t depth, size_t /* memory */) {
36  typedef typename StringPtr::StringSet StringSet;
37  typedef typename StringSet::Iterator Iterator;
38  typedef typename StringSet::String String;
39  typedef typename StringSet::CharIterator CharIterator;
40 
41  // this stores the begin iterator and size n, making the loops faster
42  const typename StringPtr::StringSet& ss = strptr.active();
43  size_t n = ss.size();
44  if (n <= 1) return;
45 
46  const Iterator begin = ss.begin();
47  Iterator j;
48 
49  for (Iterator i = begin + 1; TLX_UNLIKELY(--n != 0); ++i)
50  {
51  String tmp = std::move(ss[i]);
52  j = i;
53 
54  while (TLX_LIKELY(j != begin))
55  {
56  CharIterator s = ss.get_chars(ss[j - 1], depth);
57  CharIterator t = ss.get_chars(tmp, depth);
58 
59  while (TLX_LIKELY(ss.is_equal(ss[j - 1], s, tmp, t)))
60  ++s, ++t;
61 
62  if (TLX_UNLIKELY(ss.is_leq(ss[j - 1], s, tmp, t))) {
63  break;
64  }
65 
66  ss[j] = std::move(ss[j - 1]);
67  --j;
68  }
69 
70  ss[j] = std::move(tmp);
71  }
72 }
73 
74 /******************************************************************************/
75 
76 //! LCP insertion sort for abstract string sets. Enabled via SFINAE if
77 //! StringPtr::with_lcp is true. This method only requires O(1) additional
78 //! memory for sorting n strings, and runs in time O(n^2 + D).
79 template <typename StringPtr>
80 static inline
82 insertion_sort(const StringPtr& strptr, size_t depth, size_t /* memory */) {
83  typedef typename StringPtr::StringSet StringSet;
84  typedef typename StringPtr::LcpType LcpType;
85  typedef typename StringSet::Iterator Iterator;
86  typedef typename StringSet::String String;
87  typedef typename StringSet::CharIterator CharIterator;
88 
89  // this stores the begin iterator and size n, making the loops faster
90  const StringSet& ss = strptr.active();
91  size_t n = ss.size();
92  if (n <= 1) return;
93 
94  const Iterator begin = ss.begin();
95 
96  for (size_t j = 0; j < n - 1; ++j)
97  {
98  // insert strings[j] into sorted strings[0..j-1]
99 
100  String new_str = std::move(ss[begin + j]);
101  LcpType new_lcp = depth; // start with LCP depth
102 
103  size_t i = j;
104  while (i > 0)
105  {
106  LcpType prev_lcp = new_lcp;
107 
108  String cur_str = std::move(ss[begin + i - 1]);
109  LcpType cur_lcp = strptr.get_lcp(i);
110 
111  if (cur_lcp < new_lcp)
112  {
113  // CASE 1: lcp goes down -> insert string
114 
115  // move comparison string back
116  ss[begin + i - 1] = std::move(cur_str);
117  break;
118  }
119  else if (cur_lcp == new_lcp)
120  {
121  // CASE 2: compare more characters
122 
123  CharIterator c1 = ss.get_chars(new_str, new_lcp);
124  CharIterator c2 = ss.get_chars(cur_str, new_lcp);
125 
126  while (ss.is_equal(new_str, c1, cur_str, c2))
127  ++c1, ++c2, ++new_lcp;
128 
129  // if (new_str >= curr_str) -> insert string
130  if (!ss.is_less(new_str, c1, cur_str, c2))
131  {
132  // update lcp of prev (smaller string) with inserted string
133  strptr.set_lcp(i, new_lcp);
134  // lcp of inserted string with next string
135  new_lcp = prev_lcp;
136 
137  // move comparison string back
138  ss[begin + i - 1] = std::move(cur_str);
139  break;
140  }
141  }
142  // else (cur_lcp > new_lcp), CASE 3: nothing to do
143 
144  ss[begin + i] = std::move(cur_str);
145  strptr.set_lcp(i + 1, cur_lcp);
146 
147  --i;
148  }
149 
150  ss[begin + i] = std::move(new_str);
151  strptr.set_lcp(i + 1, new_lcp);
152  }
153 
154  // last loop specialized with checks for out-of-bound access to lcp.
155  {
156  size_t j = n - 1;
157 
158  // insert strings[j] into sorted strings[0..j-1]
159 
160  String new_str = std::move(ss[begin + j]);
161  LcpType new_lcp = depth; // start with LCP depth
162 
163  size_t i = j;
164  while (i > 0)
165  {
166  LcpType prev_lcp = new_lcp;
167 
168  String cur_str = std::move(ss[begin + i - 1]);
169  LcpType cur_lcp = strptr.get_lcp(i);
170 
171  if (cur_lcp < new_lcp)
172  {
173  // CASE 1: lcp goes down -> insert string
174 
175  // move comparison string back
176  ss[begin + i - 1] = std::move(cur_str);
177  break;
178  }
179  else if (cur_lcp == new_lcp)
180  {
181  // CASE 2: compare more characters
182 
183  CharIterator c1 = ss.get_chars(new_str, new_lcp);
184  CharIterator c2 = ss.get_chars(cur_str, new_lcp);
185 
186  while (ss.is_equal(new_str, c1, cur_str, c2))
187  ++c1, ++c2, ++new_lcp;
188 
189  // if (new_str >= curr_str) -> insert string
190  if (!ss.is_less(new_str, c1, cur_str, c2))
191  {
192  // update lcp of prev (smaller string) with inserted string
193  strptr.set_lcp(i, new_lcp);
194  // lcp of inserted string with next string
195  new_lcp = prev_lcp;
196 
197  // move comparison string back
198  ss[begin + i - 1] = std::move(cur_str);
199  break;
200  }
201  }
202  // else (cur_lcp > new_lcp), CASE 3: nothing to do
203 
204  ss[begin + i] = std::move(cur_str);
205  if (i + 1 < n) // check out-of-bounds copy
206  strptr.set_lcp(i + 1, cur_lcp);
207 
208  --i;
209  }
210 
211  ss[begin + i] = std::move(new_str);
212  if (i + 1 < n) { // check out-of-bounds save
213  strptr.set_lcp(i + 1, new_lcp);
214  }
215  }
216 }
217 
218 /******************************************************************************/
219 
220 } // namespace sort_strings_detail
221 
222 //! \}
223 
224 } // namespace tlx
225 
226 #endif // !TLX_SORT_STRINGS_INSERTION_SORT_HEADER
227 
228 /******************************************************************************/
void set_lcp(size_t, const LcpType &) const
set the i-th lcp to v and check its value
Definition: string_ptr.hpp:79
#define TLX_UNLIKELY(c)
Definition: likely.hpp:24
#define TLX_LIKELY(c)
Definition: likely.hpp:23
static enable_if<!StringPtr::with_lcp, void >::type insertion_sort(const StringPtr &strptr, size_t depth, size_t)
const StringSet & active() const
return currently active array
Definition: string_ptr.hpp:63
SFINAE enable_if – copy of std::enable_if<> with less extra cruft.
Definition: enable_if.hpp:21
Objectified string array pointer array.
Definition: string_ptr.hpp:47