Thrill  0.1
radix_sort.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/common/radix_sort.hpp
3  *
4  * An implementations of generic 8-bit radix sort using key caching (requires n
5  * extra bytes of memory) and in-place permutation reordering.
6  *
7  * Part of Project Thrill - http://project-thrill.org
8  *
9  * Copyright (C) 2012-2016 Timo Bingmann <[email protected]>
10  *
11  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
12  ******************************************************************************/
13 
14 #pragma once
15 #ifndef THRILL_COMMON_RADIX_SORT_HEADER
16 #define THRILL_COMMON_RADIX_SORT_HEADER
17 
19 #include <thrill/common/logger.hpp>
21 
22 #include <algorithm>
23 #include <functional>
24 #include <type_traits>
25 
26 namespace thrill {
27 namespace common {
28 
29 /*!
30  * Internal helper method, use radix_sort_CI below.
31  */
32 template <
33  size_t MaxDepth, typename Iterator, typename Char,
34  typename Comparator =
35  std::less<typename std::iterator_traits<Iterator>::value_type>,
36  typename SubSorter = tlx::NoOperation<void> >
37 static inline
38 void radix_sort_CI(Iterator begin, Iterator end, size_t K,
39  const Comparator& cmp,
40  const SubSorter& sub_sort, size_t depth,
41  Char* char_cache) {
42 
43  const size_t size = end - begin;
44  if (size < 32)
45  return std::sort(begin, end, cmp);
46 
47  using value_type = typename std::iterator_traits<Iterator>::value_type;
48 
49  // cache characters
50  Char* cc = char_cache;
51  for (Iterator it = begin; it != end; ++it, ++cc) {
52  *cc = it->at_radix(depth);
53  assert(static_cast<size_t>(*cc) < K);
54  }
55 
56  // count character occurrences
57  size_t* bkt_size = reinterpret_cast<size_t*>(alloca(K * sizeof(size_t)));
58  std::fill(bkt_size, bkt_size + K, 0);
59  for (const Char* cci = char_cache; cci != char_cache + size; ++cci)
60  ++bkt_size[*cci];
61 
62  // inclusive prefix sum
63  size_t* bkt_index = reinterpret_cast<size_t*>(alloca(K * sizeof(size_t)));
64  bkt_index[0] = bkt_size[0];
65  size_t last_bkt_size = bkt_size[0];
66  for (size_t i = 1; i < K; ++i) {
67  bkt_index[i] = bkt_index[i - 1] + bkt_size[i];
68  if (bkt_size[i]) last_bkt_size = bkt_size[i];
69  }
70 
71  // premute in-place
72  for (size_t i = 0, j; i < size - last_bkt_size; )
73  {
74  value_type v = std::move(begin[i]);
75  Char vc = std::move(char_cache[i]);
76  while ((j = --bkt_index[vc]) > i)
77  {
78  using std::swap;
79  swap(v, begin[j]);
80  swap(vc, char_cache[j]);
81  }
82  begin[i] = std::move(v);
83  i += bkt_size[vc];
84  }
85 
86  if (depth + 1 == MaxDepth) {
87  // allow post-radix sorting when max depth is reached
88  size_t bsum = 0;
89  for (size_t i = 0; i < K; bsum += bkt_size[i++]) {
90  if (bkt_size[i] <= 1) continue;
91  sub_sort(begin + bsum, begin + bsum + bkt_size[i], cmp);
92  }
93  }
94  else {
95  // recurse
96  size_t bsum = 0;
97  for (size_t i = 0; i < K; bsum += bkt_size[i++]) {
98  if (bkt_size[i] <= 1) continue;
99  radix_sort_CI<MaxDepth>(
100  begin + bsum, begin + bsum + bkt_size[i],
101  K, cmp, sub_sort, depth + 1, char_cache);
102  }
103  }
104 }
105 
106 /*!
107  * Radix sort the iterator range [begin,end). Sort unconditionally up to depth
108  * MaxDepth, then call the sub_sort method for further sorting. Small buckets
109  * are sorted using std::sort() with given comparator. Characters are extracted
110  * from items in the range using the at_radix(depth) method. All character
111  * values must be less than K (the counting array size).
112  */
113 template <
114  size_t MaxDepth, typename Iterator,
115  typename Comparator =
116  std::less<typename std::iterator_traits<Iterator>::value_type>,
117  typename SubSorter = tlx::NoOperation<void> >
118 static inline
119 void radix_sort_CI(Iterator begin, Iterator end, size_t K,
120  const Comparator& cmp = Comparator(),
121  const SubSorter& sub_sort = SubSorter()) {
122 
123  if (MaxDepth == 0) {
124  // allow post-radix sorting when max depth is reached
125  sub_sort(begin, end, cmp);
126  return;
127  }
128 
129  const size_t size = end - begin;
130 
131  using CharRet = decltype(begin->at_radix(0));
132  using Char = typename std::remove_cv<
133  typename std::remove_reference<CharRet>::type>::type;
134 
135  // allocate character cache once
136  Char* char_cache = new Char[size];
137  radix_sort_CI<MaxDepth>(
138  begin, end, K, cmp, sub_sort, /* depth */ 0, char_cache);
139  delete[] char_cache;
140 }
141 
142 /*!
143  * SortAlgorithm class for use with api::Sort() which calls radix_sort_CI() if K
144  * is small enough.
145  */
146 template <typename Type, size_t MaxDepth>
148 {
149 public:
150  explicit RadixSort(size_t K) : K_(K) { }
151  template <typename Iterator, typename CompareFunction>
152  void operator () (Iterator begin, Iterator end,
153  const CompareFunction& cmp) const {
154  if (K_ < 4096)
155  thrill::common::radix_sort_CI<MaxDepth>(begin, end, K_, cmp);
156  else
157  std::sort(begin, end, cmp);
158  }
159 
160 private:
161  const size_t K_;
162 };
163 
164 } // namespace common
165 } // namespace thrill
166 
167 #endif // !THRILL_COMMON_RADIX_SORT_HEADER
168 
169 /******************************************************************************/
Specialized noop functor which returns a void.
static void radix_sort_CI(Iterator begin, Iterator end, size_t K, const Comparator &cmp, const SubSorter &sub_sort, size_t depth, Char *char_cache)
Internal helper method, use radix_sort_CI below.
Definition: radix_sort.hpp:38
void operator()(Iterator begin, Iterator end, const CompareFunction &cmp) const
Definition: radix_sort.hpp:152
void swap(CountingPtr< A, D > &a1, CountingPtr< A, D > &a2) noexcept
static const uint64_t K[80]
Definition: sha512.cpp:32
SortAlgorithm class for use with api::Sort() which calls radix_sort_CI() if K is small enough...
Definition: radix_sort.hpp:147