Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
radix_sort.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/common/radix_sort.hpp
3  *
4  * An implementations of generic 8-bit radix sort using key caching (requires n
5  * extra bytes of memory) and in-place permutation reordering.
6  *
7  * Part of Project Thrill - http://project-thrill.org
8  *
9  * Copyright (C) 2012-2016 Timo Bingmann <[email protected]>
10  *
11  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
12  ******************************************************************************/
13 
14 #pragma once
15 #ifndef THRILL_COMMON_RADIX_SORT_HEADER
16 #define THRILL_COMMON_RADIX_SORT_HEADER
17 
19 #include <thrill/common/logger.hpp>
20 
21 #include <algorithm>
22 #include <functional>
23 #include <type_traits>
24 
25 namespace thrill {
26 namespace common {
27 
28 /*!
29  * Internal helper method, use radix_sort_CI below.
30  */
31 template <
32  size_t MaxDepth, typename Iterator, typename Char,
33  typename Comparator =
34  std::less<typename std::iterator_traits<Iterator>::value_type>,
35  typename SubSorter = NoOperation<void> >
36 static inline
37 void radix_sort_CI(Iterator begin, Iterator end, size_t K,
38  const Comparator& cmp,
39  const SubSorter& sub_sort, size_t depth,
40  Char* char_cache) {
41 
42  const size_t size = end - begin;
43  if (size < 32)
44  return std::sort(begin, end, cmp);
45 
46  using value_type = typename std::iterator_traits<Iterator>::value_type;
47 
48  // cache characters
49  Char* cc = char_cache;
50  for (Iterator it = begin; it != end; ++it, ++cc) {
51  *cc = it->at_radix(depth);
52  assert(static_cast<size_t>(*cc) < K);
53  }
54 
55  // count character occurrences
56  size_t* bkt_size = reinterpret_cast<size_t*>(alloca(K * sizeof(size_t)));
57  std::fill(bkt_size, bkt_size + K, 0);
58  for (const Char* cci = char_cache; cci != char_cache + size; ++cci)
59  ++bkt_size[*cci];
60 
61  // inclusive prefix sum
62  size_t* bkt_index = reinterpret_cast<size_t*>(alloca(K * sizeof(size_t)));
63  bkt_index[0] = bkt_size[0];
64  size_t last_bkt_size = bkt_size[0];
65  for (size_t i = 1; i < K; ++i) {
66  bkt_index[i] = bkt_index[i - 1] + bkt_size[i];
67  if (bkt_size[i]) last_bkt_size = bkt_size[i];
68  }
69 
70  // premute in-place
71  for (size_t i = 0, j; i < size - last_bkt_size; )
72  {
73  value_type v = std::move(begin[i]);
74  Char vc = std::move(char_cache[i]);
75  while ((j = --bkt_index[vc]) > i)
76  {
77  using std::swap;
78  swap(v, begin[j]);
79  swap(vc, char_cache[j]);
80  }
81  begin[i] = std::move(v);
82  i += bkt_size[vc];
83  }
84 
85  if (depth + 1 == MaxDepth) {
86  // allow post-radix sorting when max depth is reached
87  size_t bsum = 0;
88  for (size_t i = 0; i < K; bsum += bkt_size[i++]) {
89  if (bkt_size[i] <= 1) continue;
90  sub_sort(begin + bsum, begin + bsum + bkt_size[i], cmp);
91  }
92  }
93  else {
94  // recurse
95  size_t bsum = 0;
96  for (size_t i = 0; i < K; bsum += bkt_size[i++]) {
97  if (bkt_size[i] <= 1) continue;
98  radix_sort_CI<MaxDepth>(
99  begin + bsum, begin + bsum + bkt_size[i],
100  K, cmp, sub_sort, depth + 1, char_cache);
101  }
102  }
103 }
104 
105 /*!
106  * Radix sort the iterator range [begin,end). Sort unconditionally up to depth
107  * MaxDepth, then call the sub_sort method for further sorting. Small buckets
108  * are sorted using std::sort() with given comparator. Characters are extracted
109  * from items in the range using the at_radix(depth) method. All character
110  * values must be less than K (the counting array size).
111  */
112 template <
113  size_t MaxDepth, typename Iterator,
114  typename Comparator =
115  std::less<typename std::iterator_traits<Iterator>::value_type>,
116  typename SubSorter = NoOperation<void> >
117 static inline
118 void radix_sort_CI(Iterator begin, Iterator end, size_t K,
119  const Comparator& cmp = Comparator(),
120  const SubSorter& sub_sort = SubSorter()) {
121 
122  if (MaxDepth == 0) {
123  // allow post-radix sorting when max depth is reached
124  sub_sort(begin, end, cmp);
125  return;
126  }
127 
128  const size_t size = end - begin;
129 
130  using CharRet = decltype(begin->at_radix(0));
131  using Char = typename std::remove_cv<
132  typename std::remove_reference<CharRet>::type>::type;
133 
134  // allocate character cache once
135  Char* char_cache = new Char[size];
136  radix_sort_CI<MaxDepth>(
137  begin, end, K, cmp, sub_sort, /* depth */ 0, char_cache);
138  delete[] char_cache;
139 }
140 
141 /*!
142  * SortAlgorithm class for use with api::Sort() which calls radix_sort_CI() if K
143  * is small enough.
144  */
145 template <typename Type, size_t MaxDepth>
147 {
148 public:
149  explicit RadixSort(size_t K) : K_(K) { }
150  template <typename Iterator, typename CompareFunction>
151  void operator () (Iterator begin, Iterator end,
152  const CompareFunction& cmp) const {
153  if (K_ < 4096)
154  thrill::common::radix_sort_CI<MaxDepth>(begin, end, K_, cmp);
155  else
156  std::sort(begin, end, cmp);
157  }
158 
159 private:
160  const size_t K_;
161 };
162 
163 } // namespace common
164 } // namespace thrill
165 
166 #endif // !THRILL_COMMON_RADIX_SORT_HEADER
167 
168 /******************************************************************************/
static void radix_sort_CI(Iterator begin, Iterator end, size_t K, const Comparator &cmp, const SubSorter &sub_sort, size_t depth, Char *char_cache)
Internal helper method, use radix_sort_CI below.
Definition: radix_sort.hpp:37
void swap(CountingPtr< A, D > &a1, CountingPtr< A, D > &a2) noexcept
void operator()(Iterator begin, Iterator end, const CompareFunction &cmp) const
Definition: radix_sort.hpp:151
SortAlgorithm class for use with api::Sort() which calls radix_sort_CI() if K is small enough...
Definition: radix_sort.hpp:146