Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
multiway_merge.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/core/multiway_merge.hpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
9  ******************************************************************************/
10 
11 #pragma once
12 #ifndef THRILL_CORE_MULTIWAY_MERGE_HEADER
13 #define THRILL_CORE_MULTIWAY_MERGE_HEADER
14 
16 
17 #include <algorithm>
18 #include <functional>
19 #include <utility>
20 #include <vector>
21 
22 namespace thrill {
23 namespace core {
24 
25 template <
26  typename ValueType,
27  typename ReaderIterator,
28  typename Comparator,
29  bool Stable = false>
31 {
32 public:
33  using Reader = typename std::iterator_traits<ReaderIterator>::value_type;
34 
35  using LoserTreeType = tlx::LoserTree<
36  /* stable */ Stable, ValueType, Comparator>;
37 
38  MultiwayMergeTree(ReaderIterator readers_begin, ReaderIterator readers_end,
39  const Comparator& comp)
40  : readers_(readers_begin),
41  num_inputs_(static_cast<unsigned>(readers_end - readers_begin)),
43  lt_(static_cast<unsigned>(num_inputs_), comp),
45 
46  for (unsigned t = 0; t < num_inputs_; ++t)
47  {
48  if (TLX_LIKELY(readers_[t].HasNext())) {
49  current_[t].first = true;
50  current_[t].second = readers_[t].template Next<ValueType>();
51  lt_.insert_start(&current_[t].second, t, false);
52  }
53  else {
54  current_[t].first = false;
55  lt_.insert_start(nullptr, t, true);
56  assert(remaining_inputs_ > 0);
58  }
59  }
60 
61  lt_.init();
62  }
63 
64  bool HasNext() const {
65  return (remaining_inputs_ != 0);
66  }
67 
68  std::pair<ValueType, unsigned> NextWithSource() {
69  // take next smallest element out
70  unsigned top = lt_.min_source();
71  ValueType res = std::move(current_[top].second);
72 
73  if (TLX_LIKELY(readers_[top].HasNext())) {
74  current_[top].first = true;
75  current_[top].second = readers_[top].template Next<ValueType>();
76  lt_.delete_min_insert(&current_[top].second, false);
77  }
78  else {
79  current_[top].first = false;
80  lt_.delete_min_insert(nullptr, true);
81  assert(remaining_inputs_ > 0);
83  }
84 
85  return std::make_pair(res, top);
86  }
87 
88  ValueType Next() {
89 
90  // take next smallest element out
91  unsigned top = lt_.min_source();
92  ValueType res = std::move(current_[top].second);
93 
94  if (TLX_LIKELY(readers_[top].HasNext())) {
95  current_[top].first = true;
96  current_[top].second = readers_[top].template Next<ValueType>();
97  lt_.delete_min_insert(&current_[top].second, false);
98  }
99  else {
100  current_[top].first = false;
101  lt_.delete_min_insert(nullptr, true);
102  assert(remaining_inputs_ > 0);
104  }
105 
106  return res;
107  }
108 
109 private:
110  ReaderIterator readers_;
111  unsigned num_inputs_;
113 
115  //! current values in each input (exist flag, value)
116  std::vector<std::pair<bool, ValueType> > current_;
117 };
118 
119 /*!
120  * Sequential multi-way merging switch for a file writer as output
121  *
122  * The decision if based on the branching factor and runtime settings.
123  *
124  * \param seqs_begin Begin iterator of iterator pair input sequence.
125  * \param seqs_end End iterator of iterator pair input sequence.
126  * \param comp Comparator.
127  * \tparam Stable Stable merging incurs a performance penalty.
128  * \tparam Sentinels The sequences have a sentinel element.
129  * \return End iterator of output sequence.
130  */
131 template <typename ValueType, typename ReaderIterator,
132  typename Comparator = std::less<ValueType> >
134  ReaderIterator seqs_begin, ReaderIterator seqs_end,
135  const Comparator& comp = Comparator()) {
136 
137  assert(seqs_end - seqs_begin >= 1);
139  seqs_begin, seqs_end, comp);
140 }
141 
142 /*!
143  * Sequential multi-way merging switch for a file writer as output
144  *
145  * The decision if based on the branching factor and runtime settings.
146  *
147  * The merging is stable, ie, if the two next elements are equal according to
148  * the comparator, that of the file with the lower index is taken.
149  *
150  * \param seqs_begin Begin iterator of iterator pair input sequence.
151  * \param seqs_end End iterator of iterator pair input sequence.
152  * \param comp Comparator.
153  * \tparam Stable Stable merging incurs a performance penalty.
154  * \tparam Sentinels The sequences have a sentinel element.
155  * \return End iterator of output sequence.
156  */
157 template <typename ValueType, typename ReaderIterator,
158  typename Comparator = std::less<ValueType> >
160  ReaderIterator seqs_begin, ReaderIterator seqs_end,
161  const Comparator& comp = Comparator()) {
162 
163  assert(seqs_end - seqs_begin >= 1);
165  seqs_begin, seqs_end, comp);
166 }
167 
168 } // namespace core
169 } // namespace thrill
170 
171 #endif // !THRILL_CORE_MULTIWAY_MERGE_HEADER
172 
173 /******************************************************************************/
MultiwayMergeTree(ReaderIterator readers_begin, ReaderIterator readers_end, const Comparator &comp)
auto make_stable_multiway_merge_tree(ReaderIterator seqs_begin, ReaderIterator seqs_end, const Comparator &comp=Comparator())
Sequential multi-way merging switch for a file writer as output.
#define TLX_LIKELY(c)
Definition: likely.hpp:23
std::pair< ValueType, unsigned > NextWithSource()
auto make_multiway_merge_tree(ReaderIterator seqs_begin, ReaderIterator seqs_end, const Comparator &comp=Comparator())
Sequential multi-way merging switch for a file writer as output.
std::vector< std::pair< bool, ValueType > > current_
current values in each input (exist flag, value)
tlx::LoserTree< Stable, ValueType, Comparator > LoserTreeType
typename std::iterator_traits< ReaderIterator >::value_type Reader