Thrill  0.1
multiway_merge.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/core/multiway_merge.hpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
9  ******************************************************************************/
10 
11 #pragma once
12 #ifndef THRILL_CORE_MULTIWAY_MERGE_HEADER
13 #define THRILL_CORE_MULTIWAY_MERGE_HEADER
14 
16 
17 #include <algorithm>
18 #include <functional>
19 #include <utility>
20 #include <vector>
21 
22 namespace thrill {
23 namespace core {
24 
25 template <
26  typename ValueType,
27  typename ReaderIterator,
28  typename Comparator,
29  bool Stable = false>
31 {
32 public:
33  using Reader = typename std::iterator_traits<ReaderIterator>::value_type;
34 
35  using LoserTreeType = tlx::LoserTree<Stable, ValueType, Comparator>;
36 
37  MultiwayMergeTree(ReaderIterator readers_begin, ReaderIterator readers_end,
38  const Comparator& comp)
39  : readers_(readers_begin),
40  num_inputs_(static_cast<unsigned>(readers_end - readers_begin)),
42  lt_(static_cast<unsigned>(num_inputs_), comp),
44 
45  for (unsigned t = 0; t < num_inputs_; ++t)
46  {
47  if (TLX_LIKELY(readers_[t].HasNext())) {
48  current_[t].first = true;
49  current_[t].second = readers_[t].template Next<ValueType>();
50  lt_.insert_start(&current_[t].second, t, false);
51  }
52  else {
53  current_[t].first = false;
54  lt_.insert_start(nullptr, t, true);
55  assert(remaining_inputs_ > 0);
57  }
58  }
59 
60  lt_.init();
61  }
62 
63  bool HasNext() const {
64  return (remaining_inputs_ != 0);
65  }
66 
67  std::pair<ValueType, unsigned> NextWithSource() {
68  // take next smallest element out
69  unsigned top = lt_.min_source();
70  ValueType res = std::move(current_[top].second);
71 
72  if (TLX_LIKELY(readers_[top].HasNext())) {
73  current_[top].first = true;
74  current_[top].second = readers_[top].template Next<ValueType>();
75  lt_.delete_min_insert(&current_[top].second, false);
76  }
77  else {
78  current_[top].first = false;
79  lt_.delete_min_insert(nullptr, true);
80  assert(remaining_inputs_ > 0);
82  }
83 
84  return std::make_pair(res, top);
85  }
86 
87  ValueType Next() {
88 
89  // take next smallest element out
90  unsigned top = lt_.min_source();
91  ValueType res = std::move(current_[top].second);
92 
93  if (TLX_LIKELY(readers_[top].HasNext())) {
94  current_[top].first = true;
95  current_[top].second = readers_[top].template Next<ValueType>();
96  lt_.delete_min_insert(&current_[top].second, false);
97  }
98  else {
99  current_[top].first = false;
100  lt_.delete_min_insert(nullptr, true);
101  assert(remaining_inputs_ > 0);
103  }
104 
105  return res;
106  }
107 
108 private:
109  ReaderIterator readers_;
110  unsigned num_inputs_;
112 
114  //! current values in each input (exist flag, value)
115  std::vector<std::pair<bool, ValueType> > current_;
116 };
117 
118 /*!
119  * Sequential multi-way merging switch for a file writer as output
120  *
121  * The decision if based on the branching factor and runtime settings.
122  *
123  * \param seqs_begin Begin iterator of iterator pair input sequence.
124  * \param seqs_end End iterator of iterator pair input sequence.
125  * \param comp Comparator.
126  * \tparam Stable Stable merging incurs a performance penalty.
127  * \tparam Sentinels The sequences have a sentinel element.
128  * \return End iterator of output sequence.
129  */
130 template <typename ValueType, typename ReaderIterator,
131  typename Comparator = std::less<ValueType> >
133  ReaderIterator seqs_begin, ReaderIterator seqs_end,
134  const Comparator& comp = Comparator()) {
135 
136  assert(seqs_end - seqs_begin >= 1);
138  seqs_begin, seqs_end, comp);
139 }
140 
141 /*!
142  * Sequential multi-way merging switch for a file writer as output
143  *
144  * The decision if based on the branching factor and runtime settings.
145  *
146  * The merging is stable, ie, if the two next elements are equal according to
147  * the comparator, that of the file with the lower index is taken.
148  *
149  * \param seqs_begin Begin iterator of iterator pair input sequence.
150  * \param seqs_end End iterator of iterator pair input sequence.
151  * \param comp Comparator.
152  * \tparam Stable Stable merging incurs a performance penalty.
153  * \tparam Sentinels The sequences have a sentinel element.
154  * \return End iterator of output sequence.
155  */
156 template <typename ValueType, typename ReaderIterator,
157  typename Comparator = std::less<ValueType> >
159  ReaderIterator seqs_begin, ReaderIterator seqs_end,
160  const Comparator& comp = Comparator()) {
161 
162  assert(seqs_end - seqs_begin >= 1);
164  seqs_begin, seqs_end, comp);
165 }
166 
167 } // namespace core
168 } // namespace thrill
169 
170 #endif // !THRILL_CORE_MULTIWAY_MERGE_HEADER
171 
172 /******************************************************************************/
MultiwayMergeTree(ReaderIterator readers_begin, ReaderIterator readers_end, const Comparator &comp)
auto make_stable_multiway_merge_tree(ReaderIterator seqs_begin, ReaderIterator seqs_end, const Comparator &comp=Comparator())
Sequential multi-way merging switch for a file writer as output.
#define TLX_LIKELY(c)
Definition: likely.hpp:23
std::pair< ValueType, unsigned > NextWithSource()
auto make_multiway_merge_tree(ReaderIterator seqs_begin, ReaderIterator seqs_end, const Comparator &comp=Comparator())
Sequential multi-way merging switch for a file writer as output.
tlx::LoserTree< Stable, ValueType, Comparator > LoserTreeType
std::vector< std::pair< bool, ValueType > > current_
current values in each input (exist flag, value)
typename std::iterator_traits< ReaderIterator >::value_type Reader