Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
rebalance.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/api/rebalance.hpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  *
8  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
9  ******************************************************************************/
10 
11 #pragma once
12 #ifndef THRILL_API_REBALANCE_HEADER
13 #define THRILL_API_REBALANCE_HEADER
14 
15 #include <thrill/api/dia.hpp>
16 #include <thrill/api/dop_node.hpp>
17 #include <thrill/common/logger.hpp>
18 #include <thrill/data/file.hpp>
19 
20 #include <algorithm>
21 #include <vector>
22 
23 namespace thrill {
24 namespace api {
25 
26 /*!
27  * \ingroup api_layer
28  */
//! Distributed operation node that rebalances the items of a single parent
//! DIA among all workers. Items are collected in a local File during the
//! PreOp phase, Execute() scatters them over a CatStream, and PushData()
//! forwards the items received from that stream to the children.
//!
//! NOTE(review): this listing skips source lines 35, 52, 116 and 119. The
//! cross-references below the listing place `const bool parent_stack_empty_`
//! at line 116 and `data::CatStreamPtr stream_` at line 119; line 35
//! presumably declares the `Super` alias used below -- confirm against the
//! real header.
29 template <typename ValueType>
30 class RebalanceNode final : public DOpNode<ValueType>
31 {
    //! Switch for the LOG / sLOG statements of this class (they output only
    //! if the local debug variable is true).
32  static constexpr bool debug = false;
33 
34 public:
36  using Super::context_;
37 
    //! Constructs the node from its parent DIA.
    //!
    //! Remembers whether the parent's function stack is empty, then attaches
    //! this node as a child of the parent node with a function chain whose
    //! last step stores every incoming item in the local file via writer_.
    //!
    //! \param parent DIA whose items are to be rebalanced.
38  template <typename ParentDIA>
39  explicit RebalanceNode(const ParentDIA& parent)
40  : Super(parent.ctx(), "Rebalance", { parent.id() }, { parent.node() }),
41  parent_stack_empty_(ParentDIA::stack_empty) {
42 
    // Final step of the parent's function chain: append the item to file_.
43  auto save_fn = [this](const ValueType& input) {
44  writer_.Put(input);
45  };
46  auto lop_chain = parent.stack().push(save_fn).fold();
47  parent.node()->AddChild(this, lop_chain);
48  }
49 
    //! Receives a complete File from the parent instead of single items.
    //!
    //! \param file File offered by the parent.
    //! \return false if the parent's function stack is non-empty (the File is
    //! rejected); true after the File was copied into file_.
50  bool OnPreOpFile(const data::File& file, size_t /* parent_index */) final {
51  if (!parent_stack_empty_) {
    // NOTE(review): the head of this log statement (source line 52) is
    // missing from the listing; presumably LOGC(common::g_debug_push_file),
    // both of which appear in the cross-references below -- confirm.
53  << "Rebalance rejected File from parent "
54  << "due to non-empty function stack.";
55  return false;
56  }
    // Nothing may have been written item-wise before the File replaces file_.
57  assert(file_.num_items() == 0);
58  file_ = file.Copy();
59  return true;
60  }
61 
    //! Ends the PreOp phase by closing the writer into the local file.
62  void StopPreOp(size_t /* id */) final {
63  // Finish writing into the local file.
64  writer_.Close();
65  }
66 
67  //! Executes the rebalance operation.
    //!
    //! Derives, from the local item count and its exclusive prefix sum over
    //! all workers, one offset into the local file per worker and hands the
    //! file together with these offsets to the stream's Scatter.
68  void Execute() final {
69  LOG << "RebalanceNode::Execute() processing";
70 
71  size_t local_size;
72  local_size = file_.num_items();
73  sLOG << "local_size" << local_size;
74 
    // local_rank is passed by reference to the exclusive prefix sum over all
    // workers; the call's return value is the total over all workers.
    // NOTE(review): presumably local_rank afterwards holds the number of
    // items on preceding workers -- confirm against ExPrefixSumTotal.
75  size_t local_rank = local_size;
76  size_t global_size = context_.net.ExPrefixSumTotal(local_rank);
77  sLOG << "local_rank" << local_rank;
78  sLOG << "global_size" << global_size;
79 
80  const size_t num_workers = context_.num_workers();
    // Fractional number of items each worker gets on average.
81  const double pre_pe =
82  static_cast<double>(global_size) / static_cast<double>(num_workers);
83 
84  // calculate offset vector
85  std::vector<size_t> offsets(num_workers + 1, 0);
86  for (size_t p = 0; p < num_workers; ++p) {
    // Truncated global index p * pre_pe, i.e. the boundary for worker p.
87  size_t limit = static_cast<size_t>(static_cast<double>(p) * pre_pe);
    // Boundary lies before this worker's local_rank: offset stays 0.
88  if (limit < local_rank) continue;
89 
    // Convert to a local index, clamped to the number of local items.
90  offsets[p] = std::min(limit - local_rank, file_.num_items());
91  }
    // The last entry is always the number of local items.
92  offsets[num_workers] = file_.num_items();
93  LOG << "offsets = " << offsets;
94 
    // NOTE(review): presumably Scatter sends the local range
    // [offsets[p], offsets[p + 1]) to worker p -- confirm against
    // data::Stream::Scatter.
95  stream_->template Scatter<ValueType>(
96  file_, offsets, /* consume */ true);
97  }
98 
    //! Reads every item available from the stream's CatReader and pushes it
    //! to all children.
    //!
    //! \param consume forwarded unchanged to GetCatReader().
99  void PushData(bool consume) final {
100  auto reader = stream_->GetCatReader(consume);
101  while (reader.HasNext()) {
102  this->PushItem(reader.template Next<ValueType>());
103  }
104  }
105 
    //! Clears the local file.
106  void Dispose() final {
107  file_.Clear();
108  }
109 
110 private:
111  //! Local data file
112  data::File file_ { context_.GetFile(this) };
113  //! Data writer to local file (only active in PreOp).
114  data::File::Writer writer_ { file_.GetWriter() };
115  //! Whether the parent stack is empty
117 
118  //! CatStream for exchange
120 };
121 
//! Creates a RebalanceNode on top of this DIA and returns a DIA<ValueType>
//! wrapping the new node; the DIA is asserted to be valid first.
//!
//! NOTE(review): this listing skips source lines 123 and 125. The
//! cross-references below the listing place `auto Rebalance() const` at
//! line 123, so the missing signature is presumably
//! DIA<ValueType, Stack>::Rebalance() const; line 125 presumably aliases
//! RebalanceNode to api::RebalanceNode<ValueType> -- confirm against the
//! real header.
122 template <typename ValueType, typename Stack>
124  assert(IsValid());
126  return DIA<ValueType>(tlx::make_counting<RebalanceNode>(*this));
127 }
128 
129 } // namespace api
130 } // namespace thrill
131 
132 #endif // !THRILL_API_REBALANCE_HEADER
133 
134 /******************************************************************************/
T TLX_ATTRIBUTE_WARN_UNUSED_RESULT ExPrefixSumTotal(T &value, const T &initial=T(), const BinarySumOp &sum_op=BinarySumOp())
Calculates the exclusive prefix sum over all workers, and delivers the total sum as well...
net::FlowControlChannel & net
Definition: context.hpp:443
virtual void Dispose()
Virtual clear method. Triggers actual disposing in sub-classes.
Definition: dia_base.hpp:188
DIA is the interface between the user and the Thrill framework.
Definition: dia.hpp:141
static constexpr bool debug
Definition: rebalance.hpp:32
BlockWriter contains a temporary Block object into which a) any serializable item can be stored or b)...
static constexpr bool g_debug_push_file
Definition: config.hpp:44
virtual void PushData(bool consume)=0
Virtual method for pushing data. Triggers actual pushing in sub-classes.
const bool parent_stack_empty_
Whether the parent stack is empty.
Definition: rebalance.hpp:116
data::File::Writer writer_
Data writer to local file (only active in PreOp).
Definition: rebalance.hpp:114
#define sLOG
Default logging method: output if the local debug variable is true.
Definition: logger.hpp:184
auto Rebalance() const
Rebalance is a DOp, which rebalances a single DIA among all workers; in general, this operation is ne...
Definition: rebalance.hpp:123
data::CatStreamPtr GetNewCatStream(size_t dia_id)
Definition: context.cpp:1144
void PushItem(const ValueType &item) const
Method for derived classes to Push a single item to all children.
Definition: dia_node.hpp:147
RebalanceNode(const ParentDIA &parent)
Definition: rebalance.hpp:39
virtual void StopPreOp(size_t)
Virtual method for preparing end of PushData.
Definition: dia_base.hpp:173
#define LOGC(cond)
Explicitly specify the condition for logging.
Definition: logger.hpp:167
virtual void Execute()=0
Virtual execution method. Triggers actual computation in sub-classes.
data::File GetFile(size_t dia_id)
Returns a new File object containing a sequence of local Blocks.
Definition: context.hpp:280
A DOpNode is a typed node representing a distributed operation in Thrill.
Definition: dop_node.hpp:32
TLX_ATTRIBUTE_ALWAYS_INLINE BlockWriter & Put(const T &x)
Put appends a complete item, or fails with a FullException.
virtual bool OnPreOpFile(const data::File &, size_t)
Definition: dia_base.hpp:168
void Close()
Explicitly close the writer.
size_t num_workers() const
Global number of workers in the system.
Definition: context.hpp:248
static constexpr const T & min(const T &a, const T &b)
template for constexpr min, because std::min is not good enough.
Definition: functional.hpp:59
data::CatStreamPtr stream_
CatStream for exchange.
Definition: rebalance.hpp:119
Context & context_
associated Context
Definition: dia_base.hpp:293
#define LOG
Default logging method: output if the local debug variable is true.
Definition: logger.hpp:172