Thrill  0.1
dia_base.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/api/dia_base.hpp
3  *
4  * Untyped super class of DIANode. Used to build the execution graph.
5  *
6  * Part of Project Thrill - http://project-thrill.org
7  *
8  * Copyright (C) 2015 Alexander Noe <[email protected]>
9  * Copyright (C) 2015 Sebastian Lamm <[email protected]>
10  * Copyright (C) 2015 Timo Bingmann <[email protected]>
11  *
12  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
13  ******************************************************************************/
14 
15 #pragma once
16 #ifndef THRILL_API_DIA_BASE_HEADER
17 #define THRILL_API_DIA_BASE_HEADER
18 
19 #include <thrill/api/context.hpp>
20 
21 #include <string>
22 #include <vector>
23 
24 namespace thrill {
25 namespace api {
26 
27 //! \ingroup api_layer
28 //! \{
29 
/*!
 * Possible states a DIABase can be in.
 */
enum class DIAState {
    //! The DIABase has not been computed yet.
    NEW,
    //! The DIABase has been calculated but not explicitly cached. Data might
    //! be available or may have to be recalculated when needed.
    EXECUTED,
    //! The DIABase was manually disposed by the user and needs to be
    //! recomputed when accessed.
    DISPOSED
};
43 
/*!
 * Description of the amount of RAM the internal data structures of a DIANode
 * require. Each DIANode implementation can specify this for its PreOp, Execute,
 * and PostOp parts individually. The StageBuilder collects all requests,
 * notifies the BlockPool to reduce its memory limits, and delivers the
 * available amount to the DIANode in StartPreOp(), Execute(), and PushData()
 * calls.
 *
 * A request is either a concrete number of bytes or the sentinel returned by
 * Max(), which is represented by the largest size_t value.
 */
class DIAMemUse
{
public:
    //! Implicit conversion of a size_t for a constant RAM usage request.
    //! Without an argument the request is zero.
    DIAMemUse(size_t limit = 0) // NOLINT: implicit conversions desired
        : limit_(limit) { }

    //! Maximum available RAM requested (limit will be determined in
    //! StageBuilder by detecting the DIANodes in a Stage)
    static DIAMemUse Max() { return DIAMemUse(max_limit_); }

    //! return amount of RAM reserved (the raw value, even for the sentinel)
    size_t limit() const { return limit_; }

    //! test if sentinel for maximum RAM request
    bool is_max() const { return limit_ == max_limit_; }

    //! implicit conversion to size_t, but only if not is_max(); converting the
    //! sentinel trips the assertion in debug builds.
    operator size_t () const { assert(!is_max()); return limit_; }

private:
    //! amount of RAM requested or reserved.
    size_t limit_;

    //! sentinel for maximum available RAM.
    static constexpr size_t max_limit_ = static_cast<size_t>(-1);
};
78 
79 /*!
80  * The DIABase is the untyped super class of DIANode. DIABases are used to build
81  * the execution graph, which is used to execute the computation.
82  *
83  * Each DIABase knows its parents. Parents are nodes which have to be computed
84  * previously. Not all DIABases have children (ActionNodes do not), hence,
85  * children are first introduced in DIANode.
86  */
88 {
89 public:
91 
92  /*!
93  * The constructor for a DIABase. Sets the parents for this node, but does
94  * not register it has a child, since this must be done with a callback.
95  */
96  DIABase(Context& ctx, const char* label,
97  const std::initializer_list<size_t>& parent_ids,
98  const std::initializer_list<DIABasePtr>& parents)
99  : context_(ctx), dia_id_(ctx.next_dia_id()),
100  label_(label), parents_(parents) {
101  logger_ << "class" << "DIABase"
102  << "event" << "create"
103  << "type" << "DOp"
104  << "parents" << parent_ids;
105  }
106 
107  /*!
108  * The constructor for a DIABase. Sets the parents for this node, but does
109  * not register it has a child, since this must be done with a callback.
110  */
111  DIABase(Context& ctx, const char* label,
112  std::vector<size_t>&& parent_ids,
113  std::vector<DIABasePtr>&& parents)
114  : context_(ctx), dia_id_(ctx.next_dia_id()),
115  label_(std::move(label)), parents_(std::move(parents)) {
116  logger_ << "class" << "DIABase"
117  << "event" << "create"
118  << "type" << "DOp"
119  << "parents" << parent_ids;
120  }
121 
122  //! non-copyable: delete copy-constructor
123  DIABase(const DIABase&) = delete;
124  //! non-copyable: delete assignment operator
125  DIABase& operator = (const DIABase&) = delete;
126  //! move-constructor: default
127  DIABase(DIABase&&) = default;
128  //! move-assignment operator: default
129  DIABase& operator = (DIABase&&) = default;
130 
131  //! Virtual destructor for a DIABase.
132  virtual ~DIABase() {
133  // Remove child pointer from parent If a parent loses all its childs its
134  // reference count should be zero and he should be removed
135 
136  logger_ << "class" << "DIABase"
137  << "event" << "destroy"
138  << "parents" << parent_ids();
139 
140  // de-register at parents (if still hooked there)
141  for (const DIABasePtr& p : parents_)
142  p->RemoveChild(this);
143  }
144 
145  //! Virtual method to determine whether a node contains data or not, and
146  //! hence if it can be Executed() and PushData() or whether it is only a
147  //! forwarding node. This is currently true only for Collapse() and Union().
148  virtual bool ForwardDataOnly() const { return false; }
149 
150  //! Virtual method used by StageBuilder to request information whether it
151  //! must call PushData on the parent of a CollapseNode or UnionNode to
152  //! correctly deliver data.
153  virtual bool RequireParentPushData(size_t /* parent_index */) const
154  { return false; }
155 
156  //! \name Pure Virtual Methods called by StageBuilder
157  //! \{
158 
159  //! Amount of RAM used by PreOp after StartPreOp()
160  virtual DIAMemUse PreOpMemUse() { return 0; }
161 
162  //! Virtual method for preparing start of PushData.
163  virtual void StartPreOp(size_t /* parent_index */) { }
164 
165  //! Virtual method for receiving a whole data::File of ValueType from
166  //! parent. Returns true if the file was accepted (requires that the child's
167  //! function stack is empty and that it can accept whole data::Files).
168  virtual bool OnPreOpFile(
169  const data::File& /* file */, size_t /* parent_index */)
170  { return false; }
171 
172  //! Virtual method for preparing end of PushData.
173  virtual void StopPreOp(size_t /* parent_index */) { }
174 
175  //! Amount of RAM used by Execute()
176  virtual DIAMemUse ExecuteMemUse() { return 0; }
177 
178  //! Virtual execution method. Triggers actual computation in sub-classes.
179  virtual void Execute() = 0;
180 
181  //! Amount of RAM used by PushData()
182  virtual DIAMemUse PushDataMemUse() { return 0; }
183 
184  //! Virtual method for pushing data. Triggers actual pushing in sub-classes.
185  virtual void PushData(bool consume) = 0;
186 
187  //! Virtual clear method. Triggers actual disposing in sub-classes.
188  virtual void Dispose() { }
189 
190  //! \}
191 
192  //! Performing push operation. Notifies children and calls actual push
193  //! method. Then cleans up the DIA graph by freeing parent references of
194  //! children.
195  virtual void RunPushData() = 0;
196 
197  //! Returns the children of this DIABase.
198  virtual std::vector<DIABase*> children() const = 0;
199 
200  //! Virtual method for removing a child.
201  virtual void RemoveChild(DIABase* node) = 0;
202 
203  //! Virtual method for removing all childs. Triggers actual removing in
204  //! sub-classes.
205  virtual void RemoveAllChildren() = 0;
206 
207  //! Returns the api::Context of this DIABase.
209  return context_;
210  }
211 
212  //! return unique id of DIANode subclass as stored by StatsNode
213  const size_t& dia_id() const {
214  return dia_id_;
215  }
216 
217  //! return label() of DIANode subclass as stored by StatsNode
218  const char * label() const {
219  return label_;
220  }
221 
222  //! make ostream-able.
223  friend std::ostream& operator << (std::ostream& os, const DIABase& d);
224 
225  //! Returns consume_counter_
226  virtual size_t consume_counter() const { return consume_counter_; }
227 
228  //! Virtual SetConsume flag which is called by the user via .Keep() or
229  //! .Consume() to set consumption.
230  virtual void IncConsumeCounter(size_t counter) {
231  if (consume_counter_ == kNeverConsume) return;
232  consume_counter_ += counter;
233  }
234 
235  //! Virtual SetConsume flag which is called by the user via .Keep() or
236  //! .Consume() to set consumption.
237  virtual void DecConsumeCounter(size_t counter) {
238  assert(consume_counter_ > 0);
239  if (consume_counter_ <= counter) {
240  consume_counter_ = 0;
241  return;
242  }
243  consume_counter_ -= counter;
244  }
245 
246  //! Virtual SetConsume flag which is called by the user via .Keep() or
247  //! .Consume() to set consumption.
248  virtual void SetConsumeCounter(size_t counter) {
249  consume_counter_ = counter;
250  }
251 
252  //! Returns the parents of this DIABase.
253  const std::vector<DIABasePtr>& parents() const {
254  return parents_;
255  }
256 
257  //! Returns the parents of this DIABase.
258  std::vector<size_t> parent_ids() const {
259  std::vector<size_t> ids;
260  for (const DIABasePtr& p : parents_) ids.push_back(p->dia_id());
261  return ids;
262  }
263 
264  //! Remove a parent
266  parents_.erase(
267  std::remove_if(
268  parents_.begin(), parents_.end(),
269  [p](const DIABasePtr& parent) { return parent.get() == p; }),
270  parents_.end());
271  }
272 
//! Run Scope and parents such that this node (usually an ActionNode) is
//! EXECUTED. Declaration only; the definition is not in this header.
void RunScope();
276 
277  //! Return the Context's memory manager
279  return context_.mem_manager();
280  }
281 
282  DIAState state() const { return state_; }
283 
284  void set_state(const DIAState& state) { state_ = state; }
285 
286  void set_mem_limit(const DIAMemUse& mem_limit) { mem_limit_ = mem_limit; }
287 
288 protected:
289  //! \name Fixed DIA Information
290  //! \{
291 
292  //! associated Context
294 
295  //! DIA serial id
296  const size_t dia_id_;
297 
298  //! DOp node static label.
299  const char* const label_;
300 
301  //! \}
302 
303  //! \name Runtime Operational Variables
304  //! \{
305 
306  //! State of the DIANode. State is NEW on creation.
308 
309  //! Parents of this DIABase.
310  std::vector<DIABasePtr> parents_;
311 
312  //! Amount of memory the current execution stage of the DIA implementation
313  //! is allowed to use.
314  DIAMemUse mem_limit_ = 0;
315 
316  //! Consumption counter: when it reaches zero, PushData() is called with
317  //! consume = true
318  size_t consume_counter_ = 1;
319 
320  //! \}
321 
322 public:
323  //! Never full consume
324  static constexpr size_t kNeverConsume = static_cast<size_t>(-1);
325 
326  /**************************************************************************/
327  // JsonLogger for this DIANode
328 
330  &context_.logger_, "dia_id", dia_id(), "label", label()
331  };
332 };
333 
335 
336 //! \}
337 
338 } // namespace api
339 } // namespace thrill
340 
341 #endif // !THRILL_API_DIA_BASE_HEADER
342 
343 /******************************************************************************/
The DIABase has not been computed yet.
static DIAMemUse Max()
Definition: dia_base.hpp:60
virtual void Dispose()
Virtual clear method. Triggers actual disposing in sub-classes.
Definition: dia_base.hpp:188
std::vector< size_t > parent_ids() const
Returns the parents of this DIABase.
Definition: dia_base.hpp:258
Description of the amount of RAM the internal data structures of a DIANode require.
Definition: dia_base.hpp:51
Context & context()
Returns the api::Context of this DIABase.
Definition: dia_base.hpp:208
DIAState state() const
Definition: dia_base.hpp:282
const size_t dia_id_
DIA serial id.
Definition: dia_base.hpp:296
virtual DIAMemUse ExecuteMemUse()
Amount of RAM used by Execute()
Definition: dia_base.hpp:176
DIABase(Context &ctx, const char *label, const std::initializer_list< size_t > &parent_ids, const std::initializer_list< DIABasePtr > &parents)
The constructor for a DIABase.
Definition: dia_base.hpp:96
virtual void DecConsumeCounter(size_t counter)
Definition: dia_base.hpp:237
DIABase(Context &ctx, const char *label, std::vector< size_t > &&parent_ids, std::vector< DIABasePtr > &&parents)
The constructor for a DIABase.
Definition: dia_base.hpp:111
A File is an ordered sequence of Block objects for storing items.
Definition: file.hpp:56
void set_mem_limit(const DIAMemUse &mem_limit)
Definition: dia_base.hpp:286
const std::vector< DIABasePtr > & parents() const
Returns the parents of this DIABase.
Definition: dia_base.hpp:253
STL namespace.
size_t limit() const
return amount of RAM reserved
Definition: dia_base.hpp:63
const char * label() const
return label() of DIANode subclass as stored by StatsNode
Definition: dia_base.hpp:218
The Context of a job is a unique instance per worker which holds references to all underlying parts o...
Definition: context.hpp:221
virtual bool ForwardDataOnly() const
Definition: dia_base.hpp:148
The DIABase is the untyped super class of DIANode.
Definition: dia_base.hpp:87
virtual DIAMemUse PreOpMemUse()
Amount of RAM used by PreOp after StartPreOp()
Definition: dia_base.hpp:160
const char *const label_
DOp node static label.
Definition: dia_base.hpp:299
virtual ~DIABase()
Virtual destructor for a DIABase.
Definition: dia_base.hpp:132
virtual size_t consume_counter() const
Returns consume_counter_.
Definition: dia_base.hpp:226
bool is_max() const
test if sentinel for maximum RAM request
Definition: dia_base.hpp:66
mem::Manager & mem_manager()
Return the Context's memory manager.
Definition: dia_base.hpp:278
std::vector< DIABasePtr > parents_
Parents of this DIABase.
Definition: dia_base.hpp:310
virtual void StopPreOp(size_t)
Virtual method for preparing end of PushData.
Definition: dia_base.hpp:173
common::JsonLogger logger_
Definition: context.hpp:462
virtual void StartPreOp(size_t)
Virtual method for preparing start of PushData.
Definition: dia_base.hpp:163
virtual void SetConsumeCounter(size_t counter)
Definition: dia_base.hpp:248
const size_t & dia_id() const
return unique id of DIANode subclass as stored by StatsNode
Definition: dia_base.hpp:213
void set_state(const DIAState &state)
Definition: dia_base.hpp:284
DIAState
Possible states a DIABase can be in.
Definition: dia_base.hpp:33
Object shared by allocators and other classes to track memory allocations.
Definition: manager.hpp:28
void RemoveParent(DIABase *p)
Remove a parent.
Definition: dia_base.hpp:265
virtual bool OnPreOpFile(const data::File &, size_t)
Definition: dia_base.hpp:168
size_t limit_
amount of RAM requested or reserved.
Definition: dia_base.hpp:73
JsonLogger is a receiver of JSON output objects for logging.
Definition: json_logger.hpp:69
virtual void IncConsumeCounter(size_t counter)
Definition: dia_base.hpp:230
DIAMemUse(size_t limit=0)
Implicit conversion of a size_t for a constant RAM usage request.
Definition: dia_base.hpp:55
virtual bool RequireParentPushData(size_t) const
Definition: dia_base.hpp:153
Context & context_
associated Context
Definition: dia_base.hpp:293
Provides reference counting abilities for use with CountingPtr.
virtual DIAMemUse PushDataMemUse()
Amount of RAM used by PushData()
Definition: dia_base.hpp:182
std::ostream & operator<<(std::ostream &os, const DIABase &d)
make ostream-able.
Definition: dia_base.cpp:449