Thrill  0.1
dia.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/api/dia.hpp
3  *
4  * Interface for Operations, holds pointer to node and lambda from node to state
5  *
6  * Part of Project Thrill - http://project-thrill.org
7  *
8  * Copyright (C) 2015 Alexander Noe <[email protected]>
9  * Copyright (C) 2015 Sebastian Lamm <[email protected]>
10  * Copyright (C) 2015 Timo Bingmann <[email protected]>
11  * Copyright (C) 2015 Huyen Chau Nguyen <[email protected]>
12  *
13  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
14  ******************************************************************************/
15 
16 #pragma once
17 #ifndef THRILL_API_DIA_HEADER
18 #define THRILL_API_DIA_HEADER
19 
21 #include <thrill/api/context.hpp>
22 #include <thrill/api/dia_node.hpp>
26 
27 #include <cassert>
28 #include <functional>
29 #include <ostream>
30 #include <string>
31 #include <utility>
32 #include <vector>
33 
34 namespace thrill {
35 namespace api {
36 
37 //! \ingroup api_layer
38 //! \{
39 
40 //! tag structure for ReduceByKey(), and ReduceToIndex()
41 template <bool Value>
44  static const bool value = Value;
45 };
46 
47 //! global const VolatileKeyFlag instance
48 const struct VolatileKeyFlag<true> VolatileKeyTag;
49 
50 //! global const VolatileKeyFlag instance
51 const struct VolatileKeyFlag<false> NoVolatileKeyTag;
52 
53 //! tag structure for ReduceToIndex()
56 };
57 
58 //! global const SkipPreReducePhaseTag instance
60 
61 //! tag structure for Window() and FlatWindow()
62 struct DisjointTag {
64 };
65 
66 //! global const DisjointTag instance
67 const struct DisjointTag DisjointTag;
68 
69 //! Tag structure for Zip(): an empty type that carries no data.
70 struct CutTag {
71  CutTag() { } // NOTE(review): presumably user-provided so that the const global below can be default-initialized without an initializer -- confirm
72 };
73 
74 //! Global const CutTag instance; it shares the type's name, hence the elaborated `struct CutTag` specifier.
75 const struct CutTag CutTag;
76 
77 //! Tag structure for Zip(): an empty type that carries no data.
78 struct PadTag {
79  PadTag() { } // NOTE(review): presumably user-provided so that the const global below can be default-initialized without an initializer -- confirm
80 };
81 
82 //! Global const PadTag instance; it shares the type's name, hence the elaborated `struct PadTag` specifier.
83 const struct PadTag PadTag;
84 
85 //! tag structure for Zip()
88 };
89 
90 //! global const NoRebalanceTag instance
92 
93 //! tag structure for Read()
96 };
97 
98 //! global const LocalStorageTag instance
100 
101 //! tag structure for ReduceByKey()
102 template <bool Value>
105  static const bool value = Value;
106 };
107 
108 //! global const DuplicateDetectionFlag instance
110 
111 //! global const DuplicateDetectionFlag instance
113 
114 //! tag structure for GroupByKey(), and InnerJoin()
115 template <bool Value>
118  static const bool value = Value;
119 };
120 
121 //! global const LocationDetectionFlag instance
123 
124 //! global const LocationDetectionFlag instance
126 
127 /*!
128  * DIA is the interface between the user and the Thrill framework. A DIA can be
129  * imagined as an immutable array, even though the data does not need to be
130  * materialized. A DIA holds a pointer to a DIANode, which represents the state
131  * after the previous DOp or Action. Additionally, a DIA stores the local lambda
132  * function chain of type Stack, which can transform elements of the DIANode to
133  * elements of this DIA. DOps/Actions create a DIA and a new DIANode, to which
134  * the DIA links. LOps only create a new DIA, which links to the previous DIANode.
135  *
136  * \tparam ValueType Type of elements currently in this DIA.
137  * \tparam Stack Type of the function chain.
138  */
139 template <typename ValueType_,
140  typename Stack_ = tlx::FunctionStack<ValueType_> >
141 class DIA
142 {
143  friend class Context;
144 
145  //! alias for convenience.
146  template <typename Function>
147  using FunctionTraits = common::FunctionTraits<Function>;
148 
149 public:
150  //! type of the items virtually in the DIA, which is the type emitted by the
151  //! current LOp stack.
152  using ValueType = ValueType_;
153 
154  //! Type of this function stack
155  using Stack = Stack_;
156 
157  //! type of the items delivered by the DOp, and pushed down the function
158  //! stack towards the next nodes. If the function stack contains LOps nodes,
159  //! these may transform the type.
160  using StackInput = typename Stack::Input;
161 
162  //! boolean indication whether this FunctionStack is empty
163  static constexpr bool stack_empty = Stack::empty;
164 
165  //! type of pointer to the real node object implementation. This object has
166  //! base item type StackInput which is transformed by the function stack
167  //! lambdas further. But even pushing more lambdas does not change the stack
168  //! input type.
170 
171  //! Default constructor: yields an invalid DIA (see IsValid()).
172  DIA() = default;
173 
174  //! Return whether the DIA is valid, i.e. whether node_ holds a non-null pointer.
175  bool IsValid() const { return node_.get() != nullptr; }
176 
177  //! Assert that the DIA is valid. The check uses assert(), so it is compiled out when NDEBUG is defined.
178  void AssertValid() const { assert(IsValid()); }
179 
180  /*!
181  * Constructor of a new DIA from a pointer to a DIANode and a function
182  * chain from the DIANode to this DIA. Node pointer and stack are copied.
183  *
184  * \param node Pointer to the last DIANode, DOps and Actions create a new
185  * DIANode, LOps link to the DIANode of the previous DIA.
186  *
187  * \param stack Function stack consisting of functions between last DIANode
188  * and this DIA.
189  *
190  * \param dia_id Serial id of DIA, which includes LOps
191  *
192  * \param label static string label of DIA; the pointer is passed on to label_ as is.
193  */
194  DIA(const DIANodePtr& node, const Stack& stack,
195  size_t dia_id, const char* label)
196  : node_(node), stack_(stack),
197  dia_id_(dia_id), label_(label) { }
198 
199  /*!
200  * Constructor of a new DIA which moves the node pointer into the DIA instead of copying it.
201  *
202  * \param node Pointer to the last DIANode, DOps and Actions create a new
203  * DIANode, LOps link to the DIANode of the previous DIA. It is moved from.
204  *
205  * \param stack Function stack consisting of functions between last DIANode
206  * and this DIA. The stack is copied.
207  *
208  * \param dia_id Serial id of DIA, which includes LOps
209  *
210  * \param label static string label of DIA; the pointer is passed on to label_ as is.
211  */
212  DIA(DIANodePtr&& node, const Stack& stack,
213  size_t dia_id, const char* label)
214  : node_(std::move(node)), stack_(stack),
215  dia_id_(dia_id), label_(label) { }
216 
217  /*!
218  * Constructor of a new DIA with a real backing DIABase. The DIA gets a
219  * default-constructed function stack and takes id and label from the node.
220  * \param node Pointer to the last DIANode, DOps and Actions create a new
221  * DIANode, LOps link to the DIANode of the previous DIA. It is moved from.
222  */
223  explicit DIA(DIANodePtr&& node)
224  : DIA(std::move(node), tlx::FunctionStack<ValueType>(), // std::move() is only a cast here: the delegated ctor takes DIANodePtr&&,
225  node->dia_id(), node->label()) { } // so node is still intact when dia_id() and label() are read, whatever the evaluation order.
226 
227  /*!
228  * Copy-Constructor of a DIA with empty function chain from a DIA with
229  * a non-empty chain. The functionality of the chain is stored in a newly
230  * created LOpNode. The current DIA then points to this LOpNode. This
231  * is needed to support assignment operations between DIAs. Only declared here.
232  */
233 #ifdef THRILL_DOXYGEN_IGNORE
234  template <typename AnyStack>
235  DIA(const DIA<ValueType, AnyStack>& rhs);
236 #else
237  template <typename AnyStack>
238  DIA(const DIA<ValueType, AnyStack>& rhs)
239 #if __GNUC__ && !__clang__
240  // GCC only: attach a warning attribute to uses of this conversion (the attribute warning does not work with gcc?)
241  __attribute__ ((warning( // NOLINT
242  "Casting to DIA creates LOpNode instead of inline chaining.\n"
243  "Consider whether you can use auto instead of DIA.")));
244 #elif __GNUC__ && __clang__ // clang: mark the conversion as deprecated instead
245  __attribute__ ((deprecated)); // NOLINT
246 #else // any other compiler: plain declaration without attribute
247  ; // NOLINT
248 #endif
249 #endif // THRILL_DOXYGEN_IGNORE
250 
251  //! \name Const Accessors
252  //! \{
253 
254  //! Returns the pointer to the DIANode this DIA links to. Asserts that the DIA is valid.
255  const DIANodePtr& node() const {
256  assert(IsValid());
257  return node_;
258  }
259 
260  //! Returns the number of references to the DIANode, as reported by its reference_count(). Asserts validity.
261  size_t node_refcount() const {
262  assert(IsValid());
263  return node_->reference_count();
264  }
265 
266  //! Returns a const reference to the stored function chain. Asserts that the DIA is valid.
267  const Stack& stack() const {
268  assert(IsValid());
269  return stack_;
270  }
271 
272  //! Return the Context of the DIANode, e.g. for creating new LOps and DOps. Asserts that the DIA is valid.
273  Context& context() const {
274  assert(IsValid());
275  return node_->context();
276  }
277 
278  //! Shorter-named equivalent of context(): return the Context of the DIANode. Asserts that the DIA is valid.
279  Context& ctx() const {
280  assert(IsValid());
281  return node_->context();
282  }
283 
284  //! Returns dia_id_, the serial id of this DIA. No validity check is performed.
285  size_t id() const { return dia_id_; }
286 
287  //! Returns label_, the string label of this DIA. No validity check is performed.
288  const char * label() const { return label_; }
289 
290  //! \}
291 
292  /*!
293  * Dispose of the DIANode's data. Does not create a new DIA, but returns the existing one.
294  */
295  const DIA& Dispose() const {
296  assert(IsValid());
297  if (node_->context().consume() && node_->consume_counter() == 0) { // consume() is set and the counter is already 0: report via die()
298  die("Dispose() called on "
299  << *node_ << " which was already consumed.");
300  }
301  node_->Dispose(); // the actual disposal is delegated to the node
302  return *this;
303  }
304 
305  /*!
306  * Mark the referenced DIANode for keeping, which makes children not consume
307  * the data when executing. This does not create a new DIA, but returns the
308  * existing one. \param increase amount passed to the node's IncConsumeCounter().
309  */
310  const DIA& Keep(size_t increase = 1) const {
311  assert(IsValid());
312  if (node_->context().consume() && node_->consume_counter() == 0) { // consume() is set and the counter is already 0: report via die()
313  die("Keep() called on "
314  << *node_ << " which was already consumed.");
315  }
316  node_->IncConsumeCounter(increase);
317  return *this;
318  }
319 
320  /*!
321  * Mark the referenced DIANode for keeping forever, which makes children not
322  * consume the data when executing. This does not create a new DIA, but
323  * returns the existing one. Unlike Keep(), no "already consumed" check is made.
324  */
325  const DIA& KeepForever() const {
326  assert(IsValid());
327  node_->SetConsumeCounter(DIABase::kNeverConsume); // overwrite the counter with the never-consume constant
328  return *this;
329  }
330 
331  /*!
332  * Execute DIA's scope and parents such that this (Action)Node is
333  * Executed. This does not create a new DIA, but returns the existing one.
334  */
335  const DIA& Execute() const {
336  assert(IsValid());
337  node_->RunScope(); // execution is delegated to the node's RunScope()
338  return *this;
339  }
340 
341  //! \name Local Operations (LOps)
342  //! \{
343 
344  /*!
345  * Map applies `map_function` : \f$ A \to B \f$ to each item of a DIA and
346  * delivers a new DIA containing the returned values, which may be of a
347  * different type.
348  *
349  * The function chain of the returned DIA is this DIA's stack_ chained with
350  * a wrapper of map_function that hands each result to the emitter.
351  *
352  * \param map_function Map function of type MapFunction, which maps each
353  * element to an element of a possibly different type.
354  *
355  * \ingroup dia_lops
356  */
357  template <typename MapFunction>
358  auto Map(const MapFunction& map_function) const {
359  assert(IsValid());
360 
361  using MapArgument
362  = typename FunctionTraits<MapFunction>::template arg_plain<0>;
363  using MapResult
365  auto conv_map_function = // wrap map_function (captured by copy) so that its result is passed to emit_func
366  [map_function](const MapArgument& input, auto emit_func) {
367  emit_func(map_function(input));
368  };
369 
370  static_assert(
372  "MapFunction has the wrong input type");
373 
374  size_t new_id = context().next_dia_id(); // id for the new DIA, drawn from the Context
375 
376  node_->context().logger_ // log the creation of the new LOp with this DIA's id as its parent
377  << "dia_id" << new_id
378  << "label" << "Map"
379  << "class" << "DIA"
380  << "event" << "create"
381  << "type" << "LOp"
382  << "parents" << (common::Array<size_t>{ dia_id_ });
383 
384  auto new_stack = stack_.push(conv_map_function); // stack_ itself is not modified (const method); push() returns the extended stack
386  node_, new_stack, new_id, "Map");
387  }
388 
389  /*!
390  * Each item of a DIA is tested using `filter_function` : \f$ A \to
391  * \textrm{bool} \f$ to determine whether it is copied into the output DIA
392  * or excluded.
393  *
394  * \image html dia_ops/Filter.svg
395  *
396  * The function chain of the returned DIA is this DIA's stack_ chained with
397  * a wrapper of filter_function that emits accepted items unchanged.
398  *
399  * \param filter_function Filter function of type FilterFunction, which maps
400  * each element to a boolean.
401  *
402  * \ingroup dia_lops
403  */
404  template <typename FilterFunction>
405  auto Filter(const FilterFunction& filter_function) const {
406  assert(IsValid());
407 
408  using FilterArgument
409  = typename FunctionTraits<FilterFunction>::template arg_plain<0>;
410  auto conv_filter_function = // wrap filter_function (captured by copy): emit the item only if the predicate holds
411  [filter_function](const FilterArgument& input, auto emit_func) {
412  if (filter_function(input)) emit_func(input);
413  };
414 
415  static_assert(
417  "FilterFunction has the wrong input type");
418 
419  size_t new_id = context().next_dia_id(); // id for the new DIA, drawn from the Context
420 
421  node_->context().logger_ // log the creation of the new LOp with this DIA's id as its parent
422  << "dia_id" << new_id
423  << "label" << "Filter"
424  << "class" << "DIA"
425  << "event" << "create"
426  << "type" << "LOp"
427  << "parents" << (common::Array<size_t>{ dia_id_ });
428 
429  auto new_stack = stack_.push(conv_filter_function); // stack_ itself is not modified (const method); push() returns the extended stack
431  node_, new_stack, new_id, "Filter");
432  }
433 
434  /*!
435  * \brief Each item of a DIA is expanded by the `flatmap_function` : \f$ A
436  * \to \textrm{array}(B) \f$ to zero or more items of different type, which
437  * are concatenated in the resulting DIA. The return type of
438  * `flatmap_function` must be specified as template parameter.
439  *
440  * \image html dia_ops/FlatMap.svg
441  *
442  * FlatMap is a LOp, which maps this DIA according to the flatmap_function
443  * given by the user. The flatmap_function maps each element to elements of
444  * a possibly different type. The flatmap_function has an emitter function
445  * as its second parameter. This emitter is called once for each element to
446  * be emitted. The function chain of the returned DIA is this DIA's stack_
447  * chained with flatmap_function, which is pushed without any wrapper.
448  *
449  * \tparam ResultType ResultType of the FlatmapFunction, if different from
450  * item type of DIA.
451  *
452  * \param flatmap_function Map function of type FlatmapFunction, which maps
453  * each element to elements of a possibly different type.
454  *
455  * \ingroup dia_lops
456  */
457  template <typename ResultType = ValueType, typename FlatmapFunction>
458  auto FlatMap(const FlatmapFunction& flatmap_function) const {
459  assert(IsValid());
460 
461  size_t new_id = context().next_dia_id(); // id for the new DIA, drawn from the Context
462 
463  node_->context().logger_ // log the creation of the new LOp with this DIA's id as its parent
464  << "dia_id" << new_id
465  << "label" << "FlatMap"
466  << "class" << "DIA"
467  << "event" << "create"
468  << "type" << "LOp"
469  << "parents" << (common::Array<size_t>{ dia_id_ });
470 
471  auto new_stack = stack_.push(flatmap_function); // stack_ itself is not modified (const method); push() returns the extended stack
473  node_, new_stack, new_id, "FlatMap");
474  }
475 
476  /*!
477  * Each item of a DIA is copied into the output DIA with success probability
478  * p (an independent Bernoulli trial).
479  *
480  * \ingroup dia_lops
481  */
482  auto BernoulliSample(double p) const;
483 
484  /*!
485  * Union is a LOp, which creates the union of all items from any number of
486  * DIAs as a single DIA, where the items are in an arbitrary order. All
487  * input DIAs must contain the same type, which is also the output DIA's
488  * type.
489  *
490  * The Union operation concatenates all _local_ pieces of a DIA, no
491  * rebalancing is performed, and no communication is needed.
492  *
493  * \ingroup dia_lops
494  */
495  template <typename SecondDIA>
496  auto Union(const SecondDIA& second_dia) const;
497 
498  //! \}
499 
500  //! \name Actions
501  //! \{
502 
503  /*!
504  * Computes the total size of all elements across all workers.
505  *
506  * \ingroup dia_actions
507  */
508  size_t Size() const;
509 
510  /*!
511  * Lazily computes the total size of all elements across all workers.
512  *
513  * \ingroup dia_actions
514  */
515  Future<size_t> SizeFuture() const;
516 
517  /*!
518  * Returns the whole DIA in an std::vector on each worker. This is only for
519  * testing purposes and should not be used on large datasets.
520  *
521  * \image html dia_ops/AllGather.svg
522  *
523  * \ingroup dia_actions
524  */
525  std::vector<ValueType> AllGather() const;
526 
527  /**
528  * \brief AllGather is an Action, which returns the whole DIA in an
529  * std::vector on each worker. This is only for testing purposes and should
530  * not be used on large datasets.
531  *
532  * \image html dia_ops/AllGather.svg
533  *
534  * \ingroup dia_actions
535  */
536  void AllGather(std::vector<ValueType>* out_vector) const;
537 
538  /*!
539  * Returns the whole DIA in an std::vector on each worker. This is only for
540  * testing purposes and should not be used on large datasets.
541  *
542  * \image html dia_ops/AllGather.svg
543  *
544  * \ingroup dia_actions
545  */
546  Future<std::vector<ValueType> > AllGatherFuture() const;
547 
548  /*!
549  * Print is an Action, which collects all data of the DIA at the worker 0
550  * and prints using ostream serialization. It is implemented using Gather().
551  *
552  * \ingroup dia_actions
553  */
554  void Print(const std::string& name = std::string()) const;
555 
556  /*!
557  * Print is an Action, which collects all data of the DIA at the worker 0
558  * and prints using ostream serialization. It is implemented using Gather().
559  *
560  * \ingroup dia_actions
561  */
562  void Print(const std::string& name, std::ostream& out) const;
563 
564  /*!
565  * Gather is an Action, which collects all data of the DIA into a vector at
566  * the given worker. This should only be done if the received data can fit
567  * into RAM of the one worker.
568  *
569  * \ingroup dia_actions
570  */
571  std::vector<ValueType> Gather(size_t target_id = 0) const;
572 
573  /*!
574  * Gather is an Action, which collects all data of the DIA into a vector at
575  * the given worker. This should only be done if the received data can fit
576  * into RAM of the one worker.
577  *
578  * \ingroup dia_actions
579  */
580  void Gather(size_t target_id, std::vector<ValueType>* out_vector) const;
581 
582  /*!
583  * Select up to sample_size items uniformly at random and return a new
584  * DIA<T>.
585  */
586  auto Sample(size_t sample_size) const;
587 
588  /*!
589  * AllReduce is an Action, which computes the reduction sum of all elements
590  * globally and delivers the same value on all workers.
591  *
592  * \image html dia_ops/AllReduce.svg
593  *
594  * \param reduce_function Reduce function.
595  *
596  * \ingroup dia_actions
597  */
598  template <typename ReduceFunction>
599  ValueType AllReduce(const ReduceFunction& reduce_function) const;
600 
601  /*!
602  * AllReduce is an Action, which computes the reduction sum of all elements
603  * globally and delivers the same value on all workers.
604  *
605  * \image html dia_ops/AllReduce.svg
606  *
607  * \param reduce_function Reduce function.
608  *
609  * \param initial_value Initial value of the reduction.
610  *
611  * \ingroup dia_actions
612  */
613  template <typename ReduceFunction>
614  ValueType AllReduce(const ReduceFunction& reduce_function,
615  const ValueType& initial_value) const;
616 
617  /*!
618  * AllReduceFuture is an ActionFuture, which computes the reduction sum of
619  * all elements globally and delivers the same value on all workers.
620  *
621  * \image html dia_ops/AllReduce.svg
622  *
623  * \param reduce_function Reduce function.
624  *
625  * \ingroup dia_actions
626  */
627  template <typename ReduceFunction>
628  Future<ValueType> AllReduceFuture(
629  const ReduceFunction& reduce_function) const;
630 
631  /*!
632  * AllReduceFuture is an ActionFuture, which computes the reduction sum of
633  * all elements globally and delivers the same value on all workers.
634  *
635  * \image html dia_ops/AllReduce.svg
636  *
637  * \param reduce_function Reduce function.
638  *
639  * \param initial_value Initial value of the reduction.
640  *
641  * \ingroup dia_actions
642  */
643  template <typename ReduceFunction>
644  Future<ValueType> AllReduceFuture(
645  const ReduceFunction& reduce_function,
646  const ValueType& initial_value) const;
647 
648  /*!
649  * Sum is an Action, which computes the sum of all elements globally.
650  *
651  * \image html dia_ops/Sum.svg
652  *
653  * \param sum_function Sum function.
654  *
655  * \ingroup dia_actions
656  */
657  template <typename SumFunction = std::plus<ValueType> >
658  ValueType Sum(const SumFunction& sum_function = SumFunction()) const;
659 
660  /*!
661  * Sum is an Action, which computes the sum of all elements globally.
662  *
663  * \image html dia_ops/Sum.svg
664  *
665  * \param sum_function Sum function.
666  *
667  * \param initial_value Initial value of the sum.
668  *
669  * \ingroup dia_actions
670  */
671  template <typename SumFunction = std::plus<ValueType> >
672  ValueType Sum(const SumFunction& sum_function,
673  const ValueType& initial_value) const;
674 
675  /*!
676  * SumFuture is an ActionFuture, which computes the sum of all elements
677  * globally.
678  *
679  * \image html dia_ops/Sum.svg
680  *
681  * \param sum_function Sum function.
682  *
683  * \param initial_value Initial value of the sum.
684  *
685  * \ingroup dia_actions
686  */
687  template <typename SumFunction = std::plus<ValueType> >
688  Future<ValueType> SumFuture(
689  const SumFunction& sum_function = SumFunction(),
690  const ValueType& initial_value = ValueType()) const;
691 
692  /*!
693  * Min is an Action, which computes the minimum of all elements globally.
694  *
695  * \image html dia_ops/Sum.svg
696  *
697  * \ingroup dia_actions
698  */
699  ValueType Min() const;
700 
701  /*!
702  * Min is an Action, which computes the minimum of all elements globally.
703  *
704  * \image html dia_ops/Sum.svg
705  *
706  * \param initial_value Initial value of the min.
707  *
708  * \ingroup dia_actions
709  */
710  ValueType Min(const ValueType& initial_value) const;
711 
712  /*!
713  * MinFuture is an ActionFuture, which computes the minimum of all elements
714  * globally.
715  *
716  * \image html dia_ops/Sum.svg
717  *
718  * \ingroup dia_actions
719  */
720  Future<ValueType> MinFuture() const;
721 
722  /*!
723  * MinFuture is an ActionFuture, which computes the minimum of all elements
724  * globally.
725  *
726  * \image html dia_ops/Sum.svg
727  *
728  * \param initial_value Initial value of the min.
729  *
730  * \ingroup dia_actions
731  */
732  Future<ValueType> MinFuture(const ValueType& initial_value) const;
733 
734  /*!
735  * Max is an Action, which computes the maximum of all elements globally.
736  *
737  * \image html dia_ops/Sum.svg
738  *
739  * \ingroup dia_actions
740  */
741  ValueType Max() const;
742 
743  /*!
744  * Max is an Action, which computes the maximum of all elements globally.
745  *
746  * \image html dia_ops/Sum.svg
747  *
748  * \param initial_value Initial value of the max.
749  *
750  * \ingroup dia_actions
751  */
752  ValueType Max(const ValueType& initial_value) const;
753 
754  /*!
755  * MaxFuture is an ActionFuture, which computes the maximum of all elements
756  * globally.
757  *
758  * \image html dia_ops/Sum.svg
759  *
760  * \ingroup dia_actions
761  */
762  Future<ValueType> MaxFuture() const;
763 
764  /*!
765  * MaxFuture is an ActionFuture, which computes the maximum of all elements
766  * globally.
767  *
768  * \image html dia_ops/Sum.svg
769  *
770  * \param initial_value Initial value of the max.
771  *
772  * \ingroup dia_actions
773  */
774  Future<ValueType> MaxFuture(const ValueType& initial_value) const;
775 
776  /*!
777  * Compute the approximate number of distinct elements in the DIA.
778  *
779  * \tparam p Number of bits to use for index. Should be between 4 and 16.
780  * \ingroup dia_actions
781  */
782  template <size_t p>
783  double HyperLogLog() const;
784 
785  /*!
786  * WriteLinesOne is an Action, which writes std::strings to a single output
787  * file.
788  *
789  * \param filepath Destination of the output file.
790  *
791  * \ingroup dia_actions
792  */
793  void WriteLinesOne(const std::string& filepath) const;
794 
795  /*!
796  * WriteLinesOneFuture is an ActionFuture, which writes std::strings to a single
797  * output file.
798  *
799  * \param filepath Destination of the output file.
800  *
801  * \ingroup dia_actions
802  */
803  Future<void> WriteLinesOneFuture(
804  const std::string& filepath) const;
805 
806  /*!
807  * WriteLines is an Action, which writes std::strings to multiple output
808  * files. Strings are written using fstream with a newline after each
809  * entry. Each worker creates its individual file.
810  *
811  * \image html dia_ops/WriteLines.svg
812  *
813  * \param filepath Destination of the output file. This filepath must
814  * contain two special substrings: `"$$$$$"` is replaced by the worker id
815  * and `"#####"` will be replaced by the file chunk id. The last occurrences
816  * of `"$"` and `"#"` are replaced, otherwise `"$$$$"` and/or `"##########"`
817  * are automatically appended.
818  *
819  * \param target_file_size target size of each individual file.
820  *
821  * \ingroup dia_actions
822  */
823  void WriteLines(const std::string& filepath,
824  size_t target_file_size = 128* 1024* 1024) const;
825 
826  /*!
827  * WriteLinesFuture is an ActionFuture, which writes std::strings to multiple
828  * output files. Strings are written using fstream with a newline after each
829  * entry. Each worker creates its individual file.
830  *
831  * \image html dia_ops/WriteLines.svg
832  *
833  * \param filepath Destination of the output file. This filepath must
834  * contain two special substrings: `"$$$$$"` is replaced by the worker id
835  * and `"#####"` will be replaced by the file chunk id. The last occurrences
836  * of `"$"` and `"#"` are replaced, otherwise `"$$$$"` and/or `"##########"`
837  * are automatically appended.
838  *
839  * \param target_file_size target size of each individual file.
840  *
841  * \ingroup dia_actions
842  */
843  Future<void> WriteLinesFuture(
844  const std::string& filepath,
845  size_t target_file_size = 128* 1024* 1024) const;
846 
847  /*!
848  * WriteBinary is a function, which writes a DIA to many files per
849  * worker. The input DIA can be recreated with ReadBinary and equal
850  * filepath.
851  *
852  * \image html dia_ops/WriteBinary.svg
853  *
854  * \param filepath Destination of the output file. This filepath must
855  * contain two special substrings: `"$$$$$"` is replaced by the worker id
856  * and `"#####"` will be replaced by the file chunk id. The last occurrences
857  * of `"$"` and `"#"` are replaced, otherwise `"$$$$"` and/or `"##########"`
858  * are automatically appended.
859  *
860  * \param max_file_size size limit of individual file.
861  *
862  * \ingroup dia_actions
863  */
864  void WriteBinary(const std::string& filepath,
865  size_t max_file_size = 128* 1024* 1024) const;
866 
867  /*!
868  * WriteBinaryFuture is an ActionFuture, which writes a DIA to many files per
869  * worker. The input DIA can be recreated with ReadBinary and equal
870  * filepath.
871  *
872  * \image html dia_ops/WriteBinary.svg
873  *
874  * \param filepath Destination of the output file. This filepath must
875  * contain two special substrings: `"$$$$$"` is replaced by the worker id
876  * and `"#####"` will be replaced by the file chunk id. The last occurrences
877  * of `"$"` and `"#"` are replaced, otherwise `"$$$$"` and/or `"##########"`
878  * are automatically appended.
879  *
880  * \param max_file_size size limit of individual file.
881  *
882  * \ingroup dia_actions
883  */
884  Future<void> WriteBinaryFuture(
885  const std::string& filepath,
886  size_t max_file_size = 128* 1024* 1024) const;
887 
888  //! \}
889 
890  /*!
891  * \name Distributed Operations (DOps)
892  *
893  * \details This list of DOps are methods of the <b>main DIA class</b> and
894  * called as <tt>A.Method(params)</tt>. Methods combining two or more DIAs
895  * are available as \ref dia_dops_free "free functions".
896  *
897  * \{
898  */
899 
900  /*!
901  * ReduceByKey is a DOp, which groups elements of the DIA with the
902  * key_extractor and reduces each key-bucket to a single element using the
903  * associative reduce_function. The reduce_function defines how two elements
904  * can be reduced to a single element of equal type.
905  *
906  * \image html dia_ops/ReduceByKey.svg
907  *
908  * The key of the reduced element has to be equal to the keys of the input
909  * elements. Since ReduceByKey is a DOp, it creates a new DIANode. The DIA
910  * returned by ReduceByKey links to this newly created DIANode. The stack_ of the
911  * returned DIA consists of the PostOp of ReduceByKey, as a reduced element can
912  * directly be chained to the following LOps.
913  *
914  * \param key_extractor Key extractor function, which maps each element to a
915  * key of possibly different type.
916  *
917  * \tparam ReduceFunction Type of the reduce_function. This is a function
918  * reducing two elements of L's result type to a single element of equal
919  * type.
920  *
921  * \param reduce_function Reduce function, which defines how the key buckets
922  * are reduced to a single element. This function is applied associative but
923  * not necessarily commutative.
924  *
925  * \param reduce_config Reduce configuration.
926  *
927  * \ingroup dia_dops
928  */
929  template <typename KeyExtractor, typename ReduceFunction,
930  typename ReduceConfig = class DefaultReduceConfig>
931  auto ReduceByKey(
932  const KeyExtractor& key_extractor,
933  const ReduceFunction& reduce_function,
934  const ReduceConfig& reduce_config = ReduceConfig()) const;
935 
936  /*!
937  * ReduceByKey is a DOp, which groups elements of the DIA with the
938  * key_extractor and reduces each key-bucket to a single element using the
939  * associative reduce_function. The reduce_function defines how two elements
940  * can be reduced to a single element of equal type.
941  *
942  * \image html dia_ops/ReduceByKey.svg
943  *
944  * The key of the reduced element has to be equal to the keys of the input
945  * elements. Since ReduceByKey is a DOp, it creates a new DIANode. The DIA
946  * returned by ReduceByKey links to this newly created DIANode. The stack_ of the
947  * returned DIA consists of the PostOp of ReduceByKey, as a reduced element can
948  * directly be chained to the following LOps.
949  *
950  * \param key_extractor Key extractor function, which maps each element to a
951  * key of possibly different type.
952  *
953  * \tparam ReduceFunction Type of the reduce_function. This is a function
954  * reducing two elements of L's result type to a single element of equal
955  * type.
956  *
957  * \param reduce_function Reduce function, which defines how the key buckets
958  * are reduced to a single element. This function is applied associative but
959  * not necessarily commutative.
960  *
961  * \param reduce_config Reduce configuration.
962  *
963  * \param key_hash_function Function to hash keys extracted by KeyExtractor.
964  *
965  * \ingroup dia_dops
966  */
967  template <typename KeyExtractor, typename ReduceFunction,
968  typename ReduceConfig, typename KeyHashFunction>
969  auto ReduceByKey(
970  const KeyExtractor& key_extractor,
971  const ReduceFunction& reduce_function,
972  const ReduceConfig& reduce_config,
973  const KeyHashFunction& key_hash_function) const;
974 
975  /*!
976  * ReduceByKey is a DOp, which groups elements of the DIA with the
977  * key_extractor and reduces each key-bucket to a single element using the
978  * associative reduce_function. The reduce_function defines how two elements
979  * can be reduced to a single element of equal type.
980  *
981  * \image html dia_ops/ReduceByKey.svg
982  *
983  * The key of the reduced element has to be equal to the keys of the input
984  * elements. Since ReduceByKey is a DOp, it creates a new DIANode. The DIA
985  * returned by ReduceByKey links to this newly created DIANode. The stack_ of the
986  * returned DIA consists of the PostOp of ReduceByKey, as a reduced element can
987  * directly be chained to the following LOps.
988  *
989  * \param key_extractor Key extractor function, which maps each element to a
990  * key of possibly different type.
991  *
992  * \tparam ReduceFunction Type of the reduce_function. This is a function
993  * reducing two elements of L's result type to a single element of equal
994  * type.
995  *
996  * \param reduce_function Reduce function, which defines how the key buckets
997  * are reduced to a single element. This function is applied associative but
998  * not necessarily commutative.
999  *
1000  * \param reduce_config Reduce configuration.
1001  *
1002  * \param key_hash_function Function to hash keys extracted by KeyExtractor.
1003  *
1004  * \param key_equal_function Function to compare keys in reduce hash tables.
1005  *
1006  * \ingroup dia_dops
1007  */
1008  template <typename KeyExtractor, typename ReduceFunction,
1009  typename ReduceConfig,
1010  typename KeyHashFunction, typename KeyEqualFunction>
1011  auto ReduceByKey(
1012  const KeyExtractor& key_extractor,
1013  const ReduceFunction& reduce_function,
1014  const ReduceConfig& reduce_config,
1015  const KeyHashFunction& key_hash_function,
1016  const KeyEqualFunction& key_equal_function) const;
1017 
1018  /*!
1019  * ReduceByKey is a DOp, which groups elements of the DIA with the
1020  * key_extractor and reduces each key-bucket to a single element using the
1021  * associative reduce_function. The reduce_function defines how two elements
1022  * can be reduced to a single element of equal type.
1023  *
1024  * \image html dia_ops/ReduceByKey.svg
1025  *
1026  * In contrast to the plain ReduceByKey overloads, the reduce_function is
1027  * allowed to change the key (Example: Integers with modulo function as
1028  * key_extractor). This creates overhead, as both key and value have to be
1029  * sent in the shuffle step. Since ReduceByKey is a DOp, it creates a new
1030  * DIANode, and the returned DIA links to this newly created DIANode. The
1031  * stack_ of the returned DIA consists of the PostOp of ReduceByKey, as a
1032  * reduced element can directly be chained to the following LOps.
1033  *
1034  * \param volatile_key_flag Tag, presumably VolatileKeyTag/NoVolatileKeyTag.
1035  *
1036  * \param key_extractor Key extractor function, which maps each element to a
1037  * key of possibly different type.
1038  *
1039  * \tparam ReduceFunction Type of the reduce_function. This is a function
1040  * reducing two elements of L's result type to a single element of equal
1041  * type.
1042  *
1043  * \param reduce_function Reduce function, which defines how the key buckets
1044  * are reduced to a single element. This function is applied associatively,
1045  * but not necessarily commutatively.
1046  *
1047  * \param reduce_config Reduce configuration. Defaults to ReduceConfig().
1048  *
1049  * \param key_hash_function Function to hash keys extracted by KeyExtractor.
1050  *
1051  * \param key_equal_function Function to compare keys in reduce hash tables.
1052  *
1053  * \ingroup dia_dops
1054  */
1055  template <bool VolatileKeyValue,
1056  typename KeyExtractor, typename ReduceFunction,
1057  typename ReduceConfig = class DefaultReduceConfig,
1058  typename KeyHashFunction =
1059  std::hash<typename FunctionTraits<KeyExtractor>::result_type>,
1060  typename KeyEqualFunction =
1061  std::equal_to<typename FunctionTraits<KeyExtractor>::result_type> >
1062  auto ReduceByKey(
1064  const KeyExtractor& key_extractor,
1065  const ReduceFunction& reduce_function,
1066  const ReduceConfig& reduce_config = ReduceConfig(),
1067  const KeyHashFunction& key_hash_function = KeyHashFunction(),
1068  const KeyEqualFunction& key_equal_function = KeyEqualFunction()) const;
1069 
1070  /*!
1071  * ReduceByKey is a DOp, which groups elements of the DIA with the
1072  * key_extractor and reduces each key-bucket to a single element using the
1073  * associative reduce_function. The reduce_function defines how two elements
1074  * can be reduced to a single element of equal type.
1075  *
1076  * \image html dia_ops/ReduceByKey.svg
1077  *
1078  * In contrast to the plain ReduceByKey overloads, the reduce_function may
1079  * change the key (Example: Integers with modulo function as key_extractor),
1080  * which creates overhead as both key and value are sent in the shuffle step.
1081  * NOTE(review): same wording as the volatile-key overload; confirm it applies
1082  * here. Since ReduceByKey is a DOp, it creates a new DIANode, to which the
1083  * returned DIA links. The stack_ of the returned DIA consists of the PostOp of
1084  * ReduceByKey, as a reduced element can directly be chained to following LOps.
1085  *
1086  * \param duplicate_detection_flag Tag; presumably selects duplicate detection.
1087  *
1088  * \param key_extractor Key extractor function, which maps each element to a
1089  * key of possibly different type.
1090  *
1091  * \tparam ReduceFunction Type of the reduce_function. This is a function
1092  * reducing two elements of L's result type to a single element of equal
1093  * type.
1094  *
1095  * \param reduce_function Reduce function, which defines how the key buckets
1096  * are reduced to a single element. This function is applied associatively,
1097  * but not necessarily commutatively.
1098  *
1099  * \param reduce_config Reduce configuration. Defaults to ReduceConfig().
1100  *
1101  * \param key_hash_function Function to hash keys extracted by KeyExtractor.
1102  *
1103  * \param key_equal_function Function to compare keys in reduce hash tables.
1104  *
1105  * \ingroup dia_dops
1106  */
1107  template <bool DuplicateDetectionValue,
1108  typename KeyExtractor, typename ReduceFunction,
1109  typename ReduceConfig = class DefaultReduceConfig,
1110  typename KeyHashFunction =
1111  std::hash<typename FunctionTraits<KeyExtractor>::result_type>,
1112  typename KeyEqualFunction =
1113  std::equal_to<typename FunctionTraits<KeyExtractor>::result_type> >
1114  auto ReduceByKey(
1116  const KeyExtractor& key_extractor,
1117  const ReduceFunction& reduce_function,
1118  const ReduceConfig& reduce_config = ReduceConfig(),
1119  const KeyHashFunction& key_hash_function = KeyHashFunction(),
1120  const KeyEqualFunction& key_equal_function = KeyEqualFunction()) const;
1121 
1122  /*!
1123  * ReduceByKey is a DOp, which groups elements of the DIA with the
1124  * key_extractor and reduces each key-bucket to a single element using the
1125  * associative reduce_function. The reduce_function defines how two elements
1126  * can be reduced to a single element of equal type.
1127  *
1128  * \image html dia_ops/ReduceByKey.svg
1129  *
1130  * This overload takes both the VolatileKeyValue and DuplicateDetectionValue
1131  * template flags. In contrast to the plain ReduceByKey overloads, the
1132  * reduce_function may change the key (Example: Integers with modulo function
1133  * as key_extractor), at the overhead of sending both key and value in the
1134  * shuffle step. As a DOp, ReduceByKey creates a new DIANode, to which the
1135  * returned DIA links. The stack_ of the returned DIA consists of the PostOp of
1136  * ReduceByKey, as a reduced element can directly be chained to following LOps.
1137  *
1138  * \param key_extractor Key extractor function, which maps each element to a
1139  * key of possibly different type.
1140  *
1141  * \tparam ReduceFunction Type of the reduce_function. This is a function
1142  * reducing two elements of L's result type to a single element of equal
1143  * type.
1144  *
1145  * \param reduce_function Reduce function, which defines how the key buckets
1146  * are reduced to a single element. This function is applied associatively,
1147  * but not necessarily commutatively.
1148  *
1149  * \param reduce_config Reduce configuration. Defaults to ReduceConfig().
1150  *
1151  * \param key_hash_function Function to hash keys extracted by KeyExtractor.
1152  *
1153  * \param key_equal_function Function to compare keys in reduce hash tables.
1154  *
1155  * \ingroup dia_dops
1156  */
1157  template <bool VolatileKeyValue,
1158  bool DuplicateDetectionValue,
1159  typename KeyExtractor, typename ReduceFunction,
1160  typename ReduceConfig = class DefaultReduceConfig,
1161  typename KeyHashFunction =
1162  std::hash<typename FunctionTraits<KeyExtractor>::result_type>,
1163  typename KeyEqualFunction =
1164  std::equal_to<typename FunctionTraits<KeyExtractor>::result_type> >
1165  auto ReduceByKey(
1168  const KeyExtractor& key_extractor,
1169  const ReduceFunction& reduce_function,
1170  const ReduceConfig& reduce_config = ReduceConfig(),
1171  const KeyHashFunction& key_hash_function = KeyHashFunction(),
1172  const KeyEqualFunction& key_equal_function = KeyEqualFunction()) const;
1173 
1174  /*!
1175  * ReducePair is a DOp, which groups key-value-pairs in the input DIA by
1176  * their key and reduces each key-bucket to a single element using the
1177  * associative reduce_function. The reduce_function defines how two elements
1178  * can be reduced to a single element of equal type. The reduce_function is
1179  * allowed to change the key. Since ReducePair is a DOp, it creates a new
1180  * DIANode. The DIA returned by ReducePair links to this newly created DIANode.
1181  * The stack_ of the returned DIA consists of the PostOp of ReducePair, as a
1182  * reduced element can directly be chained to the following LOps.
1183  *
1184  * \tparam ReduceFunction Type of the reduce_function. This is a function
1185  * reducing two elements of L's result type to a single element of equal
1186  * type.
1187  *
1188  * \param reduce_function Reduce function, which defines how the key buckets
1189  * are reduced to a single element. This function is applied associatively,
1190  * but not necessarily commutatively.
1191  *
1192  * \param reduce_config Reduce configuration. Defaults to ReduceConfig().
1193  *
1194  * \ingroup dia_dops
1195  */
1196  template <typename ReduceFunction,
1197  typename ReduceConfig = class DefaultReduceConfig>
1198  auto ReducePair(
1199  const ReduceFunction& reduce_function,
1200  const ReduceConfig& reduce_config = ReduceConfig()) const;
1201 
1202  /*!
1203  * ReducePair is a DOp, which groups key-value-pairs in the input DIA by
1204  * their key and reduces each key-bucket to a single element using the
1205  * associative reduce_function. The reduce_function defines how two elements
1206  * can be reduced to a single element of equal type. The reduce_function is
1207  * allowed to change the key. Since ReducePair is a DOp, it creates a new
1208  * DIANode. The DIA returned by ReducePair links to this newly created DIANode.
1209  * The stack_ of the returned DIA consists of the PostOp of ReducePair, as a
1210  * reduced element can directly be chained to the following LOps.
1211  *
1212  * \tparam ReduceFunction Type of the reduce_function. This is a function
1213  * reducing two elements of L's result type to a single element of equal
1214  * type.
1215  *
1216  * \param reduce_function Reduce function, which defines how the key buckets
1217  * are reduced to a single element. This function is applied associatively,
1218  * but not necessarily commutatively.
1219  *
1220  * \param reduce_config Reduce configuration.
1221  *
1222  * \param key_hash_function Function to hash the keys of the key-value pairs.
1223  *
1224  * \ingroup dia_dops
1225  */
1226  template <typename ReduceFunction, typename ReduceConfig,
1227  typename KeyHashFunction>
1228  auto ReducePair(
1229  const ReduceFunction& reduce_function,
1230  const ReduceConfig& reduce_config,
1231  const KeyHashFunction& key_hash_function) const;
1232 
1233  /*!
1234  * ReducePair is a DOp, which groups key-value-pairs in the input DIA by
1235  * their key and reduces each key-bucket to a single element using the
1236  * associative reduce_function. The reduce_function defines how two elements
1237  * can be reduced to a single element of equal type. The reduce_function is
1238  * allowed to change the key. Since ReducePair is a DOp, it creates a new
1239  * DIANode. The DIA returned by ReducePair links to this newly created DIANode.
1240  * The stack_ of the returned DIA consists of the PostOp of ReducePair, as a
1241  * reduced element can directly be chained to the following LOps.
1242  *
1243  * \tparam ReduceFunction Type of the reduce_function. This is a function
1244  * reducing two elements of L's result type to a single element of equal
1245  * type.
1246  *
1247  * \param reduce_function Reduce function, which defines how the key buckets
1248  * are reduced to a single element. This function is applied associatively,
1249  * but not necessarily commutatively.
1250  *
1251  * \param reduce_config Reduce configuration.
1252  *
1253  * \param key_hash_function Function to hash the keys of the key-value pairs.
1254  *
1255  * \param key_equal_function Function to compare keys in reduce hash tables.
1256  *
1257  * \ingroup dia_dops
1258  */
1259  template <typename ReduceFunction, typename ReduceConfig,
1260  typename KeyHashFunction, typename KeyEqualFunction>
1261  auto ReducePair(
1262  const ReduceFunction& reduce_function,
1263  const ReduceConfig& reduce_config,
1264  const KeyHashFunction& key_hash_function,
1265  const KeyEqualFunction& key_equal_function) const;
1266 
1267  /*!
1268  * ReducePair is a DOp, which groups key-value-pairs in the input DIA by
1269  * their key and reduces each key-bucket to a single element using the
1270  * associative reduce_function, which defines how two elements are reduced to
1271  * a single element of equal type and may change the key. The returned DIA
1272  * links to the DIANode newly created by this DOp; its stack_ consists of the
1273  * PostOp of ReducePair, so reduced elements can be chained directly to LOps.
1274  * NOTE(review): KeyHashFunction and KeyEqualFunction have no default template
1275  * argument and are not deducible from the defaulted parameters; confirm usage.
1276  *
1277  * \tparam ReduceFunction Type of the reduce_function. This is a function
1278  * reducing two elements of L's result type to a single element of equal
1279  * type.
1280  *
1281  * \param reduce_function Reduce function, which defines how the key buckets
1282  * are reduced to a single element. This function is applied associatively,
1283  * but not necessarily commutatively.
1284  *
1285  * \param reduce_config Reduce configuration. Defaults to ReduceConfig().
1286  *
1287  * \param key_hash_function Function to hash the keys of the key-value pairs.
1288  *
1289  * \param key_equal_function Function to compare keys in reduce hash tables.
1290  *
1291  * \ingroup dia_dops
1292  */
1293  template <bool DuplicateDetectionValue,
1294  typename ReduceFunction,
1295  typename ReduceConfig = class DefaultReduceConfig,
1296  typename KeyHashFunction,
1297  typename KeyEqualFunction
1298  >
1299  auto ReducePair(
1301  const ReduceFunction& reduce_function,
1302  const ReduceConfig& reduce_config = ReduceConfig(),
1303  const KeyHashFunction& key_hash_function = KeyHashFunction(),
1304  const KeyEqualFunction& key_equal_function = KeyEqualFunction()) const;
1305 
1306  /*!
1307  * ReduceToIndex is a DOp, which groups elements of the DIA with the
1308  * key_extractor returning an unsigned integer and reduces each key-bucket
1309  * to a single element using the associative reduce_function. In contrast
1310  * to ReduceByKey, ReduceToIndex returns a DIA in a defined order, which has
1311  * the reduced element with key i in position i.
1312  *
1313  * \image html dia_ops/ReduceToIndex.svg
1314  *
1315  * The reduce_function defines how two elements can be reduced to a single
1316  * element of equal type. The key of the reduced element has to be equal to
1317  * the keys of the input elements. Since ReduceToIndex is a DOp, it creates
1318  * a new DIANode. The DIA returned by ReduceToIndex links to this newly
1319  * created DIANode. The stack_ of the returned DIA consists of the PostOp of
1320  * ReduceToIndex, as a reduced element can directly be chained to the
1321  * following LOps.
1322  *
1323  * \param key_extractor Key extractor function, which maps each element to a
1324  * key of possibly different type.
1325  *
1326  * \tparam ReduceFunction Type of the reduce_function. This is a function
1327  * reducing two elements of L's result type to a single element of equal
1328  * type.
1329  *
1330  * \param reduce_function Reduce function, which defines how the key buckets
1331  * are reduced to a single element. This function is applied associatively,
1332  * but not necessarily commutatively.
1333  *
1334  * \param size Resulting DIA size. Consequently, the key_extractor function
1335  * must always return a value < size for any element in the input DIA.
1336  *
1337  * \param neutral_element Item value with which to start the reduction in
1338  * each array cell. Defaults to ValueType().
1339  *
1340  * \param reduce_config Reduce configuration. Defaults to ReduceConfig().
1341  *
1342  * \ingroup dia_dops
1343  */
1344  template <typename KeyExtractor, typename ReduceFunction,
1345  typename ReduceConfig = class DefaultReduceToIndexConfig>
1346  auto ReduceToIndex(
1347  const KeyExtractor& key_extractor,
1348  const ReduceFunction& reduce_function,
1349  size_t size,
1350  const ValueType& neutral_element = ValueType(),
1351  const ReduceConfig& reduce_config = ReduceConfig()) const;
1352 
1353  /*!
1354  * ReduceToIndex is a DOp, which groups elements of the DIA with the
1355  * key_extractor returning an unsigned integer and reduces each key-bucket
1356  * to a single element using the associative reduce_function. In contrast
1357  * to ReduceByKey, ReduceToIndex returns a DIA in a defined order, which has
1358  * the reduced element with key i in position i. The reduce_function
1359  * defines how two elements can be reduced to a single element of equal
1360  * type.
1361  *
1362  * \image html dia_ops/ReduceToIndex.svg
1363  *
1364  * This overload carries the VolatileKeyValue template flag. ReduceToIndex is
1365  * the equivalent to ReduceByKey, as the reduce_function is allowed to change
1366  * the key. Since ReduceToIndex is a DOp, it creates a new DIANode, to which
1367  * the returned DIA links. The stack_ of the returned DIA consists of the
1368  * PostOp of ReduceToIndex, as a reduced element can directly be chained to
1369  * the following LOps.
1370  *
1371  * \param key_extractor Key extractor function, which maps each element to a
1372  * key of possibly different type.
1373  *
1374  * \tparam ReduceFunction Type of the reduce_function. This is a function
1375  * reducing two elements of L's result type to a single element of equal
1376  * type.
1377  *
1378  * \param reduce_function Reduce function, which defines how the key buckets
1379  * are reduced to a single element. This function is applied associatively,
1380  * but not necessarily commutatively.
1381  *
1382  * \param size Resulting DIA size. Consequently, the key_extractor function
1383  * must always return a value < size for any element in the input DIA.
1384  *
1385  * \param neutral_element Item value with which to start the reduction in
1386  * each array cell. Defaults to ValueType().
1387  *
1388  * \param reduce_config Reduce configuration. Defaults to ReduceConfig().
1389  *
1390  * \ingroup dia_dops
1391  */
1392  template <bool VolatileKeyValue,
1393  typename KeyExtractor, typename ReduceFunction,
1394  typename ReduceConfig = class DefaultReduceToIndexConfig>
1395  auto ReduceToIndex(
1397  const KeyExtractor& key_extractor,
1398  const ReduceFunction& reduce_function,
1399  size_t size,
1400  const ValueType& neutral_element = ValueType(),
1401  const ReduceConfig& reduce_config = ReduceConfig()) const;
1402 
1403  /*!
1404  * ReduceToIndex is a DOp, which groups elements of the DIA with the
1405  * key_extractor returning an unsigned integer and reduces each key-bucket
1406  * to a single element using the associative reduce_function. In contrast
1407  * to ReduceByKey, ReduceToIndex returns a DIA in a defined order, which has
1408  * the reduced element with key i in position i. The reduce_function
1409  * defines how two elements can be reduced to a single element of equal
1410  * type.
1411  *
1412  * \image html dia_ops/ReduceToIndex.svg
1413  *
1414  * This overload is selected by passing a SkipPreReducePhaseTag as the first
1415  * argument. ReduceToIndex is the equivalent to ReduceByKey, as the
1416  * reduce_function is allowed to change the key. Since ReduceToIndex is a DOp,
1417  * it creates a new DIANode, to which the returned DIA links. The stack_ of the
1418  * returned DIA consists of the PostOp of ReduceToIndex, as a reduced element
1419  * can directly be chained to the following LOps.
1420  *
1421  * \param key_extractor Key extractor function, which maps each element to a
1422  * key of possibly different type.
1423  *
1424  * \tparam ReduceFunction Type of the reduce_function. This is a function
1425  * reducing two elements of L's result type to a single element of equal
1426  * type.
1427  *
1428  * \param reduce_function Reduce function, which defines how the key buckets
1429  * are reduced to a single element. This function is applied associatively,
1430  * but not necessarily commutatively.
1431  *
1432  * \param size Resulting DIA size. Consequently, the key_extractor function
1433  * must always return a value < size for any element in the input DIA.
1434  *
1435  * \param neutral_element Item value with which to start the reduction in
1436  * each array cell. Defaults to ValueType().
1437  *
1438  * \param reduce_config Reduce configuration. Defaults to ReduceConfig().
1439  *
1440  * \ingroup dia_dops
1441  */
1442  template <typename KeyExtractor, typename ReduceFunction,
1443  typename ReduceConfig = class DefaultReduceToIndexConfig>
1444  auto ReduceToIndex(
1445  const struct SkipPreReducePhaseTag&,
1446  const KeyExtractor& key_extractor,
1447  const ReduceFunction& reduce_function,
1448  size_t size,
1449  const ValueType& neutral_element = ValueType(),
1450  const ReduceConfig& reduce_config = ReduceConfig()) const;
1451 
1452  /*!
1453  * GroupByKey is a DOp, which groups elements of the DIA by their key.
1454  * After having grouped all elements of one key, all elements of one key
1455  * are processed according to the GroupByFunction, and an output is returned.
1456  * Contrary to Reduce, GroupByKey allows usage of functions that require all
1457  * elements of one key at once, as the GroupByFunction is applied _after_
1458  * all elements with the same key have been grouped. However, for this
1459  * reason the communication overhead is also higher. If possible, usage of
1460  * Reduce is therefore recommended.
1461  *
1462  * \image html dia_ops/GroupByKey.svg
1463  *
1464  * As GroupByKey is a DOp, it creates a new DIANode. The DIA returned by
1465  * GroupByKey links to this newly created DIANode. The stack_ of the returned
1466  * DIA consists of the PostOp of GroupByKey, as a grouped element can
1467  * directly be chained to the following LOps.
1468  *
1469  * \tparam KeyExtractor Type of the key_extractor function.
1470  * The key_extractor function is equal to a map function.
1471  *
1472  * \param key_extractor Key extractor function, which maps each element to a
1473  * key of possibly different type.
1474  *
1475  * \tparam GroupByFunction Type of the groupby_function. This is a function
1476  * taking an iterator for all elements of the same key as input.
1477  *
1478  * \param groupby_function Group function, which defines how the key
1479  * buckets are grouped and processed. Its input parameter is an
1480  * api::GroupByReader with functions HasNext() and Next().
1481  *
1482  * \ingroup dia_dops
1483  */
1484  template <typename ValueOut, typename KeyExtractor,
1485  typename GroupByFunction>
1486  auto GroupByKey(const KeyExtractor& key_extractor,
1487  const GroupByFunction& groupby_function) const;
1488 
1489  /*!
1490  * GroupByKey is a DOp, which groups elements of the DIA by their key.
1491  * After having grouped all elements of one key, all elements of one key
1492  * are processed according to the GroupByFunction, and an output is returned.
1493  * Contrary to Reduce, GroupByKey allows usage of functions that require all
1494  * elements of one key at once, as the GroupByFunction is applied _after_
1495  * all elements with the same key have been grouped. However, for this
1496  * reason the communication overhead is also higher. If possible, usage of
1497  * Reduce is therefore recommended.
1498  *
1499  * \image html dia_ops/GroupByKey.svg
1500  *
1501  * As GroupByKey is a DOp, it creates a new DIANode. The DIA returned by
1502  * GroupByKey links to this newly created DIANode. The stack_ of the returned
1503  * DIA consists of the PostOp of GroupByKey, as a grouped element can
1504  * directly be chained to the following LOps.
1505  *
1506  * \tparam KeyExtractor Type of the key_extractor function.
1507  * The key_extractor function is equal to a map function.
1508  *
1509  * \param key_extractor Key extractor function, which maps each element to a
1510  * key of possibly different type.
1511  *
1512  * \tparam GroupByFunction Type of the groupby_function. This is a function
1513  * taking an iterator for all elements of the same key as input.
1514  *
1515  * \param groupby_function Group function, which defines how the key
1516  * buckets are grouped and processed. Its input parameter is an
1517  * api::GroupByReader with functions HasNext() and Next().
1518  *
1519  * \param hash_function Hash function for keys.
1520  *
1521  * \ingroup dia_dops
1522  */
1523  template <typename ValueOut, typename KeyExtractor,
1524  typename GroupByFunction, typename HashFunction>
1525  auto GroupByKey(const KeyExtractor& key_extractor,
1526  const GroupByFunction& groupby_function,
1527  const HashFunction& hash_function) const;
1528 
1529  /*!
1530  * GroupByKey is a DOp, which groups elements of the DIA by their key.
1531  * After having grouped all elements of one key, all elements of one key
1532  * are processed according to the GroupByFunction, and an output is returned.
1533  * Contrary to Reduce, GroupByKey allows usage of functions that require all
1534  * elements of one key at once, as the GroupByFunction is applied _after_
1535  * all elements with the same key have been grouped. However, for this
1536  * reason the communication overhead is also higher. If possible, usage of
1537  * Reduce is therefore recommended.
1538  *
1539  * \image html dia_ops/GroupByKey.svg
1540  *
1541  * This overload takes a LocationDetectionFlag tag as its first argument. As
1542  * GroupByKey is a DOp, it creates a new DIANode, to which the returned DIA
1543  * links. The stack_ of the returned DIA consists of the PostOp of GroupByKey,
1544  * as a grouped element can directly be chained to the following LOps.
1545  *
1546  * \tparam KeyExtractor Type of the key_extractor function.
1547  * The key_extractor function is equal to a map function.
1548  *
1549  * \param key_extractor Key extractor function, which maps each element to a
1550  * key of possibly different type.
1551  *
1552  * \tparam GroupByFunction Type of the groupby_function. This is a function
1553  * taking an iterator for all elements of the same key as input.
1554  *
1555  * \param groupby_function Group function, which defines how the key
1556  * buckets are grouped and processed. Its input parameter is an
1557  * api::GroupByReader with functions HasNext() and Next().
1558  *
1559  * \param hash_function Hash function for keys. Defaults to HashFunction().
1560  *
1561  * \ingroup dia_dops
1562  */
1563  template <typename ValueOut, bool LocationDetectionTagValue,
1564  typename KeyExtractor, typename GroupByFunction,
1565  typename HashFunction =
1566  std::hash<typename FunctionTraits<KeyExtractor>::result_type>
1567  >
1568  auto GroupByKey(const LocationDetectionFlag<LocationDetectionTagValue>&,
1569  const KeyExtractor& key_extractor,
1570  const GroupByFunction& groupby_function,
1571  const HashFunction& hash_function = HashFunction()) const;
1572 
1573  /*!
1574  * GroupToIndex is a DOp, which groups elements of the DIA by their key.
1575  * After having grouped all elements of one key, all elements of one key
1576  * are processed according to the GroupByFunction, and an output is returned.
1577  * Contrary to Reduce, GroupToIndex allows usage of functions that require all
1578  * elements of one key at once, as the GroupByFunction is applied _after_
1579  * all elements with the same key have been grouped. However, for this
1580  * reason the communication overhead is also higher. If possible, usage of
1581  * Reduce is therefore recommended.
1582  *
1583  * \image html dia_ops/GroupToIndex.svg
1584  *
1585  * In contrast to GroupByKey, GroupToIndex returns a DIA in a defined order,
1586  * which has the grouped element with key i in position i.
1587  * As GroupToIndex is a DOp, it creates a new DIANode. The DIA returned by
1588  * GroupToIndex links to this newly created DIANode. The stack_ of the returned
1589  * DIA consists of the PostOp of GroupToIndex, as a grouped element can
1590  * directly be chained to the following LOps.
1591  *
1592  * \tparam KeyExtractor Type of the key_extractor function.
1593  * The key_extractor function is equal to a map function.
1594  *
1595  * \param key_extractor Key extractor function, which maps each element to a
1596  * key of possibly different type.
1597  *
1598  * \tparam GroupByFunction Type of the groupby_function. This is a function
1599  * taking an iterator for all elements of the same key as input.
1600  *
1601  * \param groupby_function Group function, which defines how the key
1602  * buckets are grouped and processed. Its input parameter is an
1603  * api::GroupByReader with functions HasNext() and Next().
1604  *
1605  * \param size Resulting DIA size. Consequently, the key_extractor function
1606  * must always return a value < size for any element in the input DIA.
1607  *
1608  * \param neutral_element Item value with which to start the reduction in
1609  * each array cell. Defaults to ValueOut().
1610  *
1611  * \ingroup dia_dops
1612  */
1613  template <typename ValueOut, typename KeyExtractor,
1614  typename GroupByFunction>
1615  auto GroupToIndex(const KeyExtractor& key_extractor,
1616  const GroupByFunction& groupby_function,
1617  const size_t size,
1618  const ValueOut& neutral_element = ValueOut()) const;
1619 
1620  /*!
1621  * Zips two DIAs of equal size in style of functional programming by
1622  * applying zip_function to the i-th elements of both input DIAs to form the
1623  * i-th element of the output DIA. The type of the output DIA can be
1624  * inferred from the zip_function.
1625  *
1626  * \image html dia_ops/Zip.svg
1627  *
1628  * The two input DIAs are required to be of equal size; otherwise use the
1629  * CutTag or PadTag variant.
1630  *
1631  * \tparam ZipFunction Type of the zip_function. This is a function with two
1632  * input elements, both of the local type, and one output element, which is
1633  * the type of the Zip node.
1634  *
1635  * \param second_dia DIA, which is zipped together with the original
1636  * DIA.
1637  *
1638  * \param zip_function Zip function, which zips two elements together.
1639  *
1640  * \ingroup dia_dops
1641  */
1642  template <typename ZipFunction, typename SecondDIA>
1643  auto Zip(const SecondDIA& second_dia,
1644  const ZipFunction& zip_function) const;
1645 
1646  /*!
1647  * Zips two DIAs in style of functional programming by applying zip_function
1648  * to the i-th elements of both input DIAs to form the i-th element of the
1649  * output DIA. The type of the output DIA can be inferred from the
1650  * zip_function. This variant is selected by passing a CutTag first.
1651  *
1652  * \image html dia_ops/Zip.svg
1653  *
1654  * If the two input DIAs are of unequal size, the result has the length of the
1655  * shorter one. To pad instead, use the PadTag variant.
1656  *
1657  * \tparam ZipFunction Type of the zip_function. This is a function with two
1658  * input elements, both of the local type, and one output element, which is
1659  * the type of the Zip node.
1660  *
1661  * \param second_dia DIA, which is zipped together with the original
1662  * DIA.
1663  *
1664  * \param zip_function Zip function, which zips two elements together.
1665  *
1666  * \ingroup dia_dops
1667  */
1668  template <typename ZipFunction, typename SecondDIA>
1669  auto Zip(struct CutTag const&, const SecondDIA& second_dia,
1670  const ZipFunction& zip_function) const;
1671 
1672  /*!
1673  * Zips two DIAs in style of functional programming by applying zip_function
1674  * to the i-th elements of both input DIAs to form the i-th element of the
1675  * output DIA. The type of the output DIA can be inferred from the
1676  * zip_function. This variant is selected by passing a PadTag first.
1677  *
1678  * \image html dia_ops/Zip.svg
1679  *
1680  * The output DIA's length is the *maximum* of all input DIAs' lengths; shorter
1681  * DIAs are padded with default-constructed items.
1682  *
1683  * \tparam ZipFunction Type of the zip_function. This is a function with two
1684  * input elements, both of the local type, and one output element, which is
1685  * the type of the Zip node.
1686  *
1687  * \param second_dia DIA, which is zipped together with the original
1688  * DIA.
1689  *
1690  * \param zip_function Zip function, which zips two elements together.
1691  *
1692  * \ingroup dia_dops
1693  */
1694  template <typename ZipFunction, typename SecondDIA>
1695  auto Zip(struct PadTag const&, const SecondDIA& second_dia,
1696  const ZipFunction& zip_function) const;
1697 
1698  /*!
1699  * Zips this DIA with second_dia in the style of functional programming:
1700  * zip_function is applied to the i-th elements of both input DIAs to form
1701  * the i-th element of the output DIA. The type of the output DIA can be
1702  * inferred from the zip_function.
1703  *
1704  * \image html dia_ops/Zip.svg
1705  *
1706  * NoRebalanceTag variant: the DIA partitions on all PEs must have matching
1707  * length. No rebalancing is performed, and the program will die if any
1708  * partition mismatches. This enables Zip to proceed without any
1709  * communication.
1710  *
1711  * \tparam ZipFunction Type of the zip_function. This is a function with two
1712  * input elements, both of the local type, and one output element, which is
1713  * the type of the Zip node.
1714  *
1715  * \param second_dia DIA, which is zipped together with this DIA.
1716  *
1717  * \param zip_function Zip function, which zips two elements (the i-th of each
1718  * input DIA) together.
1719  *
1720  * \ingroup dia_dops
1721  */
1722  template <typename ZipFunction, typename SecondDIA>
1723  auto Zip(struct NoRebalanceTag const&, const SecondDIA& second_dia,
1724  const ZipFunction& zip_function) const;
1725 
1726  /*!
1727  * Zips each item of this DIA with its zero-based array index. This requires
1728  * a full data store/retrieve cycle because the input DIA's size is generally
1729  * unknown.
1730  *
1731  * \param zip_function Zip function, which is given each element together
1732  * with its array index.
1733  *
1734  * \ingroup dia_dops
1735  */
1736  template <typename ZipFunction>
1737  auto ZipWithIndex(const ZipFunction& zip_function) const;
1738 
1739  /*!
1740  * Sort is a DOp, which sorts this DIA according to the given
1741  * compare_function; without arguments std::less<ValueType> is used.
1742  *
1743  * \image html dia_ops/Sort.svg
1744  *
1745  * \tparam CompareFunction Type of the compare_function.
1746  * Should be (ValueType,ValueType)->bool
1747  *
1748  * \param compare_function Function, which compares two elements. Returns
1749  * true, if the first element is smaller than the second, false otherwise.
1750  *
1751  * \ingroup dia_dops
1752  */
1753  template <typename CompareFunction = std::less<ValueType> >
1754  auto Sort(const CompareFunction& compare_function = CompareFunction()) const;
1755 
1756  /*!
1757  * Sort is a DOp, which sorts this DIA according to the given
1758  * compare_function, using the given sort_algorithm.
1759  *
1760  * \image html dia_ops/Sort.svg
1761  *
1762  * \tparam CompareFunction Type of the compare_function.
1763  * Should be (ValueType,ValueType)->bool
1764  *
1765  * \param compare_function Function, which compares two elements. Returns
1766  * true, if the first element is smaller than the second, false otherwise.
1767  *
1768  * \param sort_algorithm Algorithm class used to sort items. Merging is
1769  * always done using a tournament tree with compare_function.
1770  *
1771  * \ingroup dia_dops
1772  */
1773  template <typename CompareFunction, typename SortAlgorithm>
1774  auto Sort(const CompareFunction& compare_function,
1775  const SortAlgorithm& sort_algorithm) const;
1776 
1777  /*!
1778  * SortStable is a DOp, which sorts this DIA stably according to the given
1779  * compare_function; without arguments std::less<ValueType> is used.
1780  *
1781  * \image html dia_ops/Sort.svg
1782  *
1783  * \tparam CompareFunction Type of the compare_function.
1784  * Should be (ValueType,ValueType)->bool
1785  *
1786  * \param compare_function Function, which compares two elements. Returns
1787  * true, if the first element is smaller than the second, false otherwise.
1788  *
1789  * \ingroup dia_dops
1790  */
1791  template <typename CompareFunction = std::less<ValueType> >
1792  auto SortStable(const CompareFunction& compare_function = CompareFunction()) const;
1793 
1794  /*!
1795  * SortStable is a DOp, which sorts this DIA stably according to the given
1796  * compare_function, using the given sort_algorithm.
1797  *
1798  * \image html dia_ops/Sort.svg
1799  *
1800  * \tparam CompareFunction Type of the compare_function.
1801  * Should be (ValueType,ValueType)->bool
1802  *
1803  * \param compare_function Function, which compares two elements. Returns
1804  * true, if the first element is smaller than the second, false otherwise.
1805  *
1806  * \param sort_algorithm Algorithm class used to stably sort items. Merging
1807  * is always done using a tournament tree with compare_function. In order
1808  * for the sorting to be stable, this must be a stable sorting algorithm.
1809  *
1810  * \ingroup dia_dops
1811  */
1812  template <typename CompareFunction, typename SortAlgorithm>
1813  auto SortStable(const CompareFunction& compare_function,
1814  const SortAlgorithm& sort_algorithm) const;
1815 
1816  /*!
1817  * Merge is a DOp, which merges two sorted DIAs to a single sorted DIA.
1818  * Both input DIAs must already be sorted according to the given comparator.
1819  * The type of the output DIA will be the type of this DIA.
1820  *
1821  * \image html dia_ops/Merge.svg
1822  *
1823  * The merge operation balances all input data, so that each worker will
1824  * have an equal number of elements when the merge completes.
1825  *
1826  * \param second_dia DIA, which is merged with this DIA.
1827  *
1828  * \param comparator Order of input and output, default std::less<ValueType>.
1829  *
1830  * \ingroup dia_dops
1831  */
1832  template <typename Comparator = std::less<ValueType>, typename SecondDIA>
1833  auto Merge(const SecondDIA& second_dia,
1834  const Comparator& comparator = Comparator()) const;
1835 
1836  /*!
1837  * PrefixSum is a DOp, which computes the (inclusive) prefix sum of all
1838  * elements. sum_function defines how two elements are combined to a single
1839  * element and defaults to std::plus<ValueType>.
1840  *
1841  * \image html dia_ops/PrefixSum.svg
1842  *
1843  * \param sum_function Sum function (any associative function).
1844  *
1845  * \param initial_element Initial element of the sum, default ValueType().
1846  *
1847  * \ingroup dia_dops
1848  */
1849  template <typename SumFunction = std::plus<ValueType> >
1850  auto PrefixSum(const SumFunction& sum_function = SumFunction(),
1851  const ValueType& initial_element = ValueType()) const;
1852 
1853  /*!
1854  * ExPrefixSum is a DOp, which computes the exclusive prefix sum of all
1855  * elements. sum_function defines how two elements are combined to a single
1856  * element and defaults to std::plus<ValueType>.
1857  *
1858  * \image html dia_ops/ExPrefixSum.svg
1859  *
1860  * \param sum_function Sum function (any associative function).
1861  *
1862  * \param initial_element Initial element of the sum, default ValueType().
1863  *
1864  * \ingroup dia_dops
1865  */
1866  template <typename SumFunction = std::plus<ValueType> >
1867  auto ExPrefixSum(const SumFunction& sum_function = SumFunction(),
1868  const ValueType& initial_element = ValueType()) const;
1869 
1870  /*!
1871  * Window is a DOp, which applies a window function to every k = window_size
1872  * consecutive items in a DIA. The window function is also given the index
1873  * of the first item, and can output zero or more items via an Emitter.
1874  *
1875  * \image html dia_ops/Window.svg
1876  *
1877  * \param window_size the size k of the delivered window.
1878  *
1879  * \param window_function Function applied to every k items. Signature: TODO(tb).
1880  *
1881  * \ingroup dia_dops
1882  */
1883  template <typename WindowFunction>
1884  auto Window(size_t window_size,
1885  const WindowFunction& window_function = WindowFunction()) const;
1886 
1887  /*!
1888  * Window is a DOp, which applies a window function to every k = window_size
1889  * consecutive items in a DIA. The window function is also given the index
1890  * of the first item, and can output zero or more items via an Emitter.
1891  *
1892  * \image html dia_ops/Window.svg
1893  *
1894  * \param window_size the size k of the delivered window.
1895  *
1896  * \param window_function Function applied to every k items. Signature: TODO(tb).
1897  *
1898  * \param partial_window_function Window function applied to windows of fewer
1899  * than k items.
1900  *
1901  * \ingroup dia_dops
1902  */
1903  template <typename WindowFunction, typename PartialWindowFunction>
1904  auto Window(size_t window_size,
1905  const WindowFunction& window_function,
1906  const PartialWindowFunction& partial_window_function) const;
1907 
1908  /*!
1909  * Window (DisjointTag variant) is a DOp, which applies a window function to
1910  * every k consecutive items in a DIA. The window function is also given the
1911  * index of the first item, and can output zero or more items via an Emitter.
1912  *
1913  * \image html dia_ops/Window.svg
1914  *
1915  * \param window_size the size k of the delivered window.
1916  *
1917  * \param window_function Window function applied to every k items.
1918  *
1919  * \ingroup dia_dops
1920  */
1921  template <typename WindowFunction>
1922  auto Window(struct DisjointTag const&, size_t window_size,
1923  const WindowFunction& window_function) const;
1924 
1925  /*!
1926  * FlatWindow is a DOp, which applies a window function to every
1927  * k = window_size consecutive items in a DIA. The window function is also given
1928  * the index of the first item, and can output zero or more items via an Emitter.
1929  *
1930  * \image html dia_ops/Window.svg
1931  *
1932  * \param window_size the size k of the delivered window.
1933  *
1934  * \param window_function Function applied to every k items. Signature: TODO(tb).
1935  *
1936  * \ingroup dia_dops
1937  */
1938  template <typename ValueOut, typename WindowFunction>
1939  auto FlatWindow(
1940  size_t window_size,
1941  const WindowFunction& window_function = WindowFunction()) const;
1942 
1943  /*!
1944  * FlatWindow is a DOp, which applies a window function to every
1945  * k = window_size consecutive items in a DIA. The window function is also given
1946  * the index of the first item, and can output zero or more items via an Emitter.
1947  *
1948  * \image html dia_ops/Window.svg
1949  *
1950  * \param window_size the size k of the delivered window.
1951  *
1952  * \param window_function Function applied to every k items. Signature: TODO(tb).
1953  *
1954  * \param partial_window_function Window function applied to windows of fewer
1955  * than k items.
1956  *
1957  * \ingroup dia_dops
1958  */
1959  template <typename ValueOut, typename WindowFunction,
1960  typename PartialWindowFunction>
1961  auto FlatWindow(size_t window_size,
1962  const WindowFunction& window_function,
1963  const PartialWindowFunction& partial_window_function) const;
1964 
1965  /*!
1966  * FlatWindow (DisjointTag variant) is a DOp, which applies a window function to
1967  * every k consecutive items in a DIA. The window function is also given the
1968  * index of the first item, and can output zero or more items via an Emitter.
1969  *
1970  * \image html dia_ops/Window.svg
1971  *
1972  * \param window_size the size k of the delivered window.
1973  *
1974  * \param window_function Function applied to every k items. Signature: TODO(tb).
1975  *
1976  * \ingroup dia_dops
1977  */
1978  template <typename ValueOut, typename WindowFunction>
1979  auto FlatWindow(struct DisjointTag const&, size_t window_size,
1980  const WindowFunction& window_function) const;
1981 
1982  /*!
1983  * Concat is a DOp, which concatenates this DIA and second_dia to a single
1984  * DIA. All input DIAs must contain the same type, which is also the output
1985  * DIA's type.
1986  *
1987  * The concat operation balances all input data, so that each worker will
1988  * have an equal number of elements when the concat completes.
1989  *
1990  * \ingroup dia_dops
1991  */
1992  template <typename SecondDIA>
1993  auto Concat(const SecondDIA& second_dia) const;
1994 
1995  /*!
1996  * Rebalance is a DOp, which rebalances a single DIA among all workers; in
1997  * general, this operation is needed only if previous steps are known to
1998  * create heavy imbalance (e.g. Filter()s which cut DIAs to ranges).
1999  *
2000  * \ingroup dia_dops
2001  */
2002  auto Rebalance() const;
2003 
2004  /*!
2005  * Create a CollapseNode, which is mainly used to collapse the LOp chain into
2006  * a DIA<T> with an empty stack. This is most often necessary for iterative
2007  * algorithms, where a DIA<T> reference variable is updated in each
2008  * iteration.
2009  *
2010  * \ingroup dia_dops
2011  */
2012  DIA<ValueType> Collapse() const;
2013 
2014  /*!
2015  * Create a CacheNode, which contains all items of a DIA in calculated plain
2016  * format. This is needed if a DIA is reused many times, in order to avoid
2017  * recalculating a PostOp multiple times.
2018  *
2019  * \ingroup dia_dops
2020  */
2021  DIA<ValueType> Cache() const;
2022 
2023  //! \}
2024 
2025 private:
2026  //! The DIANode which DIA points to. The node represents the latest DOp
2027  //! or Action performed previously.
2029 
2030  //! The local function chain, which stores the chained lambda function from
2031  //! the last DIANode to this DIA.
2033 
2034  //! DIA serial id for logging, matches DIANode::id_ for DOps.
2035  size_t dia_id_ = 0;
2036 
2037  //! static DIA (LOp or DOp) node label string, may match DIANode::label_.
2038  const char* label_ = nullptr;
2039 
2040  //! deliver next DIA serial id, as issued by context().next_dia_id()
2041  size_t next_dia_id() { return context().next_dia_id(); }
2042 };
2043 
2044 //! \}
2045 
2046 } // namespace api
2047 
2048 //! imported from api namespace
2049 using api::DIA;
2050 
2051 //! imported from api namespace
2052 using api::DisjointTag;
2053 
2054 //! imported from api namespace
2055 using api::VolatileKeyFlag;
2056 
2057 //! imported from api namespace
2058 using api::VolatileKeyTag;
2059 
2060 //! imported from api namespace
2061 using api::NoVolatileKeyTag;
2062 
2063 //! imported from api namespace
2065 
2066 //! imported from api namespace
2067 using api::CutTag;
2068 
2069 //! imported from api namespace
2070 using api::PadTag;
2071 
2072 //! imported from api namespace
2073 using api::NoRebalanceTag;
2074 
2075 //! imported from api namespace
2077 
2078 //! imported from api namespace
2080 
2081 //! imported from api namespace
2083 
2084 //! imported from api namespace
2086 
2087 //! imported from api namespace
2089 
2090 //! imported from api namespace
2092 
2093 } // namespace thrill
2094 
2095 #endif // !THRILL_API_DIA_HEADER
2096 
2097 /******************************************************************************/
DIA is the interface between the user and the Thrill framework.
Definition: dia.hpp:141
auto Union(const FirstDIA &first_dia, const DIAs &... dias)
Union is a LOp, which creates the union of all items from any number of DIAs as a single DIA...
Definition: union.hpp:319
size_t id() const
Returns id_.
Definition: dia.hpp:285
Type[] Array
A template to make writing temporary arrays easy: Array<int>{ 1, 2, 3 }.
Definition: json_logger.hpp:64
const struct LocationDetectionFlag< true > LocationDetectionTag
global const LocationDetectionFlag instance
Definition: dia.hpp:122
bool IsValid() const
Return whether the DIA is valid.
Definition: dia.hpp:175
typename Stack::Input StackInput
Definition: dia.hpp:160
const struct PadTag PadTag
global const PadTag instance
Definition: dia.hpp:83
DIANodePtr node_
Definition: dia.hpp:2028
const struct VolatileKeyFlag< true > VolatileKeyTag
global const VolatileKeyFlag instance
Definition: dia.hpp:48
STL namespace.
const char * label() const
Returns label_.
Definition: dia.hpp:288
void AssertValid() const
Assert that the DIA is valid.
Definition: dia.hpp:178
DIA(DIANodePtr &&node, const Stack &stack, size_t dia_id, const char *label)
Constructor of a new DIA supporting move semantics of nodes.
Definition: dia.hpp:212
Context & context() const
Return context_ of DIANode, e.g. for creating new LOps and DOps.
Definition: dia.hpp:273
tag structure for ReduceToIndex()
Definition: dia.hpp:54
const struct SkipPreReducePhaseTag SkipPreReducePhaseTag
global const SkipPreReducePhaseTag instance
Definition: dia.hpp:59
#define die(msg)
Instead of calling std::terminate(), throw the output message via an exception.
Definition: die.hpp:22
auto FlatMap(const FlatmapFunction &flatmap_function) const
Each item of a DIA is expanded by the flatmap_function : to zero or more items of different type...
Definition: dia.hpp:458
The Context of a job is a unique instance per worker which holds references to all underlying parts o...
Definition: context.hpp:221
const DIA & Dispose() const
Dispose of the DIANode's data.
Definition: dia.hpp:295
DIA(const DIANodePtr &node, const Stack &stack, size_t dia_id, const char *label)
Constructor of a new DIA with a pointer to a DIANode and a function chain from the DIANode to this DI...
Definition: dia.hpp:194
const DIANodePtr & node() const
Returns a pointer to the according DIANode.
Definition: dia.hpp:255
Specialized template class for ActionFuture which return void.
tag structure for Zip()
Definition: dia.hpp:78
A FunctionStack is a chain of functors that can be folded to a single functor (which is usually optimi...
size_t next_dia_id()
deliver next DIA serial id
Definition: dia.hpp:2041
const struct VolatileKeyFlag< false > NoVolatileKeyTag
global const VolatileKeyFlag instance
Definition: dia.hpp:51
const Stack & stack() const
Returns the stored function chain.
Definition: dia.hpp:267
Context & ctx() const
Return context_ of DIANode, e.g. for creating new LOps and DOps.
Definition: dia.hpp:279
tag structure for ReduceByKey(), and ReduceToIndex()
Definition: dia.hpp:42
auto Merge(const Comparator &comparator, const FirstDIA &first_dia, const DIAs &... dias)
Merge is a DOp, which merges any number of sorted DIAs to a single sorted DIA.
Definition: merge.hpp:674
auto Zip(const ZipFunction &zip_function, const DIA< FirstDIAType, FirstDIAStack > &first_dia, const DIAs &... dias)
Zips two DIAs of equal size in style of functional programming by applying zip_function to the i-th e...
Definition: zip.hpp:426
auto Filter(const FilterFunction &filter_function) const
Each item of a DIA is tested using filter_function : to determine whether it is copied into the outp...
Definition: dia.hpp:405
const DIA & Execute() const
Execute DIA's scope and parents such that this (Action)Node is executed.
Definition: dia.hpp:335
int value
Definition: gen_data.py:41
tag structure for Zip()
Definition: dia.hpp:70
common::FunctionTraits< Function > FunctionTraits
alias for convenience.
Definition: dia.hpp:147
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Definition: allocator.hpp:220
const DIA & KeepForever() const
Mark the referenced DIANode for keeping forever, which makes children not consume the data when execu...
Definition: dia.hpp:325
Stack stack_
Definition: dia.hpp:2032
The return type class for all ActionFutures.
Definition: action_node.hpp:83
const struct LocationDetectionFlag< false > NoLocationDetectionTag
global const LocationDetectionFlag instance
Definition: dia.hpp:125
auto Map(const MapFunction &map_function) const
Map applies map_function : to each item of a DIA and delivers a new DIA containing the returned values...
Definition: dia.hpp:358
tag structure for Zip()
Definition: dia.hpp:86
const struct DuplicateDetectionFlag< true > DuplicateDetectionTag
global const DuplicateDetectionFlag instance
Definition: dia.hpp:109
const struct DuplicateDetectionFlag< false > NoDuplicateDetectionTag
global const DuplicateDetectionFlag instance
Definition: dia.hpp:112
static const bool value
Definition: dia.hpp:44
tag structure for ReduceByKey()
Definition: dia.hpp:103
auto Concat(const FirstDIA &first_dia, const DIAs &... dias)
Concat is a DOp, which concatenates any number of DIAs to a single DIA.
Definition: concat.hpp:331
tag structure for Read()
Definition: dia.hpp:94
size_t node_refcount() const
Returns the number of references to the according DIANode.
Definition: dia.hpp:261
DIA(DIANodePtr &&node)
Constructor of a new DIA with a real backing DIABase.
Definition: dia.hpp:223
tag structure for Window() and FlatWindow()
Definition: dia.hpp:62
tag structure for GroupByKey(), and InnerJoin()
Definition: dia.hpp:116
const struct CutTag CutTag
global const CutTag instance
Definition: dia.hpp:75
const DIA & Keep(size_t increase=1) const
Mark the referenced DIANode for keeping, which makes children not consume the data when executing...
Definition: dia.hpp:310
const struct NoRebalanceTag NoRebalanceTag
global const NoRebalanceTag instance
Definition: dia.hpp:91
const struct DisjointTag DisjointTag
global const DisjointTag instance
Definition: dia.hpp:67
static constexpr size_t kNeverConsume
Never full consume.
Definition: dia_base.hpp:324