Thrill  0.1
reduce_probing_hash_table.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/core/reduce_probing_hash_table.hpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2015 Matthias Stumpp <[email protected]>
7  * Copyright (C) 2016 Timo Bingmann <[email protected]>
8  * Copyright (C) 2017 Tim Zeitz <[email protected]>
9  *
10  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
11  ******************************************************************************/
12 
13 #pragma once
14 #ifndef THRILL_CORE_REDUCE_PROBING_HASH_TABLE_HEADER
15 #define THRILL_CORE_REDUCE_PROBING_HASH_TABLE_HEADER
16 
19 
20 #include <algorithm>
21 #include <functional>
22 #include <limits>
23 #include <utility>
24 #include <vector>
25 
26 namespace thrill {
27 namespace core {
28 
29 /*!
30  * A data structure which takes an arbitrary value and extracts a key using a
31  * key extractor function from that value. A key may also be provided initially
32  * as part of a key/value pair, not requiring to extract a key.
33  *
34  * Afterwards, the key is hashed and the hash is used to assign that key/value
35  * pair to some slot.
36  *
37  * In case a slot already has a key/value pair and the key of that value and the
38  * key of the value to be inserted are the same, the values are reduced
39  * according to some reduce function. No key/value is added to the data
40  * structure.
41  *
42  * If the keys are different, the next slot (moving to the right) is considered.
43  * If the slot is occupied, the same procedure happens again (known as linear
44  * probing.)
45  *
46  * Finally, the key/value pair to be inserted may either:
47  *
48  * 1.) Be reduced with some other key/value pair, sharing the same key.
49  * 2.) Inserted at a free slot.
50  * 3.) Trigger a resize of the data structure in case there are no more free
51  * slots in the data structure.
52  *
53  * The following illustration shows the general structure of the data
54  * structure. The set of slots is divided into 1..n partitions. Each key is
55  * hashed into exactly one partition.
56  *
57  *
58  * Partition 0 Partition 1 Partition 2 Partition 3 Partition 4
59  * P00 P01 P02 P10 P11 P12 P20 P21 P22 P30 P31 P32 P40 P41 P42
60  * +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
61  * || | | || | | || | | || | | || | | ||
62  * +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
63  * <- LI ->
64  * LI..Local Index
65  * <- GI ->
66  * GI..Global Index
67  * PI 0 PI 1 PI 2 PI 3 PI 4
68  * PI..Partition ID
69  *
70  */
71 template <typename TableItem, typename Key, typename Value,
72  typename KeyExtractor, typename ReduceFunction, typename Emitter,
73  const bool VolatileKey,
74  typename ReduceConfig_,
75  typename IndexFunction,
76  typename KeyEqualFunction = std::equal_to<Key> >
78  : public ReduceTable<TableItem, Key, Value,
79  KeyExtractor, ReduceFunction, Emitter,
80  VolatileKey, ReduceConfig_,
81  IndexFunction, KeyEqualFunction>
82 {
83  using Super = ReduceTable<TableItem, Key, Value,
84  KeyExtractor, ReduceFunction, Emitter,
85  VolatileKey, ReduceConfig_, IndexFunction,
86  KeyEqualFunction>;
87  using Super::debug;
88  static constexpr bool debug_items = false;
89 
90 public:
91  using ReduceConfig = ReduceConfig_;
92 
94  Context& ctx, size_t dia_id,
95  const KeyExtractor& key_extractor,
96  const ReduceFunction& reduce_function,
97  Emitter& emitter,
98  size_t num_partitions,
99  const ReduceConfig& config = ReduceConfig(),
100  bool immediate_flush = false,
101  const IndexFunction& index_function = IndexFunction(),
102  const KeyEqualFunction& key_equal_function = KeyEqualFunction())
103  : Super(ctx, dia_id,
104  key_extractor, reduce_function, emitter,
105  num_partitions, config, immediate_flush,
107  { assert(num_partitions > 0); }
108 
109  //! Construct the hash table itself. fill it with sentinels. have one extra
110  //! cell beyond the end for reducing the sentinel itself.
112  assert(!items_);
113 
115 
116  // calculate num_buckets_per_partition_ from the memory limit and the
117  // number of partitions required, initialize partition_size_ array.
118 
119  assert(limit_memory_bytes_ >= 0 &&
120  "limit_memory_bytes must be greater than or equal to 0. "
121  "A byte size of zero results in exactly one item per partition");
122 
123  num_buckets_per_partition_ = std::max<size_t>(
124  1,
125  (size_t)(static_cast<double>(limit_memory_bytes_)
126  / static_cast<double>(sizeof(TableItem))
127  / static_cast<double>(num_partitions_)));
128 
130 
131  assert(num_buckets_per_partition_ > 0);
132  assert(num_buckets_ > 0);
133 
134  partition_size_.resize(
135  num_partitions_,
136  std::min(size_t(config_.initial_items_per_partition_),
138 
139  // calculate limit on the number of items in a partition before these
140  // are spilled to disk or flushed to network.
141 
142  double limit_fill_rate = config_.limit_partition_fill_rate();
143 
144  assert(limit_fill_rate >= 0.0 && limit_fill_rate <= 1.0
145  && "limit_partition_fill_rate must be between 0.0 and 1.0. "
146  "with a fill rate of 0.0, items are immediately flushed.");
147 
149  num_partitions_,
150  static_cast<size_t>(
151  static_cast<double>(partition_size_[0]) * limit_fill_rate));
152 
153  assert(limit_items_per_partition_[0] >= 0);
154 
155  // actually allocate the table and initialize the valid ranges, the + 1
156  // is for the sentinel's slot.
157 
158  items_ = static_cast<TableItem*>(
159  operator new ((num_buckets_ + 1) * sizeof(TableItem)));
160 
161  for (size_t id = 0; id < num_partitions_; ++id) {
162  TableItem* iter = items_ + id * num_buckets_per_partition_;
163  TableItem* pend = iter + partition_size_[id];
164 
165  for ( ; iter != pend; ++iter)
166  new (iter)TableItem();
167  }
168  }
169 
171  if (items_) Dispose();
172  }
173 
174  /*!
175  * Inserts a value into the table, potentially reducing it in case both the
176  * key of the value already in the table and the key of the value to be
177  * inserted are the same.
178  *
179  * An insert may trigger a partial flush of the partition with the most
180  * items if the maximal number of items in the table (max_num_items_table)
181  * is reached.
182  *
183  * Alternatively, it may trigger a resize of the table in case the maximal
184  * fill ratio per partition is reached.
185  *
186  * \param kv Value to be inserted into the table.
187  *
188  * \return true if a new key was inserted to the table
189  */
190  bool Insert(const TableItem& kv) {
191 
192  typename IndexFunction::Result h = calculate_index(kv);
193  assert(h.partition_id < num_partitions_);
194 
195  if (TLX_UNLIKELY(key_equal_function_(key(kv), Key()))) {
196  // handle pairs with sentinel key specially by reducing into last
197  // element of items.
198  TableItem& sentinel = items_[num_buckets_];
200  // first occurrence of sentinel key
201  new (&sentinel)TableItem(kv);
202  sentinel_partition_ = h.partition_id;
203  }
204  else {
205  sentinel = reduce(sentinel, kv);
206  return false;
207  }
208  ++items_per_partition_[h.partition_id];
209  ++num_items_;
210 
211  while (TLX_UNLIKELY(
212  items_per_partition_[h.partition_id] >
213  limit_items_per_partition_[h.partition_id])) {
214  GrowAndRehash(h.partition_id);
215  }
216 
217  return true;
218  }
219 
220  // calculate local index depending on the current subtable's size
221  size_t local_index = h.local_index(partition_size_[h.partition_id]);
222 
223  TableItem* pbegin = items_ + h.partition_id * num_buckets_per_partition_;
224  TableItem* pend = pbegin + partition_size_[h.partition_id];
225 
226  TableItem* begin_iter = pbegin + local_index;
227  TableItem* iter = begin_iter;
228 
229  while (!key_equal_function_(key(*iter), Key()))
230  {
231  if (key_equal_function_(key(*iter), key(kv)))
232  {
233  *iter = reduce(*iter, kv);
234  return false;
235  }
236 
237  ++iter;
238 
239  // wrap around if beyond the current partition
240  if (TLX_UNLIKELY(iter == pend))
241  iter = pbegin;
242 
243  // flush partition and retry, if all slots are reserved
244  if (TLX_UNLIKELY(iter == begin_iter)) {
245  GrowAndRehash(h.partition_id);
246  return Insert(kv);
247  }
248  }
249 
250  // insert new pair
251  *iter = kv;
252 
253  // increase counter for partition
254  ++items_per_partition_[h.partition_id];
255  ++num_items_;
256 
257  while (TLX_UNLIKELY(
258  items_per_partition_[h.partition_id] >=
259  limit_items_per_partition_[h.partition_id])) {
260  LOG << "Grow due to "
261  << items_per_partition_[h.partition_id] << " >= "
262  << limit_items_per_partition_[h.partition_id]
263  << " among " << partition_size_[h.partition_id];
264  GrowAndRehash(h.partition_id);
265  }
266 
267  return true;
268  }
269 
270  //! Deallocate items and memory
271  void Dispose() {
272  if (!items_) return;
273 
274  // dispose the items by destructor
275 
276  for (size_t id = 0; id < num_partitions_; ++id) {
277  TableItem* iter = items_ + id * num_buckets_per_partition_;
278  TableItem* pend = iter + partition_size_[id];
279 
280  for ( ; iter != pend; ++iter)
281  iter->~TableItem();
282  }
283 
285  items_[num_buckets_].~TableItem();
286 
287  operator delete (items_);
288  items_ = nullptr;
289 
290  Super::Dispose();
291  }
292 
293  void GrowAndRehash(size_t partition_id) {
294 
295  size_t old_size = partition_size_[partition_id];
296  GrowPartition(partition_id);
297  if (partition_size_[partition_id] == old_size) {
298  SpillPartition(partition_id);
299  return;
300  }
301 
302  if (partition_size_[partition_id] % old_size != 0) {
303  // in place rehashing won't work properly so we spill rather than
304  // potentially blasting memory limits by using an extra vector for
305  // temporary item storage
306  SpillPartition(partition_id);
307  return;
308  }
309 
310  // initialize pointers to old range - the second half is still empty
311  TableItem* pbegin =
312  items_ + partition_id * num_buckets_per_partition_;
313  TableItem* iter = pbegin;
314  TableItem* pend = pbegin + old_size;
315 
316  bool passed_first_half = false;
317  bool found_hole = false;
318  while (!passed_first_half || !found_hole) {
319  Key item_key = key(*iter);
320  bool is_empty = key_equal_function_(item_key, Key());
321  if (!is_empty) {
322  --items_per_partition_[partition_id];
323  --num_items_;
324  TableItem item = std::move(*iter);
325  new (iter)TableItem();
326  Insert(item);
327  }
328 
329  iter++;
330  found_hole = passed_first_half && is_empty;
331  passed_first_half = passed_first_half || iter == pend;
332  }
333  }
334 
335  //! Grow a partition after a spill or flush (if possible)
336  void GrowPartition(size_t partition_id) {
337 
339  SpillPartition(partition_id);
340  return;
341  }
342 
343  if (partition_size_[partition_id] == num_buckets_per_partition_)
344  return;
345 
346  size_t new_size = std::min(
347  num_buckets_per_partition_, 2 * partition_size_[partition_id]);
348 
349  sLOG << "Growing partition" << partition_id
350  << "from" << partition_size_[partition_id] << "to" << new_size
351  << "limit_items" << new_size * config_.limit_partition_fill_rate();
352 
353  // initialize new items
354 
355  TableItem* pbegin =
356  items_ + partition_id * num_buckets_per_partition_;
357  TableItem* iter = pbegin + partition_size_[partition_id];
358  TableItem* pend = pbegin + new_size;
359 
360  for ( ; iter != pend; ++iter)
361  new (iter)TableItem();
362 
363  partition_size_[partition_id] = new_size;
364  limit_items_per_partition_[partition_id]
365  = new_size * config_.limit_partition_fill_rate();
366  }
367 
368  //! \name Spilling Mechanisms to External Memory Files
369  //! \{
370 
371  //! Spill all items of a partition into an external memory File.
372  void SpillPartition(size_t partition_id) {
373 
374  if (immediate_flush_) {
375  return FlushPartition(
376  partition_id, /* consume */ true, /* grow */ !mem::memory_exceeded);
377  }
378 
379  LOG << "Spilling " << items_per_partition_[partition_id]
380  << " items of partition with id: " << partition_id;
381 
382  if (items_per_partition_[partition_id] == 0)
383  return;
384 
385  data::File::Writer writer = partition_files_[partition_id].GetWriter();
386 
387  if (sentinel_partition_ == partition_id) {
388  writer.Put(items_[num_buckets_]);
389  items_[num_buckets_].~TableItem();
391  }
392 
393  TableItem* iter = items_ + partition_id * num_buckets_per_partition_;
394  TableItem* pend = iter + partition_size_[partition_id];
395 
396  for ( ; iter != pend; ++iter) {
397  if (!key_equal_function_(key(*iter), Key())) {
398  writer.Put(*iter);
399  *iter = TableItem();
400  }
401  }
402 
403  // reset partition specific counter
404  num_items_ -= items_per_partition_[partition_id];
405  items_per_partition_[partition_id] = 0;
406  assert(num_items_ == this->num_items_calc());
407 
408  LOG << "Spilled items of partition with id: " << partition_id;
409  }
410 
411  //! Spill all items of an arbitrary partition into an external memory File.
413  // maybe make a policy later -tb
414  return SpillLargestPartition();
415  }
416 
417  //! Spill all items of the largest partition into an external memory File.
419  // get partition with max size
420  size_t size_max = 0, index = 0;
421 
422  for (size_t i = 0; i < num_partitions_; ++i)
423  {
424  if (items_per_partition_[i] > size_max)
425  {
426  size_max = items_per_partition_[i];
427  index = i;
428  }
429  }
430 
431  if (size_max == 0) {
432  return;
433  }
434 
435  return SpillPartition(index);
436  }
437 
438  //! \}
439 
440  //! \name Flushing Mechanisms to Next Stage or Phase
441  //! \{
442 
443  template <typename Emit>
445  size_t partition_id, bool consume, bool grow, Emit emit) {
446 
447  LOG << "Flushing " << items_per_partition_[partition_id]
448  << " items of partition: " << partition_id;
449 
450  if (sentinel_partition_ == partition_id) {
451  emit(partition_id, items_[num_buckets_]);
452  if (consume) {
453  items_[num_buckets_].~TableItem();
455  }
456  }
457 
458  TableItem* iter = items_ + partition_id * num_buckets_per_partition_;
459  TableItem* pend = iter + partition_size_[partition_id];
460 
461  for ( ; iter != pend; ++iter)
462  {
463  if (!key_equal_function_(key(*iter), Key())) {
464  emit(partition_id, *iter);
465 
466  if (consume)
467  *iter = TableItem();
468  }
469  }
470 
471  if (consume) {
472  // reset partition specific counter
473  num_items_ -= items_per_partition_[partition_id];
474  items_per_partition_[partition_id] = 0;
475  assert(num_items_ == this->num_items_calc());
476  }
477 
478  LOG << "Done flushed items of partition: " << partition_id;
479 
480  if (grow)
481  GrowPartition(partition_id);
482  }
483 
484  void FlushPartition(size_t partition_id, bool consume, bool grow) {
486  partition_id, consume, grow,
487  [this](const size_t& partition_id, const TableItem& p) {
488  this->emitter_.Emit(partition_id, p);
489  });
490  }
491 
492  void FlushAll() {
493  for (size_t i = 0; i < num_partitions_; ++i) {
494  FlushPartition(i, /* consume */ true, /* grow */ false);
495  }
496  }
497 
498  //! \}
499 
500 public:
502 
503 private:
504  using Super::config_;
508  using Super::key;
511  using Super::num_buckets_;
513  using Super::num_items_;
516  using Super::reduce;
517 
518  //! Storing the actual hash table.
519  TableItem* items_ = nullptr;
520 
521  //! Current sizes of the partitions because the valid allocated areas grow
522  std::vector<size_t> partition_size_;
523 
524  //! Current limits on the number of items in a partitions, different for
525  //! different partitions, because the valid allocated areas grow.
526  std::vector<size_t> limit_items_per_partition_;
527 
528  //! sentinel for invalid partition or no sentinel.
529  static constexpr size_t invalid_partition_ = size_t(-1);
530 
531  //! store the partition id of the sentinel key. implicitly this also stored
532  //! whether the sentinel key was found and reduced into
533  //! items_[num_buckets_].
535 };
536 
537 template <typename TableItem, typename Key, typename Value,
538  typename KeyExtractor, typename ReduceFunction,
539  typename Emitter, const bool VolatileKey,
540  typename ReduceConfig, typename IndexFunction,
541  typename KeyEqualFunction>
544  TableItem, Key, Value, KeyExtractor, ReduceFunction,
545  Emitter, VolatileKey, ReduceConfig, IndexFunction, KeyEqualFunction>
546 {
547 public:
549  TableItem, Key, Value, KeyExtractor, ReduceFunction,
550  Emitter, VolatileKey, ReduceConfig,
551  IndexFunction, KeyEqualFunction>;
552 };
553 
554 } // namespace core
555 } // namespace thrill
556 
557 #endif // !THRILL_CORE_REDUCE_PROBING_HASH_TABLE_HEADER
558 
559 /******************************************************************************/
#define sLOG
Default logging method: output if the local debug variable is true.
Definition: logger.hpp:34
ReduceProbingHashTable(Context &ctx, size_t dia_id, const KeyExtractor &key_extractor, const ReduceFunction &reduce_function, Emitter &emitter, size_t num_partitions, const ReduceConfig &config=ReduceConfig(), bool immediate_flush=false, const IndexFunction &index_function=IndexFunction(), const KeyEqualFunction &key_equal_function=KeyEqualFunction())
Type selection via ReduceTableImpl enum.
void SpillAnyPartition()
Spill all items of an arbitrary partition into an external memory File.
void FlushPartitionEmit(size_t partition_id, bool consume, bool grow, Emit emit)
A data structure which takes an arbitrary value and extracts a key using a key extractor function fro...
BlockWriter contains a temporary Block object into which a) any serializable item can be stored or b)...
#define TLX_UNLIKELY(c)
Definition: likely.hpp:24
bool memory_exceeded
memory limit exceeded indicator
static constexpr size_t invalid_partition_
sentinel for invalid partition or no sentinel.
The Context of a job is a unique instance per worker which holds references to all underlying parts o...
Definition: context.hpp:221
static constexpr size_t sentinel
a sentinel value prefixed to each allocation
void SpillPartition(size_t partition_id)
Spill all items of a partition into an external memory File.
ReduceTableImpl
Enum class to select a hash table implementation.
bool Insert(const TableItem &kv)
Inserts a value into the table, potentially reducing it in case both the key of the value already in ...
TableItem * items_
Storing the actual hash table.
std::vector< size_t > partition_size_
Current sizes of the partitions because the valid allocated areas grow.
static uint_pair min()
return an uint_pair instance containing the smallest value possible
Definition: uint_types.hpp:217
void Initialize(size_t limit_memory_bytes)
TLX_ATTRIBUTE_ALWAYS_INLINE BlockWriter & Put(const T &x)
Put appends a complete item, or fails with a FullException.
void SpillLargestPartition()
Spill all items of the largest partition into an external memory File.
void GrowPartition(size_t partition_id)
Grow a partition after a spill or flush (if possible)
void Dispose()
Deallocate items and memory.
Common super-class for bucket and linear-probing hash/reduce tables.
#define LOG
Default logging method: output if the local debug variable is true.
Definition: logger.hpp:24
void FlushPartition(size_t partition_id, bool consume, bool grow)