Thrill  0.1
random_bipartition_shuffle.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/algorithm/random_bipartition_shuffle.hpp
3  *
4  * Part of tlx - http://panthema.net/tlx
5  *
6  * Copyright (C) 2018 Manuel Penschuck <[email protected]>
7  *
8  * All rights reserved. Published under the Boost Software License, Version 1.0
9  ******************************************************************************/
10 
11 #ifndef TLX_ALGORITHM_RANDOM_BIPARTITION_SHUFFLE_HEADER
12 #define TLX_ALGORITHM_RANDOM_BIPARTITION_SHUFFLE_HEADER
13 
14 #include <cassert>
15 #include <iterator>
16 #include <random>
17 
18 namespace tlx {
19 
20 //! \addtogroup tlx_algorithm
21 //! \{
22 
23 /*!
24  * Similar to std::shuffle, but only generates a random bi-partition.
25  * In the result, the
26  * the left partition class is stored at positions 0 to size_left_partition-1
27  * the right partition class is stored at positions size_left_partition to n
28  * where n = std::distance(begin, end).
29  *
30  * Each element is moved to the left partition with probability
31  * (size_left_partition / n). There are no guarantees on the order WITHIN a
32  * partition (which makes this function considerable faster than std::shuffle
33  * especially if partition sizes are unbalanced). The runtime complexity is
34  * linear in the size of the smaller partition class.
35  *
36  * \param begin Iterator to the begin of the data frame
37  * \param end Iterator to the end of the data frame
38  * \param size_left_partition Number of elements to be put into the left
39  * partition: 0 <= size_left_partition <= n
40  * \param urng Random number generator compatible with STL
41  * interface, e.g. std::mt19937
42  */
43 template <typename RandomAccessIt, typename RandomBits>
44 void random_bipartition_shuffle(RandomAccessIt begin, RandomAccessIt end,
45  size_t size_left_partition, RandomBits& urng) {
46  const size_t size = std::distance(begin, end);
47  assert(size_left_partition <= size);
48 
49  // ensure that both paritions are non-empty
50  const size_t size_right_partition = size - size_left_partition;
51  if (!size_left_partition || !size_right_partition) return;
52 
53  // this idea is borrow from GNU stdlibc++ and generates two random
54  // variates uniform on [0, a) and [0, b) respectively.
55  auto two_random_variates =
56  [&urng](size_t a, size_t b) {
57  auto x = std::uniform_int_distribution<size_t>{ 0, (a * b) - 1 } (urng);
58  return std::make_pair(x / b, x % b);
59  };
60 
61  using std::swap; // allow ADL
62 
63  // Perform a Fisher-Yates shuffle, but iterate only over the positions
64  // of the smaller partition.
65  if (size_left_partition < size_right_partition) {
66  auto it = begin;
67 
68  // To avoid wasted random bits, we draw two variates at once
69  // and shuffle two positions per iteration
70  for (size_t i = 1; i < size_left_partition; i += 2) {
71  auto rand = two_random_variates(size - i + 1, size - i);
72  swap(*it++, *std::next(begin, size - 1 - rand.first));
73  swap(*it++, *std::next(begin, size - 1 - rand.second));
74  }
75 
76  // In case the partition contains an odd number of elements,
77  // there's a special case for the last element
78  if (size_left_partition % 2) {
79  auto x = std::uniform_int_distribution<size_t>{ size_left_partition - 1, size - 1 } (urng);
80  swap(*it, *std::next(begin, x));
81  }
82  }
83  else {
84  // Symmetric case to above, this time shuffling the right partition
85  auto it = std::next(begin, size - 1);
86 
87  for (size_t i = size - 2; i >= size_left_partition; i -= 2) {
88  auto rand = two_random_variates(i + 2, i + 1);
89  swap(*it--, *std::next(begin, rand.first));
90  swap(*it--, *std::next(begin, rand.second));
91  }
92 
93  if (size_right_partition % 2) {
94  auto x = std::uniform_int_distribution<size_t>{ 0, size_left_partition } (urng);
95  swap(*it--, *std::next(begin, x));
96  }
97  }
98 }
99 
100 //! \}
101 
102 } // namespace tlx
103 
104 #endif // !TLX_ALGORITHM_RANDOM_BIPARTITION_SHUFFLE_HEADER
105 
106 /******************************************************************************/
void random_bipartition_shuffle(RandomAccessIt begin, RandomAccessIt end, size_t size_left_partition, RandomBits &urng)
Similar to std::shuffle, but only generates a random bi-partition.
void swap(CountingPtr< A, D > &a1, CountingPtr< A, D > &a2) noexcept
list x
Definition: gen_data.py:39