Thrill  0.1
golomb_bit_stream.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/core/golomb_bit_stream.hpp
3  *
4  * Encode bit stream using Golomb code into Block Reader/Writers via
5  * BitStreamWriter/BitStreamReader.
6  *
7  * Part of Project Thrill - http://project-thrill.org
8  *
9  * Copyright (C) 2017 Timo Bingmann <[email protected]>
10  *
11  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
12  ******************************************************************************/
13 
14 #pragma once
15 #ifndef THRILL_CORE_GOLOMB_BIT_STREAM_HEADER
16 #define THRILL_CORE_GOLOMB_BIT_STREAM_HEADER
17 
19 
21 
22 namespace thrill {
23 namespace core {
24 
25 /******************************************************************************/
26 // GolombBitStreamWriter
27 
28 template <typename BlockWriter>
29 class GolombBitStreamWriter : public BitStreamWriter<BlockWriter>
30 {
31 private:
33 
34  using Super::buffer_bits_;
35 
36  enum : size_t { all_set = ~((size_t)0) };
37 
38 public:
39  GolombBitStreamWriter(BlockWriter& block_writer, const size_t& b)
40  : Super(block_writer),
41  b_(b),
42  log2b_(tlx::integer_log2_ceil(b_)), // helper var for Golomb in
43  max_little_value_((((size_t)1) << log2b_) - b_) {
44  die_unless(block_writer.block_size() % sizeof(size_t) == 0);
45  }
46 
47  //! non-copyable: delete copy-constructor
49  //! non-copyable: delete assignment operator
51  //! move-constructor: default
53  //! move-assignment operator: default
55 
57  if (Super::pos_ != 0) {
58  // fill currently remaining buffer item with ones. the decoder will
59  // detect that no zero follows these ones.
60  unsigned bits = buffer_bits_ - Super::pos_;
61  Super::PutBits(all_set >> (buffer_bits_ - bits), bits);
62  assert(Super::pos_ == 0);
63  }
64  }
65 
66  /*!
67  * Append new Golomb-encoded value to bitset
68  */
69  void PutGolomb(const size_t& value) {
70 
72  // First value can be very large. It is therefore not encoded.
73  Super::block_writer_.PutRaw(value);
74  first_call_ = false;
75  return;
76  }
77 
78  // golomb_enc(value) =
79  // unary encoding of (value / b),0,binary encoding of (value % b)
80 
81  size_t q = value / b_;
82  size_t r = value - (q * b_);
83 
84  // d049672: PutGolomb() might fail on pathological sequences that push
85  // the Golomb code to its maximum size. In these cases, it is possible
86  // that the unary sequence of 1s is greater than buffer_bits_, which is
87  // not supported by the original implementation! See
88  // microbench_golomb_coding_pathological_case.cpp for an example.
89  while (q >= buffer_bits_) {
90  q -= buffer_bits_;
92  }
93 
94  if (TLX_UNLIKELY(q + 1 + log2b_ > buffer_bits_)) {
95  // When we need more than buffer_bits_ bits to encode a value, q
96  // and r have to be inserted separately, as PutBits can only
97  // handle up to buffer_bits_ bits at once
98  assert(q + 1 <= buffer_bits_);
99  size_t res = (all_set >> (buffer_bits_ - q - 1)) - 1;
100  Super::PutBits(res, q + 1);
101  if (r >= max_little_value_) {
103  }
104  else {
105  Super::PutBits(r, log2b_ - 1);
106  }
107  }
108  else {
109  // default case
110  size_t res = (all_set >> (buffer_bits_ - q - 1)) - 1;
111 
112  if (r >= max_little_value_) {
113  res = (res << log2b_) | (r + max_little_value_);
114  Super::PutBits(res, q + 1 + log2b_);
115  }
116  else {
117  res = (res << (log2b_ - 1)) | r;
118  Super::PutBits(res, q + 1 + log2b_ - 1);
119  }
120  }
121  }
122 
123  void Put(size_t value) {
124  PutGolomb(value);
125  }
126 
127 private:
128  //! Golomb code parameter
129  size_t b_;
130 
131  //! ceil(log2(b_))
132  int log2b_;
133 
134  //! escape value
136 
137  //! false, when PutGolomb_in was called already
138  bool first_call_ = true;
139 };
140 
141 /******************************************************************************/
142 // GolombBitStreamReader
143 
144 template <typename BlockReader>
145 class GolombBitStreamReader : public BitStreamReader<BlockReader>
146 {
147 private:
149 
150  using Super::buffer_bits_;
151 
152 public:
153  GolombBitStreamReader(BlockReader& block_reader, const size_t& b)
154  : Super(block_reader),
155  b_(b),
156  log2b_(tlx::integer_log2_ceil(b_)), // helper var for Golomb in
157  max_little_value_((((size_t)1) << log2b_) - b_)
158  { }
159 
160  //! non-copyable: delete copy-constructor
162  //! non-copyable: delete assignment operator
164  //! move-constructor: default
166  //! move-assignment operator: default
168 
169  bool HasNext() {
171  return Super::block_reader_.HasNext();
172 
173  return Super::HasNextZeroTest();
174  }
175 
176  size_t GetGolomb() {
177  if (TLX_UNLIKELY(first_call_)) {
178  first_call_ = false;
179  return Super::block_reader_.template GetRaw<size_t>();
180  }
181 
182  size_t q = Super::GetNumberOfOnesUntilNextZero();
183  size_t r = Super::GetBits(log2b_ - 1);
184 
185  if (r >= max_little_value_)
186  r = ((r << 1) + Super::GetBits(1)) - max_little_value_;
187 
188  return (q * b_) + r;
189  }
190 
191  template <typename Type2>
192  size_t Next() {
193  static_assert(
194  std::is_same<size_t, Type2>::value, "Invalid Next() call");
195  return GetGolomb();
196  }
197 
198 private:
199  //! Golomb code parameter
200  size_t b_;
201 
202  //! ceil(log2(b_))
203  int log2b_;
204 
205  //! escape value
207 
208  //! false, when PutGolomb_in was called already
209  bool first_call_ = true;
210 };
211 
212 /******************************************************************************/
213 
214 } // namespace core
215 } // namespace thrill
216 
217 #endif // !THRILL_CORE_GOLOMB_BIT_STREAM_HEADER
218 
219 /******************************************************************************/
#define die_unless(X)
Definition: die.hpp:27
BlockWriter & block_writer_
Output BlockWriter.
Definition: bit_stream.hpp:104
#define TLX_UNLIKELY(c)
Definition: likely.hpp:24
GolombBitStreamWriter(BlockWriter &block_writer, const size_t &b)
size_t b_
Golomb code parameter.
GolombBitStreamReader(BlockReader &block_reader, const size_t &b)
int value
Definition: gen_data.py:41
void PutBits(const size_t &value, unsigned bits)
Append k bits to the data array.
Definition: bit_stream.hpp:61
static unsigned integer_log2_ceil(int i)
calculate the log2 floor of an integer type
size_t pos_
currently filled number of bits
Definition: bit_stream.hpp:110
GolombBitStreamWriter & operator=(const GolombBitStreamWriter &)=delete
non-copyable: delete assignment operator
bool first_call_
false, when PutGolomb_in was called already
size_t b_
Golomb code parameter.
void PutGolomb(const size_t &value)
Append new Golomb-encoded value to bitset.