Thrill  0.1
item_serialization_tools.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * thrill/common/item_serialization_tools.hpp
3  *
4  * Abstract methods common to many serializer and deserializers: serialize
5  * Varint (7-bit encoding), and Strings by prefixing them with their length.
6  * Included by BlockWriter and BinaryBufferBuilder via CRTP.
7  *
8  * Part of Project Thrill - http://project-thrill.org
9  *
10  * Copyright (C) 2015 Timo Bingmann <[email protected]>
11  *
12  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
13  ******************************************************************************/
14 
15 #pragma once
16 #ifndef THRILL_COMMON_ITEM_SERIALIZATION_TOOLS_HEADER
17 #define THRILL_COMMON_ITEM_SERIALIZATION_TOOLS_HEADER
18 
19 #include <cstdint>
20 #include <stdexcept>
21 #include <string>
22 
23 namespace thrill {
24 namespace common {
25 
/*!
 * CRTP class to enhance item/memory writer classes with Varint encoding and
 * String encoding.
 *
 * The methods below cast \c this to \p Writer and call Writer::PutByte() for
 * every encoded byte and Writer::Append(data, len) for raw string payloads, so
 * the deriving class has to provide both.
 *
 * Varints use a base-128 little-endian layout: each output byte carries seven
 * payload bits (least significant group first) and has its high bit set iff
 * another byte follows.
 *
 * NOTE(review): the class-head line was missing from the listing this file was
 * recovered from; the name ItemWriterToolsBase was restored -- confirm against
 * the classes deriving from it.
 */
template <typename Writer>
class ItemWriterToolsBase
{
public:
    //! Append a 32-bit varint to the writer (emits one to five bytes).
    //! \param v value to encode
    //! \return reference to the deriving writer, to allow call chaining
    Writer& PutVarint32(uint32_t v) {
        Writer& w = *static_cast<Writer*>(this);

        // all but the last group: seven payload bits plus continuation bit
        while (v >= 128) {
            w.PutByte(uint8_t((v & 0x7F) | 0x80));
            v >>= 7;
        }
        // last group: high bit clear terminates the varint
        w.PutByte(uint8_t(v));

        return w;
    }

    //! Append a 64-bit varint to the writer (emits one to ten bytes).
    //! \param v value to encode
    //! \return reference to the deriving writer, to allow call chaining
    Writer& PutVarint(uint64_t v) {
        Writer& w = *static_cast<Writer*>(this);

        // all but the last group: seven payload bits plus continuation bit
        while (v >= 128) {
            w.PutByte(uint8_t((v & 0x7F) | 0x80));
            v >>= 7;
        }
        // last group: high bit clear terminates the varint
        w.PutByte(uint8_t(v));

        return w;
    }

    //! Put a string by saving its length (as 64-bit varint) followed by the
    //! data itself.
    //! \param data pointer to the first character
    //! \param len number of characters to write
    Writer& PutString(const char* data, size_t len) {
        return PutVarint(len).Append(data, len);
    }

    //! Put a byte string by saving its length (as 64-bit varint) followed by
    //! the data itself.
    //! \param data pointer to the first byte
    //! \param len number of bytes to write
    Writer& PutString(const uint8_t* data, size_t len) {
        return PutVarint(len).Append(data, len);
    }

    //! Put a std::string by saving its length (as 64-bit varint) followed by
    //! the data itself.
    Writer& PutString(const std::string& str) {
        return PutString(str.data(), str.size());
    }
};
165 
/*!
 * CRTP class to enhance item/memory reader classes with Varint decoding and
 * String decoding.
 *
 * The methods below cast \c this to \p Reader and call Reader::GetByte(),
 * Reader::PeekByte(offset) and Reader::Read(len), so the deriving class has to
 * provide them.
 *
 * Varints are expected in base-128 little-endian layout: each byte carries
 * seven payload bits (least significant group first) and has its high bit set
 * iff another byte follows.
 *
 * NOTE(review): the class-head line was missing from the listing this file was
 * recovered from; the name ItemReaderToolsBase was restored -- confirm against
 * the classes deriving from it.
 */
template <typename Reader>
class ItemReaderToolsBase
{
public:
    //! Fetch a varint with up to 32-bit from the reader at the cursor.
    //! Consumes between one and five bytes via GetByte().
    //! \throws std::overflow_error if the fifth byte carries bits beyond
    //! bit 31 or a continuation flag
    uint32_t GetVarint32() {
        Reader& r = *static_cast<Reader*>(this);

        uint32_t v = 0;
        // first four bytes contribute bits 0..27
        for (unsigned shift = 0; shift < 28; shift += 7) {
            const uint32_t u = r.GetByte();
            v |= (u & 0x7F) << shift;
            if (!(u & 0x80)) return v;
        }
        // fifth byte may only contribute bits 28..31
        const uint32_t u = r.GetByte();
        if (u & 0xF0)
            throw std::overflow_error("Overflow during varint decoding.");
        v |= (u & 0x7F) << 28;
        return v;
    }

    //! Decode a varint with up to 32-bit using only PeekByte(0) ...
    //! PeekByte(4); GetByte() is never called by this const method.
    //! \throws std::overflow_error if the fifth byte carries bits beyond
    //! bit 31 or a continuation flag
    uint32_t PeekVarint32() const {
        const Reader& r = *static_cast<const Reader*>(this);

        uint32_t v = 0;
        // first four bytes contribute bits 0..27
        for (size_t offset = 0; offset < 4; ++offset) {
            const uint32_t u = r.PeekByte(offset);
            v |= (u & 0x7F) << (7 * offset);
            if (!(u & 0x80)) return v;
        }
        // fifth byte may only contribute bits 28..31
        const uint32_t u = r.PeekByte(4);
        if (u & 0xF0)
            throw std::overflow_error("Overflow during varint decoding.");
        v |= (u & 0x7F) << 28;
        return v;
    }

    //! Fetch a 64-bit varint from the reader at the cursor.
    //! Consumes between one and ten bytes via GetByte().
    //! \throws std::overflow_error if the tenth byte is anything but 0 or 1
    uint64_t GetVarint() {
        Reader& r = *static_cast<Reader*>(this);

        uint64_t v = 0;
        // first nine bytes contribute bits 0..62
        for (unsigned shift = 0; shift < 63; shift += 7) {
            const uint64_t u = r.GetByte();
            v |= (u & 0x7F) << shift;
            if (!(u & 0x80)) return v;
        }
        // tenth byte may only contribute bit 63
        const uint64_t u = r.GetByte();
        if (u & 0xFE)
            throw std::overflow_error("Overflow during varint64 decoding.");
        v |= (u & 0x7F) << 63;
        return v;
    }

    //! Fetch a string which was written as a 64-bit varint length followed by
    //! the data: reads the length with GetVarint() and passes it to Read().
    std::string GetString() {
        Reader& r = *static_cast<Reader*>(this);
        return r.Read(GetVarint());
    }
};
250 
251 } // namespace common
252 } // namespace thrill
253 
254 #endif // !THRILL_COMMON_ITEM_SERIALIZATION_TOOLS_HEADER
255 
256 /******************************************************************************/
uint64_t GetVarint()
Fetch a 64-bit varint from the reader at the cursor.
std::string GetString()
Fetch a string which was written via PutString().
CRTP class to enhance item/memory reader classes with Varint decoding and String decoding.
uint32_t PeekVarint32() const
Fetch a varint with up to 32-bit from the reader at the cursor.
Writer & PutString(const char *data, size_t len)
Put a string by saving its length followed by the data itself.
std::basic_string< char, std::char_traits< char >, Allocator< char > > string
string with Manager tracking
Definition: allocator.hpp:220
CRTP class to enhance item/memory writer classes with Varint encoding and String encoding.
Writer & PutVarint32(uint32_t v)
Append a varint to the writer.
Writer & PutString(const std::string &str)
Put a string by saving its length followed by the data itself.
Writer & PutVarint(uint64_t v)
Append a varint to the writer.
Writer & PutString(const uint8_t *data, size_t len)
Put a string by saving its length followed by the data itself.
uint32_t GetVarint32()
Fetch a varint with up to 32-bit from the reader at the cursor.