msgpack.h
7.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
#ifndef MSGPACK_H
#define MSGPACK_H
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
namespace msgpack {
// The message pack format is dynamically typed, schema-less. Format is:
// message: [type][header][payload]
// where type is one byte, the header length is a fixed function of the type,
// and the payload is zero to N bytes, with its length encoded in [type][header]
// Scalar fields include boolean, signed integer, float, string etc
// Composite types are sequences of messages
// Array field is [header][element][element]...
// Map field is [header][key][value][key][value]...
// Multibyte integer fields are big endian encoded
// The map key can be any message type
// Maps may contain duplicate keys
// Data is not uniquely encoded, e.g. integer "8" may be stored as one byte or
// in as many as nine, as signed or unsigned. Implementation defined.
// Similarly "foo" may embed the length in the type field or in multiple bytes
// This parser is structured as an iterator over a sequence of bytes.
// It calls a user provided function on each message in order to extract fields
// The default implementation for each scalar type is to do nothing. For map or
// arrays, the default implementation returns just after that message to support
// iterating to the next message, but otherwise has no effect.
// A view of a contiguous run of bytes, delimited by two raw pointers.
// Code in this header computes the number of available bytes as end - start,
// so end is one past the last byte of the range.
struct byte_range {
// First byte of the range.
const unsigned char *start;
// One past the last byte of the range.
const unsigned char *end;
};
// Declaration only; the definition is not visible in this header.
// The default handle_array / handle_map implementations in this header call
// it with the start of a message and the end of the enclosing buffer, treat a
// null return as failure, and otherwise use the returned pointer as the start
// of the following message.
const unsigned char *skip_next_message(const unsigned char *start,
const unsigned char *end);
// CRTP base class supplying the callbacks that the parser invokes.
// A visitor derives from functors_defaults<itself> and defines whichever
// handle_* functions it cares about. The public cb_* entry points forward to
// the handle_* function found on Derived, which is Derived's own definition
// when it has one and the private default declared here otherwise.
template <typename Derived> class functors_defaults {
public:
  // Scalar callbacks: forward the decoded value to Derived.
  void cb_string(size_t N, const unsigned char *str) {
    derived().handle_string(N, str);
  }
  void cb_boolean(bool x) { derived().handle_boolean(x); }
  void cb_signed(int64_t x) { derived().handle_signed(x); }
  void cb_unsigned(uint64_t x) { derived().handle_unsigned(x); }

  // Per-element callbacks, invoked by the default handle_array / handle_map.
  void cb_array_elements(byte_range bytes) {
    derived().handle_array_elements(bytes);
  }
  void cb_map_elements(byte_range key, byte_range value) {
    derived().handle_map_elements(key, value);
  }

  // Sequence callbacks. N is the number of elements (array) or key/value
  // pairs (map); bytes starts just after the sequence header and runs to the
  // end of the enclosing buffer. The return value is the position just past
  // the sequence, or a null pointer on failure.
  const unsigned char *cb_array(uint64_t N, byte_range bytes) {
    return derived().handle_array(N, bytes);
  }
  const unsigned char *cb_map(uint64_t N, byte_range bytes) {
    return derived().handle_map(N, bytes);
  }

private:
  Derived &derived() { return *static_cast<Derived *>(this); }

  // Default implementations for scalar ops are no-ops
  void handle_string(size_t, const unsigned char *) {}
  void handle_boolean(bool) {}
  void handle_signed(int64_t) {}
  void handle_unsigned(uint64_t) {}
  void handle_array_elements(byte_range) {}
  void handle_map_elements(byte_range, byte_range) {}

  // Default implementation for sequences is to skip over the messages,
  // reporting each one through the per-element callback on the way.
  const unsigned char *handle_array(uint64_t N, byte_range bytes) {
    for (uint64_t i = 0; i < N; i++) {
      const unsigned char *next = skip_next_message(bytes.start, bytes.end);
      if (!next) {
        // Element i does not fit in the remaining bytes.
        return nullptr;
      }
      // Hand the callback exactly this element, [start, next), in the same
      // way handle_map delimits keys and values, so the callback cannot run
      // on into the following siblings.
      cb_array_elements({bytes.start, next});
      bytes.start = next;
    }
    return bytes.start;
  }

  const unsigned char *handle_map(uint64_t N, byte_range bytes) {
    for (uint64_t i = 0; i < N; i++) {
      const unsigned char *start_key = bytes.start;
      const unsigned char *end_key = skip_next_message(start_key, bytes.end);
      if (!end_key) {
        return nullptr;
      }
      // The value begins where the key ends.
      const unsigned char *start_value = end_key;
      const unsigned char *end_value =
          skip_next_message(start_value, bytes.end);
      if (!end_value) {
        return nullptr;
      }
      cb_map_elements({start_key, end_key}, {start_value, end_value});
      bytes.start = end_value;
    }
    return bytes.start;
  }
};
// The set of message types the parser distinguishes, stored in one byte.
// The enumerators are generated from msgpack.def, which is expected to
// consist of X(NAME, ...) rows: each row contributes an enumerator called
// NAME, in the order the rows appear in that file.
typedef enum : uint8_t {
// Keep only the NAME column of each row; the other columns are unused here.
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) NAME,
#include "msgpack.def"
#undef X
} type;
// Declared [[noreturn]]; this header calls it after its switch statements,
// on the path taken when no case returned. Declaration only.
[[noreturn]] void internal_error();
// Maps the first byte of a message to its msgpack::type. Declaration only.
type parse_type(unsigned char x);
// Byte count, measured from the start of a message of type ty, that
// handle_msgpack_given_type requires to be available before reading further;
// it treats start + bytes_used_fixed(ty) as the end of a scalar message and
// as the first payload/element byte of the other kinds. Declaration only.
unsigned bytes_used_fixed(type ty);
// Function that, given a pointer to the start of a message, returns a 64-bit
// value N. handle_msgpack_given_type uses N as the scalar value, the payload
// length in bytes, or the element count, depending on the message type.
typedef uint64_t (*payload_info_t)(const unsigned char *);
// Returns the payload_info_t to use for messages of type ty. Declaration only.
payload_info_t payload_info(msgpack::type ty);
// Declaration only. Used in this header as bitcast<uint64_t, int64_t>(N) to
// produce the value passed to cb_signed. NOTE(review): presumably
// reinterprets the bits of x as an R -- confirm against the definition.
template <typename T, typename R> R bitcast(T x);
// Parses the single message at the front of `bytes`, whose type is already
// known to be `ty`, and reports it to `f` through the matching cb_* callback.
//
// Returns a pointer to the byte just past the message, or a null pointer when
// the range is too short to hold it. For arrays and maps the result is
// whatever f.cb_array / f.cb_map returns.
//
// Preconditions (asserted): `bytes` is non-empty and its first byte parses to
// `ty`.
template <typename F, msgpack::type ty>
const unsigned char *handle_msgpack_given_type(byte_range bytes, F f) {
  const unsigned char *const first = bytes.start;
  const unsigned char *const last = bytes.end;
  const uint64_t remaining = last - first;
  assert(remaining != 0);
  assert(ty == parse_type(*first));

  // The fixed-size part of the message must fit before anything is decoded.
  const uint64_t header_size = bytes_used_fixed(ty);
  if (remaining < header_size) {
    return nullptr;
  }
  const uint64_t room_for_payload = remaining - header_size;
  const unsigned char *const body = first + header_size;

  // N is a value, a byte length or an element count, depending on ty.
  const payload_info_t read_n = payload_info(ty);
  const uint64_t N = read_n(first);

  switch (ty) {
  case msgpack::t:
  case msgpack::f:
    // In MessagePack false is encoded as 0xc2 and true as 0xc3, so the low
    // bit of the type byte is the boolean value. N is presumably that bit
    // (the payload function is defined elsewhere); it converts to bool here.
    f.cb_boolean(N);
    return body;

  case msgpack::posfixint:
  case msgpack::uint8:
  case msgpack::uint16:
  case msgpack::uint32:
  case msgpack::uint64:
    f.cb_unsigned(N);
    return body;

  case msgpack::negfixint:
  case msgpack::int8:
  case msgpack::int16:
  case msgpack::int32:
  case msgpack::int64:
    f.cb_signed(bitcast<uint64_t, int64_t>(N));
    return body;

  case msgpack::fixstr:
  case msgpack::str8:
  case msgpack::str16:
  case msgpack::str32:
    // N is the string length in bytes; refuse a string that would overrun.
    if (N > room_for_payload) {
      return nullptr;
    }
    f.cb_string(N, body);
    return body + N;

  case msgpack::fixarray:
  case msgpack::array16:
  case msgpack::array32:
    // N is the element count; the callback decides how far to advance.
    return f.cb_array(N, {body, last});

  case msgpack::fixmap:
  case msgpack::map16:
  case msgpack::map32:
    // N is the number of key/value pairs.
    return f.cb_map(N, {body, last});

  case msgpack::nil:
  case msgpack::bin8:
  case msgpack::bin16:
  case msgpack::bin32:
  case msgpack::float32:
  case msgpack::float64:
  case msgpack::ext8:
  case msgpack::ext16:
  case msgpack::ext32:
  case msgpack::fixext1:
  case msgpack::fixext2:
  case msgpack::fixext4:
  case msgpack::fixext8:
  case msgpack::fixext16:
  case msgpack::never_used:
    // No callback exists for these types: step over N payload bytes.
    if (N > room_for_payload) {
      return nullptr;
    }
    return body + N;
  }
  // Every case above returns, so this is only reached if ty matched none.
  internal_error();
}
// Parses the message at the front of `bytes`: reads the type from the first
// byte and forwards to the handle_msgpack_given_type instantiation for that
// type. Returns that call's result, or a null pointer when `bytes` is empty.
template <typename F>
const unsigned char *handle_msgpack(byte_range bytes, F f) {
  if (bytes.start == bytes.end) {
    // Nothing to parse.
    return nullptr;
  }
  // One case per X(...) row of msgpack.def, each selecting the matching
  // compile-time specialisation.
  switch (parse_type(*bytes.start)) {
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER)                                  \
  case msgpack::NAME:                                                          \
    return handle_msgpack_given_type<F, msgpack::NAME>(bytes, f);
#include "msgpack.def"
#undef X
  }
  internal_error();
}
// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably reports whether the message at the front of bytes
// is a string equal to str -- confirm against the definition.
bool message_is_string(byte_range bytes, const char *str);
// Parses the message at the front of `bytes` and, when that message is itself
// a string, invokes callback(length, pointer to the first character byte).
// Every other callback keeps its default behaviour, and the parser's return
// value is discarded.
template <typename C> void foronly_string(byte_range bytes, C callback) {
  struct string_visitor : functors_defaults<string_visitor> {
    C &on_string;
    string_visitor(C &fn) : on_string(fn) {}
    void handle_string(size_t N, const unsigned char *str) {
      on_string(N, str);
    }
  };
  handle_msgpack(bytes, string_visitor(callback));
}
// Parses the message at the front of `bytes` and, when that message is
// reported through the unsigned-integer callback, invokes callback(value).
// Every other callback keeps its default behaviour, and the parser's return
// value is discarded.
template <typename C> void foronly_unsigned(byte_range bytes, C callback) {
  struct unsigned_visitor : functors_defaults<unsigned_visitor> {
    C &on_unsigned;
    unsigned_visitor(C &fn) : on_unsigned(fn) {}
    void handle_unsigned(uint64_t x) { on_unsigned(x); }
  };
  handle_msgpack(bytes, unsigned_visitor(callback));
}
// Parses the message at the front of `bytes` and, when that message is an
// array, invokes callback(element) for its elements in order, where
// element.start points at the first byte of the element. Iteration relies on
// the default array handler of functors_defaults; the parser's return value
// is discarded.
template <typename C> void foreach_array(byte_range bytes, C callback) {
  struct array_visitor : functors_defaults<array_visitor> {
    C &on_element;
    array_visitor(C &fn) : on_element(fn) {}
    void handle_array_elements(byte_range element) { on_element(element); }
  };
  handle_msgpack(bytes, array_visitor(callback));
}
// Parses the message at the front of `bytes` and, when that message is a map,
// invokes callback(key, value) for its entries in order; key and value are
// the byte ranges of the two messages making up the entry. Iteration relies
// on the default map handler of functors_defaults; the parser's return value
// is discarded.
template <typename C> void foreach_map(byte_range bytes, C callback) {
  struct map_visitor : functors_defaults<map_visitor> {
    C &on_entry;
    map_visitor(C &fn) : on_entry(fn) {}
    void handle_map_elements(byte_range key, byte_range value) {
      on_entry(key, value);
    }
  };
  handle_msgpack(bytes, map_visitor(callback));
}
// Crude approximation to json.
// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably writes a textual rendering of the message(s) in
// the given range -- confirm the output destination against the definition.
void dump(byte_range);
} // namespace msgpack
#endif