ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.14 by root, Tue Oct 29 15:56:32 2013 UTC vs.
Revision 1.40 by root, Tue Dec 10 13:54:40 2013 UTC

19# define HvNAMELEN(hv) HvNAMELEN_get (hv) 19# define HvNAMELEN(hv) HvNAMELEN_get (hv)
20#endif 20#endif
21#ifndef HvNAMEUTF8 21#ifndef HvNAMEUTF8
22# define HvNAMEUTF8(hv) 0 22# define HvNAMEUTF8(hv) 0
23#endif 23#endif
24#ifndef SvREFCNT_dec_NN
25# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
26#endif
27
28// known major and minor types
29enum cbor_type
30{
31 MAJOR_SHIFT = 5,
32 MINOR_MASK = 0x1f,
33
34 MAJOR_POS_INT = 0 << MAJOR_SHIFT,
35 MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
36 MAJOR_BYTES = 2 << MAJOR_SHIFT,
37 MAJOR_TEXT = 3 << MAJOR_SHIFT,
38 MAJOR_ARRAY = 4 << MAJOR_SHIFT,
39 MAJOR_MAP = 5 << MAJOR_SHIFT,
40 MAJOR_TAG = 6 << MAJOR_SHIFT,
41 MAJOR_MISC = 7 << MAJOR_SHIFT,
42
43 // INT/STRING/ARRAY/MAP subtypes
44 LENGTH_EXT1 = 24,
45 LENGTH_EXT2 = 25,
46 LENGTH_EXT4 = 26,
47 LENGTH_EXT8 = 27,
48
49 // SIMPLE types (effectively MISC subtypes)
50 SIMPLE_FALSE = 20,
51 SIMPLE_TRUE = 21,
52 SIMPLE_NULL = 22,
53 SIMPLE_UNDEF = 23,
54
55 // MISC subtype (unused)
56 MISC_EXT1 = 24,
57 MISC_FLOAT16 = 25,
58 MISC_FLOAT32 = 26,
59 MISC_FLOAT64 = 27,
60
61 // BYTES/TEXT/ARRAY/MAP
62 MINOR_INDEF = 31,
63};
24 64
25// known tags 65// known tags
26enum cbor_tag 66enum cbor_tag
27{ 67{
28 // unofficial extensions (pending iana registration) 68 // extensions
29 CBOR_TAG_PERL_OBJECT = 256, 69 CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
30 CBOR_TAG_GENERIC_OBJECT = 257, 70 CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
71 CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
72 CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
73 CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
74 CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
75 CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
31 76
32 // rfc7049 77 // rfc7049
33 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8 78 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
34 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any 79 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
35 CBOR_TAG_POS_BIGNUM = 2, // byte string 80 CBOR_TAG_POS_BIGNUM = 2, // byte string
36 CBOR_TAG_NEG_BIGNUM = 3, // byte string 81 CBOR_TAG_NEG_BIGNUM = 3, // byte string
37 CBOR_TAG_DECIMAL = 4, // decimal fraction, array 82 CBOR_TAG_DECIMAL = 4, // decimal fraction, array
38 CBOR_TAG_BIGFLOAT = 5, // array 83 CBOR_TAG_BIGFLOAT = 5, // array
39 84
40 CBOR_TAG_CONV_B64U = 21, // base64url, any 85 CBOR_TAG_CONV_B64U = 21, // base64url, any
41 CBOR_TAG_CONV_B64 = 22, // base64, any 86 CBOR_TAG_CONV_B64 = 22, // base64, any
42 CBOR_TAG_CONV_HEX = 23, // base16, any 87 CBOR_TAG_CONV_HEX = 23, // base16, any
43 CBOR_TAG_CBOR = 24, // embedded cbor, byte string 88 CBOR_TAG_CBOR = 24, // embedded cbor, byte string
44 89
45 CBOR_TAG_URI = 32, // URI rfc3986, utf-8 90 CBOR_TAG_URI = 32, // URI rfc3986, utf-8
46 CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8 91 CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
47 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8 92 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8
48 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8 93 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
49 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 94 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
50 95
51 CBOR_TAG_MAGIC = 55799 // self-describe cbor 96 CBOR_TAG_MAGIC = 55799, // self-describe cbor
52}; 97};
53 98
54#define F_SHRINK 0x00000200UL 99#define F_SHRINK 0x00000001UL
55#define F_ALLOW_UNKNOWN 0x00002000UL 100#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL
103#define F_PACK_STRINGS 0x00000010UL
104#define F_VALIDATE_UTF8 0x00000020UL
56 105
57#define INIT_SIZE 32 // initial scalar size to be allocated 106#define INIT_SIZE 32 // initial scalar size to be allocated
58 107
59#define SB do { 108#define SB do {
60#define SE } while (0) 109#define SE } while (0)
72# define CBOR_SLOW 0 121# define CBOR_SLOW 0
73# define CBOR_STASH cbor_stash 122# define CBOR_STASH cbor_stash
74#endif 123#endif
75 124
76static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS:: 125static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
77static SV *types_true, *types_false, *types_error, *sv_cbor; 126static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
78 127
79typedef struct { 128typedef struct {
80 U32 flags; 129 U32 flags;
81 U32 max_depth; 130 U32 max_depth;
82 STRLEN max_size; 131 STRLEN max_size;
132 SV *filter;
133
134 // for the incremental parser
135 STRLEN incr_pos; // the current offset into the text
136 STRLEN incr_need; // minimum bytes needed to decode
137 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
83} CBOR; 138} CBOR;
84 139
85ecb_inline void 140ecb_inline void
86cbor_init (CBOR *cbor) 141cbor_init (CBOR *cbor)
87{ 142{
88 Zero (cbor, 1, CBOR); 143 Zero (cbor, 1, CBOR);
89 cbor->max_depth = 512; 144 cbor->max_depth = 512;
145}
146
147ecb_inline void
148cbor_free (CBOR *cbor)
149{
150 SvREFCNT_dec (cbor->filter);
151 SvREFCNT_dec (cbor->incr_count);
90} 152}
91 153
92///////////////////////////////////////////////////////////////////////////// 154/////////////////////////////////////////////////////////////////////////////
93// utility functions 155// utility functions
94 156
116 SvPV_renew (sv, SvCUR (sv) + 1); 178 SvPV_renew (sv, SvCUR (sv) + 1);
117#endif 179#endif
118 } 180 }
119} 181}
120 182
121///////////////////////////////////////////////////////////////////////////// 183// minimum length of a string to be registered for stringref
122// fp hell 184ecb_inline int
123 185minimum_string_length (UV idx)
124//TODO 186{
187 return idx > 23
188 ? idx > 0xffU
189 ? idx > 0xffffU
190 ? idx > 0xffffffffU
191 ? 11
192 : 7
193 : 5
194 : 4
195 : 3;
196}
125 197
126///////////////////////////////////////////////////////////////////////////// 198/////////////////////////////////////////////////////////////////////////////
127// encoder 199// encoder
128 200
129// structure used for encoding CBOR 201// structure used for encoding CBOR
132 char *cur; // SvPVX (sv) + current output position 204 char *cur; // SvPVX (sv) + current output position
133 char *end; // SvEND (sv) 205 char *end; // SvEND (sv)
134 SV *sv; // result scalar 206 SV *sv; // result scalar
135 CBOR cbor; 207 CBOR cbor;
136 U32 depth; // recursion level 208 U32 depth; // recursion level
209 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
210 UV stringref_idx;
211 HV *shareable; // ptr => index, or 0
212 UV shareable_idx;
137} enc_t; 213} enc_t;
138 214
139ecb_inline void 215ecb_inline void
140need (enc_t *enc, STRLEN len) 216need (enc_t *enc, STRLEN len)
141{ 217{
158static void 234static void
159encode_uint (enc_t *enc, int major, UV len) 235encode_uint (enc_t *enc, int major, UV len)
160{ 236{
161 need (enc, 9); 237 need (enc, 9);
162 238
163 if (len < 24) 239 if (ecb_expect_true (len < LENGTH_EXT1))
164 *enc->cur++ = major | len; 240 *enc->cur++ = major | len;
165 else if (len <= 0xff) 241 else if (ecb_expect_true (len <= 0xffU))
166 { 242 {
167 *enc->cur++ = major | 24; 243 *enc->cur++ = major | LENGTH_EXT1;
168 *enc->cur++ = len; 244 *enc->cur++ = len;
169 } 245 }
170 else if (len <= 0xffff) 246 else if (len <= 0xffffU)
171 { 247 {
172 *enc->cur++ = major | 25; 248 *enc->cur++ = major | LENGTH_EXT2;
173 *enc->cur++ = len >> 8; 249 *enc->cur++ = len >> 8;
174 *enc->cur++ = len; 250 *enc->cur++ = len;
175 } 251 }
176 else if (len <= 0xffffffff) 252 else if (len <= 0xffffffffU)
177 { 253 {
178 *enc->cur++ = major | 26; 254 *enc->cur++ = major | LENGTH_EXT4;
179 *enc->cur++ = len >> 24; 255 *enc->cur++ = len >> 24;
180 *enc->cur++ = len >> 16; 256 *enc->cur++ = len >> 16;
181 *enc->cur++ = len >> 8; 257 *enc->cur++ = len >> 8;
182 *enc->cur++ = len; 258 *enc->cur++ = len;
183 } 259 }
184 else 260 else
185 { 261 {
186 *enc->cur++ = major | 27; 262 *enc->cur++ = major | LENGTH_EXT8;
187 *enc->cur++ = len >> 56; 263 *enc->cur++ = len >> 56;
188 *enc->cur++ = len >> 48; 264 *enc->cur++ = len >> 48;
189 *enc->cur++ = len >> 40; 265 *enc->cur++ = len >> 40;
190 *enc->cur++ = len >> 32; 266 *enc->cur++ = len >> 32;
191 *enc->cur++ = len >> 24; 267 *enc->cur++ = len >> 24;
193 *enc->cur++ = len >> 8; 269 *enc->cur++ = len >> 8;
194 *enc->cur++ = len; 270 *enc->cur++ = len;
195 } 271 }
196} 272}
197 273
198static void 274ecb_inline void
275encode_tag (enc_t *enc, UV tag)
276{
277 encode_uint (enc, MAJOR_TAG, tag);
278}
279
280ecb_inline void
199encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 281encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
200{ 282{
201 encode_uint (enc, utf8 ? 0x60 : 0x40, len); 283 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
202 need (enc, len); 284 need (enc, len);
203 memcpy (enc->cur, str, len); 285 memcpy (enc->cur, str, len);
204 enc->cur += len; 286 enc->cur += len;
205} 287}
206 288
289static void
290encode_strref (enc_t *enc, int utf8, char *str, STRLEN len)
291{
292 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
293 {
294 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
295
296 if (SvOK (*svp))
297 {
298 // already registered, use stringref
299 encode_tag (enc, CBOR_TAG_STRINGREF);
300 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
301 return;
302 }
303 else if (len >= minimum_string_length (enc->stringref_idx))
304 {
305 // register only
306 sv_setuv (*svp, enc->stringref_idx);
307 ++enc->stringref_idx;
308 }
309 }
310
311 encode_str (enc, utf8, str, len);
312}
313
207static void encode_sv (enc_t *enc, SV *sv); 314static void encode_sv (enc_t *enc, SV *sv);
208 315
209static void 316static void
210encode_av (enc_t *enc, AV *av) 317encode_av (enc_t *enc, AV *av)
211{ 318{
214 if (enc->depth >= enc->cbor.max_depth) 321 if (enc->depth >= enc->cbor.max_depth)
215 croak (ERR_NESTING_EXCEEDED); 322 croak (ERR_NESTING_EXCEEDED);
216 323
217 ++enc->depth; 324 ++enc->depth;
218 325
219 encode_uint (enc, 0x80, len + 1); 326 encode_uint (enc, MAJOR_ARRAY, len + 1);
220 327
221 for (i = 0; i <= len; ++i) 328 for (i = 0; i <= len; ++i)
222 { 329 {
223 SV **svp = av_fetch (av, i, 0); 330 SV **svp = av_fetch (av, i, 0);
224 encode_sv (enc, svp ? *svp : &PL_sv_undef); 331 encode_sv (enc, svp ? *svp : &PL_sv_undef);
239 346
240 int pairs = hv_iterinit (hv); 347 int pairs = hv_iterinit (hv);
241 int mg = SvMAGICAL (hv); 348 int mg = SvMAGICAL (hv);
242 349
243 if (mg) 350 if (mg)
244 encode_ch (enc, 0xa0 | 31); 351 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
245 else 352 else
246 encode_uint (enc, 0xa0, pairs); 353 encode_uint (enc, MAJOR_MAP, pairs);
247 354
248 while ((he = hv_iternext (hv))) 355 while ((he = hv_iternext (hv)))
249 { 356 {
250 if (HeKLEN (he) == HEf_SVKEY) 357 if (HeKLEN (he) == HEf_SVKEY)
251 encode_sv (enc, HeSVKEY (he)); 358 encode_sv (enc, HeSVKEY (he));
252 else 359 else
253 encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); 360 encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
254 361
255 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); 362 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
256 } 363 }
257 364
258 if (mg) 365 if (mg)
259 encode_ch (enc, 0xe0 | 31); 366 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
260 367
261 --enc->depth; 368 --enc->depth;
262} 369}
263 370
264// encode objects, arrays and special \0=false and \1=true values. 371// encode objects, arrays and special \0=false and \1=true values.
265static void 372static void
266encode_rv (enc_t *enc, SV *sv) 373encode_rv (enc_t *enc, SV *sv)
267{ 374{
268 svtype svt;
269
270 SvGETMAGIC (sv); 375 SvGETMAGIC (sv);
376
271 svt = SvTYPE (sv); 377 svtype svt = SvTYPE (sv);
272 378
273 if (ecb_expect_false (SvOBJECT (sv))) 379 if (ecb_expect_false (SvOBJECT (sv)))
274 { 380 {
275 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash 381 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
276 ? types_boolean_stash 382 ? types_boolean_stash
281 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 387 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
282 ? cbor_tagged_stash 388 ? cbor_tagged_stash
283 : gv_stashpv ("CBOR::XS::Tagged" , 1); 389 : gv_stashpv ("CBOR::XS::Tagged" , 1);
284 390
285 HV *stash = SvSTASH (sv); 391 HV *stash = SvSTASH (sv);
286 GV *method;
287 392
288 if (stash == boolean_stash) 393 if (stash == boolean_stash)
289 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20); 394 {
395 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
396 return;
397 }
290 else if (stash == error_stash) 398 else if (stash == error_stash)
291 encode_ch (enc, 0xe0 | 23); 399 {
400 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
401 return;
402 }
292 else if (stash == tagged_stash) 403 else if (stash == tagged_stash)
293 { 404 {
294 if (svt != SVt_PVAV) 405 if (svt != SVt_PVAV)
295 croak ("encountered CBOR::XS::Tagged object that isn't an array"); 406 croak ("encountered CBOR::XS::Tagged object that isn't an array");
296 407
297 encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1))); 408 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
298 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); 409 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
410
411 return;
412 }
413 }
414
415 if (ecb_expect_false (SvREFCNT (sv) > 1)
416 && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
417 {
418 if (!enc->shareable)
419 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
420
421 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
422
423 if (SvOK (*svp))
299 } 424 {
425 encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
426 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
427 return;
428 }
429 else
430 {
431 sv_setuv (*svp, enc->shareable_idx);
432 ++enc->shareable_idx;
433 encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
434 }
435 }
436
437 if (ecb_expect_false (SvOBJECT (sv)))
438 {
439 HV *stash = SvSTASH (sv);
440 GV *method;
441
300 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) 442 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
301 { 443 {
302 dSP; 444 dSP;
303 445
304 ENTER; SAVETMPS; PUSHMARK (SP); 446 ENTER; SAVETMPS; PUSHMARK (SP);
305 // we re-bless the reference to get overload and other niceties right 447 // we re-bless the reference to get overload and other niceties right
336 478
337 // catch this surprisingly common error 479 // catch this surprisingly common error
338 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) 480 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
339 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); 481 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
340 482
341 encode_uint (enc, 0xc0, CBOR_TAG_PERL_OBJECT); 483 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
342 encode_uint (enc, 0x80, count + 1); 484 encode_uint (enc, MAJOR_ARRAY, count + 1);
343 encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); 485 encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
344 486
345 while (count) 487 while (count)
346 encode_sv (enc, SP[1 - count--]); 488 encode_sv (enc, SP[1 - count--]);
347 489
348 PUTBACK; 490 PUTBACK;
355 } 497 }
356 else if (svt == SVt_PVHV) 498 else if (svt == SVt_PVHV)
357 encode_hv (enc, (HV *)sv); 499 encode_hv (enc, (HV *)sv);
358 else if (svt == SVt_PVAV) 500 else if (svt == SVt_PVAV)
359 encode_av (enc, (AV *)sv); 501 encode_av (enc, (AV *)sv);
360 else if (svt < SVt_PVAV)
361 {
362 STRLEN len = 0;
363 char *pv = svt ? SvPV (sv, len) : 0;
364
365 if (len == 1 && *pv == '1')
366 encode_ch (enc, 0xe0 | 21);
367 else if (len == 1 && *pv == '0')
368 encode_ch (enc, 0xe0 | 20);
369 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
370 encode_ch (enc, 0xe0 | 23);
371 else
372 croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",
373 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
374 }
375 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
376 encode_ch (enc, 0xe0 | 23);
377 else 502 else
378 croak ("encountered %s, but CBOR can only represent references to arrays or hashes", 503 {
379 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 504 encode_tag (enc, CBOR_TAG_INDIRECTION);
505 encode_sv (enc, sv);
506 }
380} 507}
381 508
382static void 509static void
383encode_nv (enc_t *enc, SV *sv) 510encode_nv (enc_t *enc, SV *sv)
384{ 511{
385 double nv = SvNVX (sv); 512 double nv = SvNVX (sv);
386 513
387 need (enc, 9); 514 need (enc, 9);
388 515
389 if (ecb_expect_false (nv == (U32)nv)) 516 if (ecb_expect_false (nv == (NV)(U32)nv))
390 encode_uint (enc, 0x00, (U32)nv); 517 encode_uint (enc, MAJOR_POS_INT, (U32)nv);
391 //TODO: maybe I32? 518 //TODO: maybe I32?
392 else if (ecb_expect_false (nv == (float)nv)) 519 else if (ecb_expect_false (nv == (float)nv))
393 { 520 {
394 uint32_t fp = ecb_float_to_binary32 (nv); 521 uint32_t fp = ecb_float_to_binary32 (nv);
395 522
396 *enc->cur++ = 0xe0 | 26; 523 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
397 524
398 if (!ecb_big_endian ()) 525 if (!ecb_big_endian ())
399 fp = ecb_bswap32 (fp); 526 fp = ecb_bswap32 (fp);
400 527
401 memcpy (enc->cur, &fp, 4); 528 memcpy (enc->cur, &fp, 4);
403 } 530 }
404 else 531 else
405 { 532 {
406 uint64_t fp = ecb_double_to_binary64 (nv); 533 uint64_t fp = ecb_double_to_binary64 (nv);
407 534
408 *enc->cur++ = 0xe0 | 27; 535 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
409 536
410 if (!ecb_big_endian ()) 537 if (!ecb_big_endian ())
411 fp = ecb_bswap64 (fp); 538 fp = ecb_bswap64 (fp);
412 539
413 memcpy (enc->cur, &fp, 8); 540 memcpy (enc->cur, &fp, 8);
422 549
423 if (SvPOKp (sv)) 550 if (SvPOKp (sv))
424 { 551 {
425 STRLEN len; 552 STRLEN len;
426 char *str = SvPV (sv, len); 553 char *str = SvPV (sv, len);
427 encode_str (enc, SvUTF8 (sv), str, len); 554 encode_strref (enc, SvUTF8 (sv), str, len);
428 } 555 }
429 else if (SvNOKp (sv)) 556 else if (SvNOKp (sv))
430 encode_nv (enc, sv); 557 encode_nv (enc, sv);
431 else if (SvIOKp (sv)) 558 else if (SvIOKp (sv))
432 { 559 {
433 if (SvIsUV (sv)) 560 if (SvIsUV (sv))
434 encode_uint (enc, 0x00, SvUVX (sv)); 561 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
435 else if (SvIVX (sv) >= 0) 562 else if (SvIVX (sv) >= 0)
436 encode_uint (enc, 0x00, SvIVX (sv)); 563 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
437 else 564 else
438 encode_uint (enc, 0x20, -(SvIVX (sv) + 1)); 565 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
439 } 566 }
440 else if (SvROK (sv)) 567 else if (SvROK (sv))
441 encode_rv (enc, SvRV (sv)); 568 encode_rv (enc, SvRV (sv));
442 else if (!SvOK (sv)) 569 else if (!SvOK (sv))
443 encode_ch (enc, 0xe0 | 22); 570 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
444 else if (enc->cbor.flags & F_ALLOW_UNKNOWN) 571 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
445 encode_ch (enc, 0xe0 | 23); 572 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
446 else 573 else
447 croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data", 574 croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
448 SvPV_nolen (sv), (unsigned int)SvFLAGS (sv)); 575 SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
449} 576}
450 577
451static SV * 578static SV *
452encode_cbor (SV *scalar, CBOR *cbor) 579encode_cbor (SV *scalar, CBOR *cbor)
453{ 580{
454 enc_t enc; 581 enc_t enc = { };
455 582
456 enc.cbor = *cbor; 583 enc.cbor = *cbor;
457 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 584 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
458 enc.cur = SvPVX (enc.sv); 585 enc.cur = SvPVX (enc.sv);
459 enc.end = SvEND (enc.sv); 586 enc.end = SvEND (enc.sv);
460 enc.depth = 0;
461 587
462 SvPOK_only (enc.sv); 588 SvPOK_only (enc.sv);
589
590 if (cbor->flags & F_PACK_STRINGS)
591 {
592 encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
593 enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
594 enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
595 }
596
463 encode_sv (&enc, scalar); 597 encode_sv (&enc, scalar);
464 598
465 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 599 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
466 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings 600 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
467 601
481 U8 *end; // end of input string 615 U8 *end; // end of input string
482 const char *err; // parse error, if != 0 616 const char *err; // parse error, if != 0
483 CBOR cbor; 617 CBOR cbor;
484 U32 depth; // recursion depth 618 U32 depth; // recursion depth
485 U32 maxdepth; // recursion depth limit 619 U32 maxdepth; // recursion depth limit
620 AV *shareable;
621 AV *stringref;
622 SV *decode_tagged;
486} dec_t; 623} dec_t;
487 624
488#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE 625#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
489 626
490#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") 627#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
493#define DEC_DEC_DEPTH --dec->depth 630#define DEC_DEC_DEPTH --dec->depth
494 631
495static UV 632static UV
496decode_uint (dec_t *dec) 633decode_uint (dec_t *dec)
497{ 634{
498 switch (*dec->cur & 31) 635 U8 m = *dec->cur & MINOR_MASK;
499 { 636 ++dec->cur;
500 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
501 case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
502 case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
503 return *dec->cur++ & 31;
504 637
505 case 24: 638 if (ecb_expect_true (m < LENGTH_EXT1))
639 return m;
640 else if (ecb_expect_true (m == LENGTH_EXT1))
641 {
506 WANT (2); 642 WANT (1);
507 dec->cur += 2; 643 dec->cur += 1;
508 return dec->cur[-1]; 644 return dec->cur[-1];
509 645 }
510 case 25: 646 else if (ecb_expect_true (m == LENGTH_EXT2))
647 {
511 WANT (3); 648 WANT (2);
512 dec->cur += 3; 649 dec->cur += 2;
513 return (((UV)dec->cur[-2]) << 8) 650 return (((UV)dec->cur[-2]) << 8)
514 | ((UV)dec->cur[-1]); 651 | ((UV)dec->cur[-1]);
515 652 }
516 case 26: 653 else if (ecb_expect_true (m == LENGTH_EXT4))
654 {
517 WANT (5); 655 WANT (4);
518 dec->cur += 5; 656 dec->cur += 4;
519 return (((UV)dec->cur[-4]) << 24) 657 return (((UV)dec->cur[-4]) << 24)
520 | (((UV)dec->cur[-3]) << 16) 658 | (((UV)dec->cur[-3]) << 16)
521 | (((UV)dec->cur[-2]) << 8) 659 | (((UV)dec->cur[-2]) << 8)
522 | ((UV)dec->cur[-1]); 660 | ((UV)dec->cur[-1]);
523 661 }
524 case 27: 662 else if (ecb_expect_true (m == LENGTH_EXT8))
663 {
525 WANT (9); 664 WANT (8);
526 dec->cur += 9; 665 dec->cur += 8;
666
667 return
668#if UVSIZE < 8
669 0
670#else
527 return (((UV)dec->cur[-8]) << 56) 671 (((UV)dec->cur[-8]) << 56)
528 | (((UV)dec->cur[-7]) << 48) 672 | (((UV)dec->cur[-7]) << 48)
529 | (((UV)dec->cur[-6]) << 40) 673 | (((UV)dec->cur[-6]) << 40)
530 | (((UV)dec->cur[-5]) << 32) 674 | (((UV)dec->cur[-5]) << 32)
675#endif
531 | (((UV)dec->cur[-4]) << 24) 676 | (((UV)dec->cur[-4]) << 24)
532 | (((UV)dec->cur[-3]) << 16) 677 | (((UV)dec->cur[-3]) << 16)
533 | (((UV)dec->cur[-2]) << 8) 678 | (((UV)dec->cur[-2]) << 8)
534 | ((UV)dec->cur[-1]); 679 | ((UV)dec->cur[-1]);
535 680 }
536 default: 681 else
537 ERR ("corrupted CBOR data (unsupported integer minor encoding)"); 682 ERR ("corrupted CBOR data (unsupported integer minor encoding)");
538 }
539 683
540fail: 684fail:
541 return 0; 685 return 0;
542} 686}
543 687
548{ 692{
549 AV *av = newAV (); 693 AV *av = newAV ();
550 694
551 DEC_INC_DEPTH; 695 DEC_INC_DEPTH;
552 696
553 if ((*dec->cur & 31) == 31) 697 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
554 { 698 {
555 ++dec->cur; 699 ++dec->cur;
556 700
557 for (;;) 701 for (;;)
558 { 702 {
559 WANT (1); 703 WANT (1);
560 704
561 if (*dec->cur == (0xe0 | 31)) 705 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
562 { 706 {
563 ++dec->cur; 707 ++dec->cur;
564 break; 708 break;
565 } 709 }
566 710
569 } 713 }
570 else 714 else
571 { 715 {
572 int i, len = decode_uint (dec); 716 int i, len = decode_uint (dec);
573 717
718 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
574 av_fill (av, len - 1); 719 av_fill (av, len - 1);
575 720
576 for (i = 0; i < len; ++i) 721 for (i = 0; i < len; ++i)
577 AvARRAY (av)[i] = decode_sv (dec); 722 AvARRAY (av)[i] = decode_sv (dec);
578 } 723 }
584 SvREFCNT_dec (av); 729 SvREFCNT_dec (av);
585 DEC_DEC_DEPTH; 730 DEC_DEC_DEPTH;
586 return &PL_sv_undef; 731 return &PL_sv_undef;
587} 732}
588 733
734static void
735decode_he (dec_t *dec, HV *hv)
736{
737 // for speed reasons, we special-case keys that are a single
738 // byte or utf-8 string, but only when !stringref
739
740 if (ecb_expect_true (!dec->stringref))
741 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
742 {
743 I32 len = decode_uint (dec);
744 char *key = (char *)dec->cur;
745
746 dec->cur += len;
747
748 hv_store (hv, key, len, decode_sv (dec), 0);
749
750 return;
751 }
752 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
753 {
754 I32 len = decode_uint (dec);
755 char *key = (char *)dec->cur;
756
757 dec->cur += len;
758
759 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
760 if (!is_utf8_string (key, len))
761 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
762
763 hv_store (hv, key, -len, decode_sv (dec), 0);
764
765 return;
766 }
767
768 SV *k = decode_sv (dec);
769 SV *v = decode_sv (dec);
770
771 hv_store_ent (hv, k, v, 0);
772 SvREFCNT_dec (k);
773
774fail:
775 ;
776}
777
589static SV * 778static SV *
590decode_hv (dec_t *dec) 779decode_hv (dec_t *dec)
591{ 780{
592 HV *hv = newHV (); 781 HV *hv = newHV ();
593 782
594 DEC_INC_DEPTH; 783 DEC_INC_DEPTH;
595 784
596 if ((*dec->cur & 31) == 31) 785 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
597 { 786 {
598 ++dec->cur; 787 ++dec->cur;
599 788
600 for (;;) 789 for (;;)
601 { 790 {
602 WANT (1); 791 WANT (1);
603 792
604 if (*dec->cur == (0xe0 | 31)) 793 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
605 { 794 {
606 ++dec->cur; 795 ++dec->cur;
607 break; 796 break;
608 } 797 }
609 798
610 SV *k = decode_sv (dec); 799 decode_he (dec, hv);
611 SV *v = decode_sv (dec);
612
613 hv_store_ent (hv, k, v, 0);
614 SvREFCNT_dec (k);
615 } 800 }
616 } 801 }
617 else 802 else
618 { 803 {
619 int len = decode_uint (dec); 804 int pairs = decode_uint (dec);
620 805
621 while (len--) 806 while (pairs--)
622 { 807 decode_he (dec, hv);
623 SV *k = decode_sv (dec);
624 SV *v = decode_sv (dec);
625
626 hv_store_ent (hv, k, v, 0);
627 SvREFCNT_dec (k);
628 }
629 } 808 }
630 809
631 DEC_DEC_DEPTH; 810 DEC_DEC_DEPTH;
632 return newRV_noinc ((SV *)hv); 811 return newRV_noinc ((SV *)hv);
633 812
640static SV * 819static SV *
641decode_str (dec_t *dec, int utf8) 820decode_str (dec_t *dec, int utf8)
642{ 821{
643 SV *sv = 0; 822 SV *sv = 0;
644 823
645 if ((*dec->cur & 31) == 31) 824 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF)
646 { 825 {
826 // indefinite length strings
647 ++dec->cur; 827 ++dec->cur;
648 828
829 U8 major = *dec->cur & MAJOR_MISC;
830
649 sv = newSVpvn ("", 0); 831 sv = newSVpvn ("", 0);
650 832
651 // not very fast, and certainly not robust against illegal input
652 for (;;) 833 for (;;)
653 { 834 {
654 WANT (1); 835 WANT (1);
655 836
656 if (*dec->cur == (0xe0 | 31)) 837 if ((*dec->cur - major) > LENGTH_EXT8)
838 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
657 { 839 {
658 ++dec->cur; 840 ++dec->cur;
659 break; 841 break;
660 } 842 }
843 else
844 ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
661 845
662 sv_catsv (sv, decode_sv (dec)); 846 STRLEN len = decode_uint (dec);
847
848 WANT (len);
849 sv_catpvn (sv, dec->cur, len);
850 dec->cur += len;
663 } 851 }
664 } 852 }
665 else 853 else
666 { 854 {
667 STRLEN len = decode_uint (dec); 855 STRLEN len = decode_uint (dec);
668 856
669 WANT (len); 857 WANT (len);
670 sv = newSVpvn (dec->cur, len); 858 sv = newSVpvn (dec->cur, len);
671 dec->cur += len; 859 dec->cur += len;
860
861 if (ecb_expect_false (dec->stringref)
862 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
863 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
672 } 864 }
673 865
674 if (utf8) 866 if (utf8)
867 {
868 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
869 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
870 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
871
675 SvUTF8_on (sv); 872 SvUTF8_on (sv);
873 }
676 874
677 return sv; 875 return sv;
678 876
679fail: 877fail:
680 SvREFCNT_dec (sv); 878 SvREFCNT_dec (sv);
682} 880}
683 881
684static SV * 882static SV *
685decode_tagged (dec_t *dec) 883decode_tagged (dec_t *dec)
686{ 884{
885 SV *sv = 0;
687 UV tag = decode_uint (dec); 886 UV tag = decode_uint (dec);
887
888 WANT (1);
889
890 switch (tag)
891 {
892 case CBOR_TAG_MAGIC:
688 SV *sv = decode_sv (dec); 893 sv = decode_sv (dec);
894 break;
689 895
690 if (tag == CBOR_TAG_MAGIC) 896 case CBOR_TAG_INDIRECTION:
691 return sv; 897 sv = newRV_noinc (decode_sv (dec));
692 else if (tag == CBOR_TAG_PERL_OBJECT) 898 break;
693 { 899
900 case CBOR_TAG_STRINGREF_NAMESPACE:
901 {
902 ENTER; SAVETMPS;
903
904 SAVESPTR (dec->stringref);
905 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
906
907 sv = decode_sv (dec);
908
909 FREETMPS; LEAVE;
910 }
911 break;
912
913 case CBOR_TAG_STRINGREF:
914 {
915 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
916 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
917
918 UV idx = decode_uint (dec);
919
920 if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
921 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
922
923 sv = newSVsv (AvARRAY (dec->stringref)[idx]);
924 }
925 break;
926
927 case CBOR_TAG_VALUE_SHAREABLE:
928 {
929 if (ecb_expect_false (!dec->shareable))
930 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
931
932 if (dec->cbor.flags & F_ALLOW_CYCLES)
933 {
934 sv = newSV (0);
935 av_push (dec->shareable, SvREFCNT_inc_NN (sv));
936
937 SV *osv = decode_sv (dec);
938 sv_setsv (sv, osv);
939 SvREFCNT_dec_NN (osv);
940 }
941 else
942 {
943 av_push (dec->shareable, &PL_sv_undef);
944 int idx = AvFILLp (dec->shareable);
945 sv = decode_sv (dec);
946 av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
947 }
948 }
949 break;
950
951 case CBOR_TAG_VALUE_SHAREDREF:
952 {
953 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
954 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
955
956 UV idx = decode_uint (dec);
957
958 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
959 ERR ("corrupted CBOR data (sharedref index out of bounds)");
960
961 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
962
963 if (sv == &PL_sv_undef)
964 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
965 }
966 break;
967
968 case CBOR_TAG_PERL_OBJECT:
969 {
970 sv = decode_sv (dec);
971
694 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) 972 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
695 ERR ("corrupted CBOR data (non-array perl object)"); 973 ERR ("corrupted CBOR data (non-array perl object)");
696 974
697 AV *av = (AV *)SvRV (sv); 975 AV *av = (AV *)SvRV (sv);
698 int len = av_len (av) + 1; 976 int len = av_len (av) + 1;
699 HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0); 977 HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
700 978
701 if (!stash) 979 if (!stash)
702 ERR ("cannot decode perl-object (package does not exist)"); 980 ERR ("cannot decode perl-object (package does not exist)");
703 981
704 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0); 982 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
705 983
706 if (!method) 984 if (!method)
707 ERR ("cannot decode perl-object (package does not have a THAW method)"); 985 ERR ("cannot decode perl-object (package does not have a THAW method)");
708 986
709 dSP; 987 dSP;
710 988
711 ENTER; SAVETMPS; PUSHMARK (SP); 989 ENTER; SAVETMPS; PUSHMARK (SP);
712 EXTEND (SP, len + 1); 990 EXTEND (SP, len + 1);
713 // we re-bless the reference to get overload and other niceties right 991 // we re-bless the reference to get overload and other niceties right
714 PUSHs (*av_fetch (av, 0, 1)); 992 PUSHs (*av_fetch (av, 0, 1));
715 PUSHs (sv_cbor); 993 PUSHs (sv_cbor);
716 994
717 int i; 995 int i;
718 996
719 for (i = 1; i < len; ++i) 997 for (i = 1; i < len; ++i)
720 PUSHs (*av_fetch (av, i, 1)); 998 PUSHs (*av_fetch (av, i, 1));
721 999
722 PUTBACK; 1000 PUTBACK;
723 call_sv ((SV *)GvCV (method), G_SCALAR); 1001 call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
724 SPAGAIN; 1002 SPAGAIN;
725 1003
1004 if (SvTRUE (ERRSV))
1005 {
1006 FREETMPS; LEAVE;
1007 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1008 }
1009
726 SvREFCNT_dec (sv); 1010 SvREFCNT_dec (sv);
727 sv = SvREFCNT_inc (POPs); 1011 sv = SvREFCNT_inc (POPs);
728 1012
729 PUTBACK; 1013 PUTBACK;
730 1014
731 FREETMPS; LEAVE; 1015 FREETMPS; LEAVE;
1016 }
1017 break;
732 1018
733 return sv; 1019 default:
734 } 1020 {
735 else 1021 sv = decode_sv (dec);
736 { 1022
1023 dSP;
1024 ENTER; SAVETMPS; PUSHMARK (SP);
1025 EXTEND (SP, 2);
1026 PUSHs (newSVuv (tag));
1027 PUSHs (sv);
1028
1029 PUTBACK;
1030 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1031 SPAGAIN;
1032
1033 if (SvTRUE (ERRSV))
1034 {
1035 FREETMPS; LEAVE;
1036 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1037 }
1038
1039 if (count)
1040 {
1041 SvREFCNT_dec (sv);
1042 sv = SvREFCNT_inc (POPs);
1043 }
1044 else
1045 {
737 AV *av = newAV (); 1046 AV *av = newAV ();
738 av_push (av, newSVuv (tag)); 1047 av_push (av, newSVuv (tag));
739 av_push (av, sv); 1048 av_push (av, sv);
740 1049
741 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 1050 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
742 ? cbor_tagged_stash 1051 ? cbor_tagged_stash
743 : gv_stashpv ("CBOR::XS::Tagged" , 1); 1052 : gv_stashpv ("CBOR::XS::Tagged" , 1);
744
745 return sv_bless (newRV_noinc ((SV *)av), tagged_stash); 1053 sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1054 }
1055
1056 PUTBACK;
1057
1058 FREETMPS; LEAVE;
1059 }
1060 break;
746 } 1061 }
1062
1063 return sv;
747 1064
748fail: 1065fail:
749 SvREFCNT_dec (sv); 1066 SvREFCNT_dec (sv);
750 return &PL_sv_undef; 1067 return &PL_sv_undef;
751} 1068}
753static SV * 1070static SV *
754decode_sv (dec_t *dec) 1071decode_sv (dec_t *dec)
755{ 1072{
756 WANT (1); 1073 WANT (1);
757 1074
758 switch (*dec->cur >> 5) 1075 switch (*dec->cur >> MAJOR_SHIFT)
759 { 1076 {
760 case 0: // unsigned int 1077 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
761 return newSVuv (decode_uint (dec)); 1078 case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
762 case 1: // negative int 1079 case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
763 return newSViv (-1 - (IV)decode_uint (dec)); 1080 case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
764 case 2: // octet string 1081 case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
765 return decode_str (dec, 0); 1082 case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
766 case 3: // utf-8 string 1083 case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
767 return decode_str (dec, 1); 1084
768 case 4: // array 1085 case MAJOR_MISC >> MAJOR_SHIFT:
769 return decode_av (dec);
770 case 5: // map
771 return decode_hv (dec);
772 case 6: // tag
773 return decode_tagged (dec);
774 case 7: // misc
775 switch (*dec->cur++ & 31) 1086 switch (*dec->cur++ & MINOR_MASK)
776 { 1087 {
777 case 20: 1088 case SIMPLE_FALSE:
778#if CBOR_SLOW 1089#if CBOR_SLOW
779 types_false = get_bool ("Types::Serialiser::false"); 1090 types_false = get_bool ("Types::Serialiser::false");
780#endif 1091#endif
781 return newSVsv (types_false); 1092 return newSVsv (types_false);
782 case 21: 1093 case SIMPLE_TRUE:
783#if CBOR_SLOW 1094#if CBOR_SLOW
784 types_true = get_bool ("Types::Serialiser::true"); 1095 types_true = get_bool ("Types::Serialiser::true");
785#endif 1096#endif
786 return newSVsv (types_true); 1097 return newSVsv (types_true);
787 case 22: 1098 case SIMPLE_NULL:
788 return newSVsv (&PL_sv_undef); 1099 return newSVsv (&PL_sv_undef);
789 case 23: 1100 case SIMPLE_UNDEF:
790#if CBOR_SLOW 1101#if CBOR_SLOW
791 types_error = get_bool ("Types::Serialiser::error"); 1102 types_error = get_bool ("Types::Serialiser::error");
792#endif 1103#endif
793 return newSVsv (types_error); 1104 return newSVsv (types_error);
794 1105
795 case 25: 1106 case MISC_FLOAT16:
796 { 1107 {
797 WANT (2); 1108 WANT (2);
798 1109
799 uint16_t fp = (dec->cur[0] << 8) | dec->cur[1]; 1110 uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
800 dec->cur += 2; 1111 dec->cur += 2;
801 1112
802 return newSVnv (ecb_binary16_to_float (fp)); 1113 return newSVnv (ecb_binary16_to_float (fp));
803 } 1114 }
804 1115
805 case 26: 1116 case MISC_FLOAT32:
806 { 1117 {
807 uint32_t fp; 1118 uint32_t fp;
808 WANT (4); 1119 WANT (4);
809 memcpy (&fp, dec->cur, 4); 1120 memcpy (&fp, dec->cur, 4);
810 dec->cur += 4; 1121 dec->cur += 4;
813 fp = ecb_bswap32 (fp); 1124 fp = ecb_bswap32 (fp);
814 1125
815 return newSVnv (ecb_binary32_to_float (fp)); 1126 return newSVnv (ecb_binary32_to_float (fp));
816 } 1127 }
817 1128
818 case 27: 1129 case MISC_FLOAT64:
819 { 1130 {
820 uint64_t fp; 1131 uint64_t fp;
821 WANT (8); 1132 WANT (8);
822 memcpy (&fp, dec->cur, 8); 1133 memcpy (&fp, dec->cur, 8);
823 dec->cur += 8; 1134 dec->cur += 8;
826 fp = ecb_bswap64 (fp); 1137 fp = ecb_bswap64 (fp);
827 1138
828 return newSVnv (ecb_binary64_to_double (fp)); 1139 return newSVnv (ecb_binary64_to_double (fp));
829 } 1140 }
830 1141
831 // 0..19 unassigned 1142 // 0..19 unassigned simple
832 // 24 reserved + unassigned (reserved values are not encodable) 1143 // 24 reserved + unassigned simple (reserved values are not encodable)
1144 // 28-30 unassigned misc
1145 // 31 break code
833 default: 1146 default:
834 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1147 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
835 } 1148 }
836 1149
837 break; 1150 break;
838 } 1151 }
839 1152
842} 1155}
843 1156
844static SV * 1157static SV *
845decode_cbor (SV *string, CBOR *cbor, char **offset_return) 1158decode_cbor (SV *string, CBOR *cbor, char **offset_return)
846{ 1159{
847 dec_t dec; 1160 dec_t dec = { };
848 SV *sv; 1161 SV *sv;
1162 STRLEN len;
1163 char *data = SvPVbyte (string, len);
849 1164
850 /* work around bugs in 5.10 where manipulating magic values
851 * makes perl ignore the magic in subsequent accesses.
852 * also make a copy of non-PV values, to get them into a clean
853 * state (SvPV should do that, but it's buggy, see below).
854 */
855 /*SvGETMAGIC (string);*/
856 if (SvMAGICAL (string) || !SvPOK (string))
857 string = sv_2mortal (newSVsv (string));
858
859 SvUPGRADE (string, SVt_PV);
860
861 /* work around a bug in perl 5.10, which causes SvCUR to fail an
862 * assertion with -DDEBUGGING, although SvCUR is documented to
863 * return the xpv_cur field which certainly exists after upgrading.
864 * according to nicholas clark, calling SvPOK fixes this.
865 * But it doesn't fix it, so try another workaround, call SvPV_nolen
866 * and hope for the best.
867 * Damnit, SvPV_nolen still trips over yet another assertion. This
868 * assertion business is seriously broken, try yet another workaround
869 * for the broken -DDEBUGGING.
870 */
871 {
872#ifdef DEBUGGING
873 STRLEN offset = SvOK (string) ? sv_len (string) : 0;
874#else
875 STRLEN offset = SvCUR (string);
876#endif
877
878 if (offset > cbor->max_size && cbor->max_size) 1165 if (len > cbor->max_size && cbor->max_size)
879 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu", 1166 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
880 (unsigned long)SvCUR (string), (unsigned long)cbor->max_size); 1167 (unsigned long)len, (unsigned long)cbor->max_size);
881 }
882
883 sv_utf8_downgrade (string, 0);
884 1168
885 dec.cbor = *cbor; 1169 dec.cbor = *cbor;
886 dec.cur = (U8 *)SvPVX (string); 1170 dec.cur = (U8 *)data;
887 dec.end = (U8 *)SvEND (string); 1171 dec.end = (U8 *)data + len;
888 dec.err = 0;
889 dec.depth = 0;
890 1172
891 sv = decode_sv (&dec); 1173 sv = decode_sv (&dec);
892 1174
893 if (offset_return) 1175 if (offset_return)
894 *offset_return = dec.cur; 1176 *offset_return = dec.cur;
897 if (dec.cur != dec.end && !dec.err) 1179 if (dec.cur != dec.end && !dec.err)
898 dec.err = "garbage after CBOR object"; 1180 dec.err = "garbage after CBOR object";
899 1181
900 if (dec.err) 1182 if (dec.err)
901 { 1183 {
1184 if (dec.shareable)
1185 {
1186 // need to break cyclic links, which would all be in shareable
1187 int i;
1188 SV **svp;
1189
1190 for (i = av_len (dec.shareable) + 1; i--; )
1191 if ((svp = av_fetch (dec.shareable, i, 0)))
1192 sv_setsv (*svp, &PL_sv_undef);
1193 }
1194
902 SvREFCNT_dec (sv); 1195 SvREFCNT_dec (sv);
903 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)SvPVX (string), (int)(uint8_t)*dec.cur); 1196 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
904 } 1197 }
905 1198
906 sv = sv_2mortal (sv); 1199 sv = sv_2mortal (sv);
907 1200
908 return sv; 1201 return sv;
909} 1202}
910 1203
1204/////////////////////////////////////////////////////////////////////////////
1205// incremental parser
1206
1207#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1208
1209// returns 0 for notyet, 1 for success or error
1210static int
1211incr_parse (CBOR *self, SV *cborstr)
1212{
1213 STRLEN cur;
1214 SvPV (cborstr, cur);
1215
1216 while (ecb_expect_true (self->incr_need <= cur))
1217 {
1218 // table of integer count bytes
1219 static I8 incr_len[MINOR_MASK + 1] = {
1220 0, 0, 0, 0, 0, 0, 0, 0,
1221 0, 0, 0, 0, 0, 0, 0, 0,
1222 0, 0, 0, 0, 0, 0, 0, 0,
1223 1, 2, 4, 8,-1,-1,-1,-2
1224 };
1225
1226 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1227 U8 m = *p & MINOR_MASK;
1228 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1229 I8 ilen = incr_len[m];
1230
1231 self->incr_need = self->incr_pos + 1;
1232
1233 if (ecb_expect_false (ilen < 0))
1234 {
1235 if (m != MINOR_INDEF)
1236 return 1; // error
1237
1238 if (*p == (MAJOR_MISC | MINOR_INDEF))
1239 {
1240 if (count >= 0)
1241 return 1; // error
1242
1243 count = 1;
1244 }
1245 else
1246 {
1247 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1248 count = -1;
1249 }
1250 }
1251 else
1252 {
1253 self->incr_need += ilen;
1254 if (ecb_expect_false (self->incr_need > cur))
1255 return 0;
1256
1257 int major = *p >> MAJOR_SHIFT;
1258
1259 switch (major)
1260 {
1261 case MAJOR_BYTES >> MAJOR_SHIFT:
1262 case MAJOR_TEXT >> MAJOR_SHIFT:
1263 case MAJOR_ARRAY >> MAJOR_SHIFT:
1264 case MAJOR_MAP >> MAJOR_SHIFT:
1265 {
1266 UV len;
1267
1268 if (ecb_expect_false (ilen))
1269 {
1270 len = 0;
1271
1272 do {
1273 len = (len << 8) | *++p;
1274 } while (--ilen);
1275 }
1276 else
1277 len = m;
1278
1279 switch (major)
1280 {
1281 case MAJOR_BYTES >> MAJOR_SHIFT:
1282 case MAJOR_TEXT >> MAJOR_SHIFT:
1283 self->incr_need += len;
1284 if (ecb_expect_false (self->incr_need > cur))
1285 return 0;
1286
1287 break;
1288
1289 case MAJOR_MAP >> MAJOR_SHIFT:
1290 len <<= 1;
1291 case MAJOR_ARRAY >> MAJOR_SHIFT:
1292 if (len)
1293 {
1294 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1295 count = len + 1;
1296 }
1297 break;
1298 }
1299 }
1300 }
1301 }
1302
1303 self->incr_pos = self->incr_need;
1304
1305 if (count > 0)
1306 {
1307 while (!--count)
1308 {
1309 if (!AvFILLp (self->incr_count))
1310 return 1; // done
1311
1312 SvREFCNT_dec_NN (av_pop (self->incr_count));
1313 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1314 }
1315
1316 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1317 }
1318 }
1319
1320 return 0;
1321}
1322
1323
911///////////////////////////////////////////////////////////////////////////// 1324/////////////////////////////////////////////////////////////////////////////
912// XS interface functions 1325// XS interface functions
913 1326
914MODULE = CBOR::XS PACKAGE = CBOR::XS 1327MODULE = CBOR::XS PACKAGE = CBOR::XS
915 1328
922 types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1); 1335 types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
923 1336
924 types_true = get_bool ("Types::Serialiser::true" ); 1337 types_true = get_bool ("Types::Serialiser::true" );
925 types_false = get_bool ("Types::Serialiser::false"); 1338 types_false = get_bool ("Types::Serialiser::false");
926 types_error = get_bool ("Types::Serialiser::error"); 1339 types_error = get_bool ("Types::Serialiser::error");
1340
1341 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
927 1342
928 sv_cbor = newSVpv ("CBOR", 0); 1343 sv_cbor = newSVpv ("CBOR", 0);
929 SvREADONLY_on (sv_cbor); 1344 SvREADONLY_on (sv_cbor);
930} 1345}
931 1346
952 1367
953void shrink (CBOR *self, int enable = 1) 1368void shrink (CBOR *self, int enable = 1)
954 ALIAS: 1369 ALIAS:
955 shrink = F_SHRINK 1370 shrink = F_SHRINK
956 allow_unknown = F_ALLOW_UNKNOWN 1371 allow_unknown = F_ALLOW_UNKNOWN
1372 allow_sharing = F_ALLOW_SHARING
1373 allow_cycles = F_ALLOW_CYCLES
1374 pack_strings = F_PACK_STRINGS
1375 validate_utf8 = F_VALIDATE_UTF8
957 PPCODE: 1376 PPCODE:
958{ 1377{
959 if (enable) 1378 if (enable)
960 self->flags |= ix; 1379 self->flags |= ix;
961 else 1380 else
966 1385
967void get_shrink (CBOR *self) 1386void get_shrink (CBOR *self)
968 ALIAS: 1387 ALIAS:
969 get_shrink = F_SHRINK 1388 get_shrink = F_SHRINK
970 get_allow_unknown = F_ALLOW_UNKNOWN 1389 get_allow_unknown = F_ALLOW_UNKNOWN
1390 get_allow_sharing = F_ALLOW_SHARING
1391 get_allow_cycles = F_ALLOW_CYCLES
1392 get_pack_strings = F_PACK_STRINGS
1393 get_validate_utf8 = F_VALIDATE_UTF8
971 PPCODE: 1394 PPCODE:
972 XPUSHs (boolSV (self->flags & ix)); 1395 XPUSHs (boolSV (self->flags & ix));
973 1396
974void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1397void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
975 PPCODE: 1398 PPCODE:
991 CODE: 1414 CODE:
992 RETVAL = self->max_size; 1415 RETVAL = self->max_size;
993 OUTPUT: 1416 OUTPUT:
994 RETVAL 1417 RETVAL
995 1418
996#if 0 //TODO 1419void filter (CBOR *self, SV *filter = 0)
997
998void filter_cbor_object (CBOR *self, SV *cb = &PL_sv_undef)
999 PPCODE: 1420 PPCODE:
1000{
1001 SvREFCNT_dec (self->cb_object); 1421 SvREFCNT_dec (self->filter);
1002 self->cb_object = SvOK (cb) ? newSVsv (cb) : 0; 1422 self->filter = filter ? newSVsv (filter) : filter;
1003
1004 XPUSHs (ST (0)); 1423 XPUSHs (ST (0));
1005}
1006 1424
1007void filter_cbor_single_key_object (CBOR *self, SV *key, SV *cb = &PL_sv_undef) 1425SV *get_filter (CBOR *self)
1008 PPCODE: 1426 CODE:
1009{ 1427 RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1010 if (!self->cb_sk_object) 1428 OUTPUT:
1011 self->cb_sk_object = newHV (); 1429 RETVAL
1012
1013 if (SvOK (cb))
1014 hv_store_ent (self->cb_sk_object, key, newSVsv (cb), 0);
1015 else
1016 {
1017 hv_delete_ent (self->cb_sk_object, key, G_DISCARD, 0);
1018
1019 if (!HvKEYS (self->cb_sk_object))
1020 {
1021 SvREFCNT_dec (self->cb_sk_object);
1022 self->cb_sk_object = 0;
1023 }
1024 }
1025
1026 XPUSHs (ST (0));
1027}
1028
1029#endif
1030 1430
1031void encode (CBOR *self, SV *scalar) 1431void encode (CBOR *self, SV *scalar)
1032 PPCODE: 1432 PPCODE:
1033 PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN; 1433 PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1034 XPUSHs (scalar); 1434 XPUSHs (scalar);
1047 EXTEND (SP, 2); 1447 EXTEND (SP, 2);
1048 PUSHs (sv); 1448 PUSHs (sv);
1049 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1449 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1050} 1450}
1051 1451
1052#if 0 1452void incr_parse (CBOR *self, SV *cborstr, int chop = 0)
1453 PPCODE:
1454{
 // Incremental decode entry point. Repeatedly runs the static incr_parse
 // scanner over cborstr; every time it reports a complete item (or an
 // error), the data is handed to decode_cbor and the result is pushed onto
 // the Perl stack. The loop ends when the scanner needs more data.
 // With chop set, the decoded bytes are removed from the front of cborstr,
 // otherwise cborstr is left alone and self->incr_pos remembers the offset.
1455 if (SvUTF8 (cborstr))
1456 sv_utf8_downgrade (cborstr, 0);
1457
 // first call (or first call after incr_reset): set up the scanner state,
 // expecting exactly one top-level item
1458 if (!self->incr_count)
1459 {
1460 self->incr_count = newAV ();
1461 self->incr_pos = 0;
1462 self->incr_need = 1;
1463
1464 av_push (self->incr_count, newSViv (1));
1465 }
1466
1467 for (;;)
1468 {
1469 if (!incr_parse (self, cborstr))
1470 {
 // not enough data yet: refuse to keep waiting when the amount needed
 // already exceeds a configured (non-zero) max_size
1471 if (self->incr_need > self->max_size && self->max_size)
1472 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1473 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1474
1475 break;
1476 }
1477
1478 SV *sv;
1479 char *offset;
1480
 // decode_cbor is bracketed by PUTBACK/SPAGAIN so the local stack pointer
 // is synced before and re-read after the call
1481 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1482 XPUSHs (sv);
1483
 // restart the scanner state for the next top-level item
1484 av_clear (self->incr_count);
1485 av_push (self->incr_count, newSViv (1));
1486
1487 if (chop)
1488 {
1489 self->incr_pos = 0;
1490 sv_chop (cborstr, offset);
1491 }
1492 else
1493 self->incr_pos = offset - SvPVX (cborstr);
1494
1495 self->incr_need = self->incr_pos + 1;
1496 }
1497}
1498
1499void incr_reset (CBOR *self)
1500 CODE:
1501{
 // Discards the incremental scanner state. incr_parse re-creates
 // incr_count, and resets incr_pos/incr_need, when it finds incr_count
 // unset.
1502 SvREFCNT_dec (self->incr_count);
1503 self->incr_count = 0;
1504}
1053 1505
1054void DESTROY (CBOR *self) 1506void DESTROY (CBOR *self)
1055 CODE: 1507 PPCODE:
1056 SvREFCNT_dec (self->cb_sk_object); 1508 cbor_free (self);
1057 SvREFCNT_dec (self->cb_object);
1058
1059#endif
1060 1509
1061PROTOTYPES: ENABLE 1510PROTOTYPES: ENABLE
1062 1511
1063void encode_cbor (SV *scalar) 1512void encode_cbor (SV *scalar)
1513 ALIAS:
1514 encode_cbor = 0
1515 encode_cbor_sharing = F_ALLOW_SHARING
1064 PPCODE: 1516 PPCODE:
1065{ 1517{
1066 CBOR cbor; 1518 CBOR cbor;
1067 cbor_init (&cbor); 1519 cbor_init (&cbor);
1520 cbor.flags |= ix;
1068 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN; 1521 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1069 XPUSHs (scalar); 1522 XPUSHs (scalar);
1070} 1523}
1071 1524
1072void decode_cbor (SV *cborstr) 1525void decode_cbor (SV *cborstr)

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines