ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.65
Committed: Thu Nov 15 19:52:41 2018 UTC (5 years, 6 months ago) by root
Branch: MAIN
CVS Tags: rel-1_71
Changes since 1.64: +10 -5 lines
Log Message:
1.71

File Contents

# Content
1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4
5 #include <assert.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <limits.h>
10 #include <float.h>
11 #include <inttypes.h>
12
13 #define ECB_NO_THREADS 1
14 #include "ecb.h"
15
16 // compatibility with perl <5.18
17 #ifndef HvNAMELEN_get
18 # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19 #endif
20 #ifndef HvNAMELEN
21 # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22 #endif
23 #ifndef HvNAMEUTF8
24 # define HvNAMEUTF8(hv) 0
25 #endif
26 #ifndef SvREFCNT_inc_NN
27 # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28 #endif
29 #ifndef SvREFCNT_dec_NN
30 # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31 #endif
32
33 // known major and minor types
34 enum cbor_type
35 {
36 MAJOR_SHIFT = 5,
37 MINOR_MASK = 0x1f,
38
39 MAJOR_POS_INT = 0 << MAJOR_SHIFT,
40 MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
41 MAJOR_BYTES = 2 << MAJOR_SHIFT,
42 MAJOR_TEXT = 3 << MAJOR_SHIFT,
43 MAJOR_ARRAY = 4 << MAJOR_SHIFT,
44 MAJOR_MAP = 5 << MAJOR_SHIFT,
45 MAJOR_TAG = 6 << MAJOR_SHIFT,
46 MAJOR_MISC = 7 << MAJOR_SHIFT,
47
48 // INT/STRING/ARRAY/MAP subtypes
49 LENGTH_EXT1 = 24,
50 LENGTH_EXT2 = 25,
51 LENGTH_EXT4 = 26,
52 LENGTH_EXT8 = 27,
53
54 // SIMPLE types (effectively MISC subtypes)
55 SIMPLE_FALSE = 20,
56 SIMPLE_TRUE = 21,
57 SIMPLE_NULL = 22,
58 SIMPLE_UNDEF = 23,
59
60 // MISC subtype (unused)
61 MISC_EXT1 = 24,
62 MISC_FLOAT16 = 25,
63 MISC_FLOAT32 = 26,
64 MISC_FLOAT64 = 27,
65
66 // BYTES/TEXT/ARRAY/MAP
67 MINOR_INDEF = 31,
68 };
69
70 // known tags
71 enum cbor_tag
72 {
73 // extensions
74 CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
75 CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
76 CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
77 CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
78 CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
79 CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
80 CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
81
82 // rfc7049
83 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
84 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
85 CBOR_TAG_POS_BIGNUM = 2, // byte string
86 CBOR_TAG_NEG_BIGNUM = 3, // byte string
87 CBOR_TAG_DECIMAL = 4, // decimal fraction, array
88 CBOR_TAG_BIGFLOAT = 5, // array
89
90 CBOR_TAG_CONV_B64U = 21, // base64url, any
91 CBOR_TAG_CONV_B64 = 22, // base64, any
92 CBOR_TAG_CONV_HEX = 23, // base16, any
93 CBOR_TAG_CBOR = 24, // embedded cbor, byte string
94
95 CBOR_TAG_URI = 32, // URI rfc3986, utf-8
96 CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
97 CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
98 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
99 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
100
101 CBOR_TAG_MAGIC = 55799, // self-describe cbor
102 };
103
104 #define F_SHRINK 0x00000001UL
105 #define F_ALLOW_UNKNOWN 0x00000002UL
106 #define F_ALLOW_SHARING 0x00000004UL
107 #define F_ALLOW_CYCLES 0x00000008UL
108 #define F_FORBID_OBJECTS 0x00000010UL
109 #define F_PACK_STRINGS 0x00000020UL
110 #define F_TEXT_KEYS 0x00000040UL
111 #define F_TEXT_STRINGS 0x00000080UL
112 #define F_VALIDATE_UTF8 0x00000100UL
113
114 #define INIT_SIZE 32 // initial scalar size to be allocated
115
116 #define SB do {
117 #define SE } while (0)
118
119 #define IN_RANGE_INC(type,val,beg,end) \
120 ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
121 <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
122
123 #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
124
125 #ifdef USE_ITHREADS
126 # define CBOR_SLOW 1
127 # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
128 #else
129 # define CBOR_SLOW 0
130 # define CBOR_STASH cbor_stash
131 #endif
132
133 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
134 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
135
136 typedef struct {
137 U32 flags;
138 U32 max_depth;
139 STRLEN max_size;
140 SV *filter;
141
142 // for the incremental parser
143 STRLEN incr_pos; // the current offset into the text
144 STRLEN incr_need; // minimum bytes needed to decode
145 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
146 } CBOR;
147
148 ecb_inline void
149 cbor_init (CBOR *cbor)
150 {
151 Zero (cbor, 1, CBOR);
152 cbor->max_depth = 512;
153 }
154
155 ecb_inline void
156 cbor_free (CBOR *cbor)
157 {
158 SvREFCNT_dec (cbor->filter);
159 SvREFCNT_dec (cbor->incr_count);
160 }
161
162 /////////////////////////////////////////////////////////////////////////////
163 // utility functions
164
165 ecb_inline SV *
166 get_bool (const char *name)
167 {
168 SV *sv = get_sv (name, 1);
169
170 SvREADONLY_on (sv);
171 SvREADONLY_on (SvRV (sv));
172
173 return sv;
174 }
175
176 ecb_inline void
177 shrink (SV *sv)
178 {
179 sv_utf8_downgrade (sv, 1);
180
181 if (SvLEN (sv) > SvCUR (sv) + 1)
182 {
183 #ifdef SvPV_shrink_to_cur
184 SvPV_shrink_to_cur (sv);
185 #elif defined (SvPV_renew)
186 SvPV_renew (sv, SvCUR (sv) + 1);
187 #endif
188 }
189 }
190
191 // minimum length of a string to be registered for stringref
192 ecb_inline int
193 minimum_string_length (UV idx)
194 {
195 return idx <= 23 ? 3
196 : idx <= 0xffU ? 4
197 : idx <= 0xffffU ? 5
198 : idx <= 0xffffffffU ? 7
199 : 11;
200 }
201
202 /////////////////////////////////////////////////////////////////////////////
203 // encoder
204
205 // structure used for encoding CBOR
206 typedef struct
207 {
208 char *cur; // SvPVX (sv) + current output position
209 char *end; // SvEND (sv)
210 SV *sv; // result scalar
211 CBOR cbor;
212 U32 depth; // recursion level
213 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
214 UV stringref_idx;
215 HV *shareable; // ptr => index, or 0
216 UV shareable_idx;
217 } enc_t;
218
219 ecb_inline void
220 need (enc_t *enc, STRLEN len)
221 {
222 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
223 {
224 STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
225 SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
226 enc->cur = SvPVX (enc->sv) + cur;
227 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
228 }
229 }
230
231 ecb_inline void
232 encode_ch (enc_t *enc, char ch)
233 {
234 need (enc, 1);
235 *enc->cur++ = ch;
236 }
237
238 static void
239 encode_uint (enc_t *enc, int major, UV len)
240 {
241 need (enc, 9);
242
243 if (ecb_expect_true (len < LENGTH_EXT1))
244 *enc->cur++ = major | len;
245 else if (ecb_expect_true (len <= 0xffU))
246 {
247 *enc->cur++ = major | LENGTH_EXT1;
248 *enc->cur++ = len;
249 }
250 else if (len <= 0xffffU)
251 {
252 *enc->cur++ = major | LENGTH_EXT2;
253 *enc->cur++ = len >> 8;
254 *enc->cur++ = len;
255 }
256 else if (len <= 0xffffffffU)
257 {
258 *enc->cur++ = major | LENGTH_EXT4;
259 *enc->cur++ = len >> 24;
260 *enc->cur++ = len >> 16;
261 *enc->cur++ = len >> 8;
262 *enc->cur++ = len;
263 }
264 else
265 {
266 *enc->cur++ = major | LENGTH_EXT8;
267 *enc->cur++ = len >> 56;
268 *enc->cur++ = len >> 48;
269 *enc->cur++ = len >> 40;
270 *enc->cur++ = len >> 32;
271 *enc->cur++ = len >> 24;
272 *enc->cur++ = len >> 16;
273 *enc->cur++ = len >> 8;
274 *enc->cur++ = len;
275 }
276 }
277
278 ecb_inline void
279 encode_tag (enc_t *enc, UV tag)
280 {
281 encode_uint (enc, MAJOR_TAG, tag);
282 }
283
284 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
285 ecb_noinline static void
286 encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
287 {
288 STRLEN ulen = len;
289 U8 *p, *pend = (U8 *)str + len;
290
291 for (p = (U8 *)str; p < pend; ++p)
292 ulen += *p >> 7; // count set high bits
293
294 encode_uint (enc, MAJOR_TEXT, ulen);
295
296 need (enc, ulen);
297 for (p = (U8 *)str; p < pend; ++p)
298 if (*p < 0x80)
299 *enc->cur++ = *p;
300 else
301 {
302 *enc->cur++ = 0xc0 + (*p >> 6);
303 *enc->cur++ = 0x80 + (*p & 63);
304 }
305 }
306
307 ecb_inline void
308 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
309 {
310 if (ecb_expect_false (upgrade_utf8))
311 if (!utf8)
312 {
313 encode_str_utf8 (enc, utf8, str, len);
314 return;
315 }
316
317 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
318 need (enc, len);
319 memcpy (enc->cur, str, len);
320 enc->cur += len;
321 }
322
323 ecb_inline void
324 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
325 {
326 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
327 {
328 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
329
330 if (SvOK (*svp))
331 {
332 // already registered, use stringref
333 encode_tag (enc, CBOR_TAG_STRINGREF);
334 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
335 return;
336 }
337 else if (len >= minimum_string_length (enc->stringref_idx))
338 {
339 // register only
340 sv_setuv (*svp, enc->stringref_idx);
341 ++enc->stringref_idx;
342 }
343 }
344
345 encode_str (enc, upgrade_utf8, utf8, str, len);
346 }
347
348 static void encode_sv (enc_t *enc, SV *sv);
349
350 static void
351 encode_av (enc_t *enc, AV *av)
352 {
353 int i, len = av_len (av);
354
355 if (enc->depth >= enc->cbor.max_depth)
356 croak (ERR_NESTING_EXCEEDED);
357
358 ++enc->depth;
359
360 encode_uint (enc, MAJOR_ARRAY, len + 1);
361
362 if (ecb_expect_false (SvMAGICAL (av)))
363 for (i = 0; i <= len; ++i)
364 {
365 SV **svp = av_fetch (av, i, 0);
366 encode_sv (enc, svp ? *svp : &PL_sv_undef);
367 }
368 else
369 for (i = 0; i <= len; ++i)
370 {
371 SV *sv = AvARRAY (av)[i];
372 encode_sv (enc, sv ? sv : &PL_sv_undef);
373 }
374
375 --enc->depth;
376 }
377
378 static void
379 encode_hv (enc_t *enc, HV *hv)
380 {
381 HE *he;
382
383 if (enc->depth >= enc->cbor.max_depth)
384 croak (ERR_NESTING_EXCEEDED);
385
386 ++enc->depth;
387
388 int pairs = hv_iterinit (hv);
389 int mg = SvMAGICAL (hv);
390
391 if (ecb_expect_false (mg))
392 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
393 else
394 encode_uint (enc, MAJOR_MAP, pairs);
395
396 while ((he = hv_iternext (hv)))
397 {
398 if (HeKLEN (he) == HEf_SVKEY)
399 encode_sv (enc, HeSVKEY (he));
400 else
401 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
402
403 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
404 }
405
406 if (ecb_expect_false (mg))
407 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
408
409 --enc->depth;
410 }
411
412 // encode objects, arrays and special \0=false and \1=true values.
413 static void
414 encode_rv (enc_t *enc, SV *sv)
415 {
416 SvGETMAGIC (sv);
417
418 svtype svt = SvTYPE (sv);
419
420 if (ecb_expect_false (SvOBJECT (sv)))
421 {
422 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
423 ? types_boolean_stash
424 : gv_stashpv ("Types::Serialiser::Boolean", 1);
425 HV *error_stash = !CBOR_SLOW || types_error_stash
426 ? types_error_stash
427 : gv_stashpv ("Types::Serialiser::Error", 1);
428 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
429 ? cbor_tagged_stash
430 : gv_stashpv ("CBOR::XS::Tagged" , 1);
431
432 HV *stash = SvSTASH (sv);
433
434 if (stash == boolean_stash)
435 {
436 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
437 return;
438 }
439 else if (stash == error_stash)
440 {
441 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
442 return;
443 }
444 else if (stash == tagged_stash)
445 {
446 if (svt != SVt_PVAV)
447 croak ("encountered CBOR::XS::Tagged object that isn't an array");
448
449 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
450 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
451
452 return;
453 }
454 }
455
456 if (ecb_expect_false (SvREFCNT (sv) > 1)
457 && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
458 {
459 if (ecb_expect_false (!enc->shareable))
460 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
461
462 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
463
464 if (SvOK (*svp))
465 {
466 encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
467 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
468 return;
469 }
470 else
471 {
472 sv_setuv (*svp, enc->shareable_idx);
473 ++enc->shareable_idx;
474 encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
475 }
476 }
477
478 if (ecb_expect_false (SvOBJECT (sv)))
479 {
480 HV *stash = SvSTASH (sv);
481 GV *method;
482
483 if (enc->cbor.flags & F_FORBID_OBJECTS)
484 croak ("encountered object '%s', but forbid_objects is enabled",
485 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
486 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
487 {
488 dSP;
489
490 ENTER; SAVETMPS;
491 PUSHMARK (SP);
492 // we re-bless the reference to get overload and other niceties right
493 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
494
495 PUTBACK;
496 // G_SCALAR ensures that return value is 1
497 call_sv ((SV *)GvCV (method), G_SCALAR);
498 SPAGAIN;
499
500 // catch this surprisingly common error
501 if (SvROK (TOPs) && SvRV (TOPs) == sv)
502 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
503
504 encode_sv (enc, POPs);
505
506 PUTBACK;
507
508 FREETMPS; LEAVE;
509 }
510 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
511 {
512 dSP;
513
514 ENTER; SAVETMPS;
515 PUSHMARK (SP);
516 EXTEND (SP, 2);
517 // we re-bless the reference to get overload and other niceties right
518 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
519 PUSHs (sv_cbor);
520
521 PUTBACK;
522 int count = call_sv ((SV *)GvCV (method), G_ARRAY);
523 SPAGAIN;
524
525 // catch this surprisingly common error
526 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
527 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
528
529 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
530 encode_uint (enc, MAJOR_ARRAY, count + 1);
531 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
532
533 {
534 int i;
535
536 for (i = 0; i < count; ++i)
537 encode_sv (enc, SP[i + 1 - count]);
538
539 SP -= count;
540 }
541
542 PUTBACK;
543
544 FREETMPS; LEAVE;
545 }
546 else
547 croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
548 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
549 }
550 else if (svt == SVt_PVHV)
551 encode_hv (enc, (HV *)sv);
552 else if (svt == SVt_PVAV)
553 encode_av (enc, (AV *)sv);
554 else
555 {
556 encode_tag (enc, CBOR_TAG_INDIRECTION);
557 encode_sv (enc, sv);
558 }
559 }
560
561 static void
562 encode_nv (enc_t *enc, SV *sv)
563 {
564 double nv = SvNVX (sv);
565
566 need (enc, 9);
567
568 if (ecb_expect_false (nv == (NV)(U32)nv))
569 encode_uint (enc, MAJOR_POS_INT, (U32)nv);
570 //TODO: maybe I32?
571 else if (ecb_expect_false (nv == (float)nv))
572 {
573 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
574
575 uint32_t fp = ecb_float_to_binary32 (nv);
576
577 if (!ecb_big_endian ())
578 fp = ecb_bswap32 (fp);
579
580 memcpy (enc->cur, &fp, 4);
581 enc->cur += 4;
582 }
583 else
584 {
585 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
586
587 uint64_t fp = ecb_double_to_binary64 (nv);
588
589 if (!ecb_big_endian ())
590 fp = ecb_bswap64 (fp);
591
592 memcpy (enc->cur, &fp, 8);
593 enc->cur += 8;
594 }
595 }
596
597 static void
598 encode_sv (enc_t *enc, SV *sv)
599 {
600 SvGETMAGIC (sv);
601
602 if (SvPOKp (sv))
603 {
604 STRLEN len;
605 char *str = SvPV (sv, len);
606 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
607 }
608 else if (SvNOKp (sv))
609 encode_nv (enc, sv);
610 else if (SvIOKp (sv))
611 {
612 if (SvIsUV (sv))
613 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
614 else if (SvIVX (sv) >= 0)
615 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
616 else
617 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
618 }
619 else if (SvROK (sv))
620 encode_rv (enc, SvRV (sv));
621 else if (!SvOK (sv))
622 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
623 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
624 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
625 else
626 croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
627 SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
628 }
629
630 static SV *
631 encode_cbor (SV *scalar, CBOR *cbor)
632 {
633 enc_t enc = { 0 };
634
635 enc.cbor = *cbor;
636 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
637 enc.cur = SvPVX (enc.sv);
638 enc.end = SvEND (enc.sv);
639
640 SvPOK_only (enc.sv);
641
642 if (cbor->flags & F_PACK_STRINGS)
643 {
644 encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
645 enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
646 enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
647 }
648
649 encode_sv (&enc, scalar);
650
651 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
652 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
653
654 if (enc.cbor.flags & F_SHRINK)
655 shrink (enc.sv);
656
657 return enc.sv;
658 }
659
660 /////////////////////////////////////////////////////////////////////////////
661 // decoder
662
663 // structure used for decoding CBOR
664 typedef struct
665 {
666 U8 *cur; // current parser pointer
667 U8 *end; // end of input string
668 const char *err; // parse error, if != 0
669 CBOR cbor;
670 U32 depth; // recursion depth
671 U32 maxdepth; // recursion depth limit
672 AV *shareable;
673 AV *stringref;
674 SV *decode_tagged;
675 SV *err_sv; // optional sv for error, needs to be freed
676 } dec_t;
677
678 // set dec->err to ERRSV
679 ecb_cold static void
680 err_errsv (dec_t *dec)
681 {
682 if (!dec->err)
683 {
684 dec->err_sv = newSVsv (ERRSV);
685
686 // chop off the trailing \n
687 SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
688 *SvEND (dec->err_sv) = 0;
689
690 dec->err = SvPVutf8_nolen (dec->err_sv);
691 }
692 }
693
694 // the following functions are used to reduce code size and help the compiler to optimise
695 ecb_cold static void
696 err_set (dec_t *dec, const char *reason)
697 {
698 if (!dec->err)
699 dec->err = reason;
700 }
701
702 ecb_cold static void
703 err_unexpected_end (dec_t *dec)
704 {
705 err_set (dec, "unexpected end of CBOR data");
706 }
707
708 #define ERR_DO(do) SB do; goto fail; SE
709 #define ERR(reason) ERR_DO (err_set (dec, reason))
710 #define ERR_ERRSV ERR_DO (err_errsv (dec))
711
712 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
713
714 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
715 #define DEC_DEC_DEPTH --dec->depth
716
717 static UV
718 decode_uint (dec_t *dec)
719 {
720 U8 m = *dec->cur & MINOR_MASK;
721 ++dec->cur;
722
723 if (ecb_expect_true (m < LENGTH_EXT1))
724 return m;
725 else if (ecb_expect_true (m == LENGTH_EXT1))
726 {
727 WANT (1);
728 dec->cur += 1;
729 return dec->cur[-1];
730 }
731 else if (ecb_expect_true (m == LENGTH_EXT2))
732 {
733 WANT (2);
734 dec->cur += 2;
735 return (((UV)dec->cur[-2]) << 8)
736 | ((UV)dec->cur[-1]);
737 }
738 else if (ecb_expect_true (m == LENGTH_EXT4))
739 {
740 WANT (4);
741 dec->cur += 4;
742 return (((UV)dec->cur[-4]) << 24)
743 | (((UV)dec->cur[-3]) << 16)
744 | (((UV)dec->cur[-2]) << 8)
745 | ((UV)dec->cur[-1]);
746 }
747 else if (ecb_expect_true (m == LENGTH_EXT8))
748 {
749 WANT (8);
750 dec->cur += 8;
751
752 return
753 #if UVSIZE < 8
754 0
755 #else
756 (((UV)dec->cur[-8]) << 56)
757 | (((UV)dec->cur[-7]) << 48)
758 | (((UV)dec->cur[-6]) << 40)
759 | (((UV)dec->cur[-5]) << 32)
760 #endif
761 | (((UV)dec->cur[-4]) << 24)
762 | (((UV)dec->cur[-3]) << 16)
763 | (((UV)dec->cur[-2]) << 8)
764 | ((UV)dec->cur[-1]);
765 }
766 else
767 ERR ("corrupted CBOR data (unsupported integer minor encoding)");
768
769 fail:
770 return 0;
771 }
772
773 static SV *decode_sv (dec_t *dec);
774
775 static SV *
776 decode_av (dec_t *dec)
777 {
778 AV *av = newAV ();
779
780 DEC_INC_DEPTH;
781
782 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
783 {
784 ++dec->cur;
785
786 for (;;)
787 {
788 WANT (1);
789
790 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
791 {
792 ++dec->cur;
793 break;
794 }
795
796 av_push (av, decode_sv (dec));
797 }
798 }
799 else
800 {
801 UV i, len = decode_uint (dec);
802
803 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
804 av_fill (av, len - 1);
805
806 for (i = 0; i < len; ++i)
807 AvARRAY (av)[i] = decode_sv (dec);
808 }
809
810 DEC_DEC_DEPTH;
811 return newRV_noinc ((SV *)av);
812
813 fail:
814 SvREFCNT_dec_NN (av);
815 DEC_DEC_DEPTH;
816 return &PL_sv_undef;
817 }
818
819 static void
820 decode_he (dec_t *dec, HV *hv)
821 {
822 // for speed reasons, we specialcase single-string
823 // byte or utf-8 strings as keys, but only when !stringref
824
825 if (ecb_expect_true (!dec->stringref))
826 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
827 {
828 STRLEN len = decode_uint (dec);
829 char *key = (char *)dec->cur;
830
831 WANT (len);
832 dec->cur += len;
833
834 hv_store (hv, key, len, decode_sv (dec), 0);
835
836 return;
837 }
838 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
839 {
840 STRLEN len = decode_uint (dec);
841 char *key = (char *)dec->cur;
842
843 WANT (len);
844 dec->cur += len;
845
846 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
847 if (!is_utf8_string (key, len))
848 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
849
850 hv_store (hv, key, -len, decode_sv (dec), 0);
851
852 return;
853 }
854
855 SV *k = decode_sv (dec);
856 SV *v = decode_sv (dec);
857
858 // we leak memory if uncaught exceptions are thrown by random magical
859 // methods, and this is hopefully the only place where it can happen,
860 // so if there is a chance of an exception, take the very slow path.
861 // since catching exceptions is "undocumented/internal/forbidden" by
862 // the new p5p powers, we need to call out to a perl function :/
863 if (ecb_expect_false (SvAMAGIC (k)))
864 {
865 dSP;
866
867 ENTER; SAVETMPS;
868 PUSHMARK (SP);
869 EXTEND (SP, 3);
870 PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
871 PUSHs (sv_2mortal (k));
872 PUSHs (sv_2mortal (v));
873
874 PUTBACK;
875 call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
876 SPAGAIN;
877
878 FREETMPS; LEAVE;
879
880 if (SvTRUE (ERRSV))
881 ERR_ERRSV;
882
883 return;
884 }
885
886 hv_store_ent (hv, k, v, 0);
887 SvREFCNT_dec_NN (k);
888
889 fail:
890 ;
891 }
892
893 static SV *
894 decode_hv (dec_t *dec)
895 {
896 HV *hv = newHV ();
897
898 DEC_INC_DEPTH;
899
900 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
901 {
902 ++dec->cur;
903
904 for (;;)
905 {
906 WANT (1);
907
908 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
909 {
910 ++dec->cur;
911 break;
912 }
913
914 decode_he (dec, hv);
915 }
916 }
917 else
918 {
919 UV pairs = decode_uint (dec);
920
921 WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
922
923 while (pairs--)
924 decode_he (dec, hv);
925 }
926
927 DEC_DEC_DEPTH;
928 return newRV_noinc ((SV *)hv);
929
930 fail:
931 SvREFCNT_dec_NN (hv);
932 DEC_DEC_DEPTH;
933 return &PL_sv_undef;
934 }
935
936 static SV *
937 decode_str (dec_t *dec, int utf8)
938 {
939 SV *sv = 0;
940
941 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
942 {
943 // indefinite length strings
944 ++dec->cur;
945
946 U8 major = *dec->cur & MAJOR_MISC;
947
948 sv = newSVpvn ("", 0);
949
950 for (;;)
951 {
952 WANT (1);
953
954 if ((*dec->cur - major) > LENGTH_EXT8)
955 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
956 {
957 ++dec->cur;
958 break;
959 }
960 else
961 ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
962
963 STRLEN len = decode_uint (dec);
964
965 WANT (len);
966 sv_catpvn (sv, dec->cur, len);
967 dec->cur += len;
968 }
969 }
970 else
971 {
972 STRLEN len = decode_uint (dec);
973
974 WANT (len);
975 sv = newSVpvn (dec->cur, len);
976 dec->cur += len;
977
978 if (ecb_expect_false (dec->stringref)
979 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
980 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
981 }
982
983 if (utf8)
984 {
985 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
986 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
987 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
988
989 SvUTF8_on (sv);
990 }
991
992 return sv;
993
994 fail:
995 SvREFCNT_dec (sv);
996 return &PL_sv_undef;
997 }
998
999 static SV *
1000 decode_tagged (dec_t *dec)
1001 {
1002 SV *sv = 0;
1003 UV tag = decode_uint (dec);
1004
1005 WANT (1);
1006
1007 switch (tag)
1008 {
1009 case CBOR_TAG_MAGIC:
1010 sv = decode_sv (dec);
1011 break;
1012
1013 case CBOR_TAG_INDIRECTION:
1014 sv = newRV_noinc (decode_sv (dec));
1015 break;
1016
1017 case CBOR_TAG_STRINGREF_NAMESPACE:
1018 {
1019 // do not use SAVETMPS/FREETMPS, as these will
1020 // erase mortalised caches, e.g. "shareable"
1021 ENTER;
1022
1023 SAVESPTR (dec->stringref);
1024 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1025
1026 sv = decode_sv (dec);
1027
1028 LEAVE;
1029 }
1030 break;
1031
1032 case CBOR_TAG_STRINGREF:
1033 {
1034 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1035 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1036
1037 UV idx = decode_uint (dec);
1038
1039 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1040 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1041
1042 sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1043 }
1044 break;
1045
1046 case CBOR_TAG_VALUE_SHAREABLE:
1047 {
1048 if (ecb_expect_false (!dec->shareable))
1049 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1050
1051 if (dec->cbor.flags & F_ALLOW_CYCLES)
1052 {
1053 sv = newSV (0);
1054 av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1055
1056 SV *osv = decode_sv (dec);
1057 sv_setsv (sv, osv);
1058 SvREFCNT_dec_NN (osv);
1059 }
1060 else
1061 {
1062 av_push (dec->shareable, &PL_sv_undef);
1063 int idx = AvFILLp (dec->shareable);
1064 sv = decode_sv (dec);
1065 av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1066 }
1067 }
1068 break;
1069
1070 case CBOR_TAG_VALUE_SHAREDREF:
1071 {
1072 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1073 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1074
1075 UV idx = decode_uint (dec);
1076
1077 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1078 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1079
1080 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1081
1082 if (sv == &PL_sv_undef)
1083 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1084 }
1085 break;
1086
1087 case CBOR_TAG_PERL_OBJECT:
1088 {
1089 if (dec->cbor.flags & F_FORBID_OBJECTS)
1090 goto filter;
1091
1092 sv = decode_sv (dec);
1093
1094 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1095 ERR ("corrupted CBOR data (non-array perl object)");
1096
1097 AV *av = (AV *)SvRV (sv);
1098 int len = av_len (av) + 1;
1099 HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1100
1101 if (!stash)
1102 ERR ("cannot decode perl-object (package does not exist)");
1103
1104 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1105
1106 if (!method)
1107 ERR ("cannot decode perl-object (package does not have a THAW method)");
1108
1109 dSP;
1110
1111 ENTER; SAVETMPS;
1112 PUSHMARK (SP);
1113 EXTEND (SP, len + 1);
1114 // we re-bless the reference to get overload and other niceties right
1115 PUSHs (*av_fetch (av, 0, 1));
1116 PUSHs (sv_cbor);
1117
1118 int i;
1119
1120 for (i = 1; i < len; ++i)
1121 PUSHs (*av_fetch (av, i, 1));
1122
1123 PUTBACK;
1124 call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1125 SPAGAIN;
1126
1127 if (SvTRUE (ERRSV))
1128 {
1129 FREETMPS; LEAVE;
1130 ERR_ERRSV;
1131 }
1132
1133 SvREFCNT_dec_NN (sv);
1134 sv = SvREFCNT_inc (POPs);
1135
1136 PUTBACK;
1137
1138 FREETMPS; LEAVE;
1139 }
1140 break;
1141
1142 default:
1143 filter:
1144 {
1145 SV *tag_sv = newSVuv (tag);
1146
1147 sv = decode_sv (dec);
1148
1149 dSP;
1150 ENTER; SAVETMPS;
1151 PUSHMARK (SP);
1152 EXTEND (SP, 2);
1153 PUSHs (tag_sv);
1154 PUSHs (sv);
1155
1156 PUTBACK;
1157 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1158 SPAGAIN;
1159
1160 if (SvTRUE (ERRSV))
1161 {
1162 SvREFCNT_dec_NN (tag_sv);
1163 FREETMPS; LEAVE;
1164 ERR_ERRSV;
1165 }
1166
1167 if (count)
1168 {
1169 SvREFCNT_dec_NN (tag_sv);
1170 SvREFCNT_dec_NN (sv);
1171 sv = SvREFCNT_inc_NN (TOPs);
1172 SP -= count;
1173 }
1174 else
1175 {
1176 AV *av = newAV ();
1177 av_push (av, tag_sv);
1178 av_push (av, sv);
1179
1180 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1181 ? cbor_tagged_stash
1182 : gv_stashpv ("CBOR::XS::Tagged" , 1);
1183 sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1184 }
1185
1186 PUTBACK;
1187
1188 FREETMPS; LEAVE;
1189 }
1190 break;
1191 }
1192
1193 return sv;
1194
1195 fail:
1196 SvREFCNT_dec (sv);
1197 return &PL_sv_undef;
1198 }
1199
1200 static SV *
1201 decode_sv (dec_t *dec)
1202 {
1203 WANT (1);
1204
1205 switch (*dec->cur >> MAJOR_SHIFT)
1206 {
1207 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1208 case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1209 case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1210 case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1211 case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1212 case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1213 case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1214
1215 case MAJOR_MISC >> MAJOR_SHIFT:
1216 switch (*dec->cur++ & MINOR_MASK)
1217 {
1218 case SIMPLE_FALSE:
1219 #if CBOR_SLOW
1220 types_false = get_bool ("Types::Serialiser::false");
1221 #endif
1222 return newSVsv (types_false);
1223 case SIMPLE_TRUE:
1224 #if CBOR_SLOW
1225 types_true = get_bool ("Types::Serialiser::true");
1226 #endif
1227 return newSVsv (types_true);
1228 case SIMPLE_NULL:
1229 return newSVsv (&PL_sv_undef);
1230 case SIMPLE_UNDEF:
1231 #if CBOR_SLOW
1232 types_error = get_bool ("Types::Serialiser::error");
1233 #endif
1234 return newSVsv (types_error);
1235
1236 case MISC_FLOAT16:
1237 {
1238 WANT (2);
1239
1240 uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1241 dec->cur += 2;
1242
1243 return newSVnv (ecb_binary16_to_float (fp));
1244 }
1245
1246 case MISC_FLOAT32:
1247 {
1248 uint32_t fp;
1249 WANT (4);
1250 memcpy (&fp, dec->cur, 4);
1251 dec->cur += 4;
1252
1253 if (!ecb_big_endian ())
1254 fp = ecb_bswap32 (fp);
1255
1256 return newSVnv (ecb_binary32_to_float (fp));
1257 }
1258
1259 case MISC_FLOAT64:
1260 {
1261 uint64_t fp;
1262 WANT (8);
1263 memcpy (&fp, dec->cur, 8);
1264 dec->cur += 8;
1265
1266 if (!ecb_big_endian ())
1267 fp = ecb_bswap64 (fp);
1268
1269 return newSVnv (ecb_binary64_to_double (fp));
1270 }
1271
1272 // 0..19 unassigned simple
1273 // 24 reserved + unassigned simple (reserved values are not encodable)
1274 // 28-30 unassigned misc
1275 // 31 break code
1276 default:
1277 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1278 }
1279
1280 break;
1281 }
1282
1283 fail:
1284 return &PL_sv_undef;
1285 }
1286
1287 static SV *
1288 decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1289 {
1290 dec_t dec = { 0 };
1291 SV *sv;
1292 STRLEN len;
1293 char *data = SvPVbyte (string, len);
1294
1295 if (len > cbor->max_size && cbor->max_size)
1296 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1297 (unsigned long)len, (unsigned long)cbor->max_size);
1298
1299 dec.cbor = *cbor;
1300 dec.cur = (U8 *)data;
1301 dec.end = (U8 *)data + len;
1302
1303 sv = decode_sv (&dec);
1304
1305 if (offset_return)
1306 *offset_return = dec.cur;
1307
1308 if (!(offset_return || !sv))
1309 if (dec.cur != dec.end && !dec.err)
1310 dec.err = "garbage after CBOR object";
1311
1312 if (dec.err)
1313 {
1314 if (dec.shareable)
1315 {
1316 // need to break cyclic links, which would all be in shareable
1317 int i;
1318 SV **svp;
1319
1320 for (i = av_len (dec.shareable) + 1; i--; )
1321 if ((svp = av_fetch (dec.shareable, i, 0)))
1322 sv_setsv (*svp, &PL_sv_undef);
1323 }
1324
1325 SvREFCNT_dec_NN (sv);
1326
1327 if (dec.err_sv)
1328 sv_2mortal (dec.err_sv);
1329
1330 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1331 }
1332
1333 sv = sv_2mortal (sv);
1334
1335 return sv;
1336 }
1337
1338 /////////////////////////////////////////////////////////////////////////////
1339 // incremental parser
1340
1341 #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1342
1343 // returns 0 for notyet, 1 for success or error
1344 static int
1345 incr_parse (CBOR *self, SV *cborstr)
1346 {
1347 STRLEN cur;
1348 SvPV (cborstr, cur);
1349
1350 while (ecb_expect_true (self->incr_need <= cur))
1351 {
1352 // table of integer count bytes
1353 static I8 incr_len[MINOR_MASK + 1] = {
1354 0, 0, 0, 0, 0, 0, 0, 0,
1355 0, 0, 0, 0, 0, 0, 0, 0,
1356 0, 0, 0, 0, 0, 0, 0, 0,
1357 1, 2, 4, 8,-1,-1,-1,-2
1358 };
1359
1360 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1361 U8 m = *p & MINOR_MASK;
1362 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1363 I8 ilen = incr_len[m];
1364
1365 self->incr_need = self->incr_pos + 1;
1366
1367 if (ecb_expect_false (ilen < 0))
1368 {
1369 if (m != MINOR_INDEF)
1370 return 1; // error
1371
1372 if (*p == (MAJOR_MISC | MINOR_INDEF))
1373 {
1374 if (count >= 0)
1375 return 1; // error
1376
1377 count = 1;
1378 }
1379 else
1380 {
1381 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1382 count = -1;
1383 }
1384 }
1385 else
1386 {
1387 self->incr_need += ilen;
1388 if (ecb_expect_false (self->incr_need > cur))
1389 return 0;
1390
1391 int major = *p >> MAJOR_SHIFT;
1392
1393 switch (major)
1394 {
1395 case MAJOR_TAG >> MAJOR_SHIFT:
1396 ++count; // tags merely prefix another value
1397 break;
1398
1399 case MAJOR_BYTES >> MAJOR_SHIFT:
1400 case MAJOR_TEXT >> MAJOR_SHIFT:
1401 case MAJOR_ARRAY >> MAJOR_SHIFT:
1402 case MAJOR_MAP >> MAJOR_SHIFT:
1403 {
1404 UV len;
1405
1406 if (ecb_expect_false (ilen))
1407 {
1408 len = 0;
1409
1410 do {
1411 len = (len << 8) | *++p;
1412 } while (--ilen);
1413 }
1414 else
1415 len = m;
1416
1417 switch (major)
1418 {
1419 case MAJOR_BYTES >> MAJOR_SHIFT:
1420 case MAJOR_TEXT >> MAJOR_SHIFT:
1421 self->incr_need += len;
1422 if (ecb_expect_false (self->incr_need > cur))
1423 return 0;
1424
1425 break;
1426
1427 case MAJOR_MAP >> MAJOR_SHIFT:
1428 len <<= 1;
1429 case MAJOR_ARRAY >> MAJOR_SHIFT:
1430 if (len)
1431 {
1432 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1433 count = len + 1;
1434 }
1435 break;
1436 }
1437 }
1438 }
1439 }
1440
1441 self->incr_pos = self->incr_need;
1442
1443 if (count > 0)
1444 {
1445 while (!--count)
1446 {
1447 if (!AvFILLp (self->incr_count))
1448 return 1; // done
1449
1450 SvREFCNT_dec_NN (av_pop (self->incr_count));
1451 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1452 }
1453
1454 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1455 }
1456 }
1457
1458 return 0;
1459 }
1460
1461
1462 /////////////////////////////////////////////////////////////////////////////
1463 // XS interface functions
1464
1465 MODULE = CBOR::XS PACKAGE = CBOR::XS
1466
1467 BOOT:
1468 {
1469 cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1470 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1471
1472 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1473 types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1474
1475 types_true = get_bool ("Types::Serialiser::true" );
1476 types_false = get_bool ("Types::Serialiser::false");
1477 types_error = get_bool ("Types::Serialiser::error");
1478
1479 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1480
1481 sv_cbor = newSVpv ("CBOR", 0);
1482 SvREADONLY_on (sv_cbor);
1483
1484 assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1485 }
1486
1487 PROTOTYPES: DISABLE
1488
1489 void CLONE (...)
1490 CODE:
1491 cbor_stash = 0;
1492 cbor_tagged_stash = 0;
1493 types_error_stash = 0;
1494 types_boolean_stash = 0;
1495
1496 void new (char *klass)
1497 PPCODE:
1498 {
1499 SV *pv = NEWSV (0, sizeof (CBOR));
1500 SvPOK_only (pv);
1501 cbor_init ((CBOR *)SvPVX (pv));
1502 XPUSHs (sv_2mortal (sv_bless (
1503 newRV_noinc (pv),
1504 strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1505 )));
1506 }
1507
1508 void shrink (CBOR *self, int enable = 1)
1509 ALIAS:
1510 shrink = F_SHRINK
1511 allow_unknown = F_ALLOW_UNKNOWN
1512 allow_sharing = F_ALLOW_SHARING
1513 allow_cycles = F_ALLOW_CYCLES
1514 forbid_objects = F_FORBID_OBJECTS
1515 pack_strings = F_PACK_STRINGS
1516 text_keys = F_TEXT_KEYS
1517 text_strings = F_TEXT_STRINGS
1518 validate_utf8 = F_VALIDATE_UTF8
1519 PPCODE:
1520 {
1521 if (enable)
1522 self->flags |= ix;
1523 else
1524 self->flags &= ~ix;
1525
1526 XPUSHs (ST (0));
1527 }
1528
1529 void get_shrink (CBOR *self)
1530 ALIAS:
1531 get_shrink = F_SHRINK
1532 get_allow_unknown = F_ALLOW_UNKNOWN
1533 get_allow_sharing = F_ALLOW_SHARING
1534 get_allow_cycles = F_ALLOW_CYCLES
1535 get_forbid_objects = F_FORBID_OBJECTS
1536 get_pack_strings = F_PACK_STRINGS
1537 get_text_keys = F_TEXT_KEYS
1538 get_text_strings = F_TEXT_STRINGS
1539 get_validate_utf8 = F_VALIDATE_UTF8
1540 PPCODE:
1541 XPUSHs (boolSV (self->flags & ix));
1542
1543 void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1544 PPCODE:
1545 self->max_depth = max_depth;
1546 XPUSHs (ST (0));
1547
1548 U32 get_max_depth (CBOR *self)
1549 CODE:
1550 RETVAL = self->max_depth;
1551 OUTPUT:
1552 RETVAL
1553
1554 void max_size (CBOR *self, U32 max_size = 0)
1555 PPCODE:
1556 self->max_size = max_size;
1557 XPUSHs (ST (0));
1558
1559 int get_max_size (CBOR *self)
1560 CODE:
1561 RETVAL = self->max_size;
1562 OUTPUT:
1563 RETVAL
1564
1565 void filter (CBOR *self, SV *filter = 0)
1566 PPCODE:
1567 SvREFCNT_dec (self->filter);
1568 self->filter = filter ? newSVsv (filter) : filter;
1569 XPUSHs (ST (0));
1570
1571 SV *get_filter (CBOR *self)
1572 CODE:
1573 RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1574 OUTPUT:
1575 RETVAL
1576
1577 void encode (CBOR *self, SV *scalar)
1578 PPCODE:
1579 PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1580 XPUSHs (scalar);
1581
1582 void decode (CBOR *self, SV *cborstr)
1583 PPCODE:
1584 PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1585 XPUSHs (cborstr);
1586
1587 void decode_prefix (CBOR *self, SV *cborstr)
1588 PPCODE:
1589 {
1590 SV *sv;
1591 char *offset;
1592 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1593 EXTEND (SP, 2);
1594 PUSHs (sv);
1595 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1596 }
1597
1598 void incr_parse (CBOR *self, SV *cborstr)
1599 ALIAS:
1600 incr_parse_multiple = 1
1601 PPCODE:
1602 {
1603 if (SvUTF8 (cborstr))
1604 sv_utf8_downgrade (cborstr, 0);
1605
1606 if (!self->incr_count)
1607 {
1608 self->incr_count = newAV ();
1609 self->incr_pos = 0;
1610 self->incr_need = 1;
1611
1612 av_push (self->incr_count, newSViv (1));
1613 }
1614
1615 do
1616 {
1617 if (!incr_parse (self, cborstr))
1618 {
1619 if (self->incr_need > self->max_size && self->max_size)
1620 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1621 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1622
1623 break;
1624 }
1625
1626 SV *sv;
1627 char *offset;
1628
1629 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1630 XPUSHs (sv);
1631
1632 sv_chop (cborstr, offset);
1633
1634 av_clear (self->incr_count);
1635 av_push (self->incr_count, newSViv (1));
1636
1637 self->incr_pos = 0;
1638 self->incr_need = self->incr_pos + 1;
1639 }
1640 while (ix);
1641 }
1642
1643 void incr_reset (CBOR *self)
1644 CODE:
1645 {
1646 SvREFCNT_dec (self->incr_count);
1647 self->incr_count = 0;
1648 }
1649
1650 void DESTROY (CBOR *self)
1651 PPCODE:
1652 cbor_free (self);
1653
1654 PROTOTYPES: ENABLE
1655
1656 void encode_cbor (SV *scalar)
1657 ALIAS:
1658 encode_cbor = 0
1659 encode_cbor_sharing = F_ALLOW_SHARING
1660 PPCODE:
1661 {
1662 CBOR cbor;
1663 cbor_init (&cbor);
1664 cbor.flags |= ix;
1665 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1666 XPUSHs (scalar);
1667 }
1668
1669 void decode_cbor (SV *cborstr)
1670 PPCODE:
1671 {
1672 CBOR cbor;
1673 cbor_init (&cbor);
1674 PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1675 XPUSHs (cborstr);
1676 }
1677
1678 #ifdef __AFL_COMPILER
1679
1680 void
1681 afl_init ()
1682 CODE:
1683 __AFL_INIT ();
1684
1685 int
1686 afl_loop (unsigned int count = 10000)
1687 CODE:
1688 RETVAL = __AFL_LOOP (count);
1689 OUTPUT:
1690 RETVAL
1691
1692 #endif
1693