ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.37 by root, Sat Nov 30 18:13:53 2013 UTC vs.
Revision 1.63 by root, Sat Nov 26 04:50:58 2016 UTC

6#include <string.h> 6#include <string.h>
7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11#include <inttypes.h>
11 12
13#define ECB_NO_THREADS 1
12#include "ecb.h" 14#include "ecb.h"
13 15
14// compatibility with perl <5.18 16// compatibility with perl <5.18
15#ifndef HvNAMELEN_get 17#ifndef HvNAMELEN_get
16# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) 18# define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19# define HvNAMELEN(hv) HvNAMELEN_get (hv) 21# define HvNAMELEN(hv) HvNAMELEN_get (hv)
20#endif 22#endif
21#ifndef HvNAMEUTF8 23#ifndef HvNAMEUTF8
22# define HvNAMEUTF8(hv) 0 24# define HvNAMEUTF8(hv) 0
23#endif 25#endif
26#ifndef SvREFCNT_inc_NN
27# define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28#endif
24#ifndef SvREFCNT_dec_NN 29#ifndef SvREFCNT_dec_NN
25# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) 30# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
26#endif 31#endif
27 32
28// known major and minor types 33// known major and minor types
98 103
99#define F_SHRINK 0x00000001UL 104#define F_SHRINK 0x00000001UL
100#define F_ALLOW_UNKNOWN 0x00000002UL 105#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL 106#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL 107#define F_ALLOW_CYCLES 0x00000008UL
108#define F_FORBID_OBJECTS 0x00000010UL
103#define F_PACK_STRINGS 0x00000010UL 109#define F_PACK_STRINGS 0x00000020UL
110#define F_TEXT_KEYS 0x00000040UL
111#define F_TEXT_STRINGS 0x00000080UL
112#define F_VALIDATE_UTF8 0x00000100UL
104 113
105#define INIT_SIZE 32 // initial scalar size to be allocated 114#define INIT_SIZE 32 // initial scalar size to be allocated
106 115
107#define SB do { 116#define SB do {
108#define SE } while (0) 117#define SE } while (0)
127typedef struct { 136typedef struct {
128 U32 flags; 137 U32 flags;
129 U32 max_depth; 138 U32 max_depth;
130 STRLEN max_size; 139 STRLEN max_size;
131 SV *filter; 140 SV *filter;
141
142 // for the incremental parser
143 STRLEN incr_pos; // the current offset into the text
144 STRLEN incr_need; // minimum bytes needed to decode
145 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
132} CBOR; 146} CBOR;
133 147
134ecb_inline void 148ecb_inline void
135cbor_init (CBOR *cbor) 149cbor_init (CBOR *cbor)
136{ 150{
140 154
141ecb_inline void 155ecb_inline void
142cbor_free (CBOR *cbor) 156cbor_free (CBOR *cbor)
143{ 157{
144 SvREFCNT_dec (cbor->filter); 158 SvREFCNT_dec (cbor->filter);
159 SvREFCNT_dec (cbor->incr_count);
145} 160}
146 161
147///////////////////////////////////////////////////////////////////////////// 162/////////////////////////////////////////////////////////////////////////////
148// utility functions 163// utility functions
149 164
175 190
176// minimum length of a string to be registered for stringref 191// minimum length of a string to be registered for stringref
177ecb_inline int 192ecb_inline int
178minimum_string_length (UV idx) 193minimum_string_length (UV idx)
179{ 194{
180 return idx > 23 195 return idx <= 23 ? 3
181 ? idx > 0xffU 196 : idx <= 0xffU ? 4
182 ? idx > 0xffffU 197 : idx <= 0xffffU ? 5
183 ? idx > 0xffffffffU 198 : idx <= 0xffffffffU ? 7
184 ? 11 199 : 11;
185 : 7
186 : 5
187 : 4
188 : 3;
189} 200}
190 201
191///////////////////////////////////////////////////////////////////////////// 202/////////////////////////////////////////////////////////////////////////////
192// encoder 203// encoder
193 204
206} enc_t; 217} enc_t;
207 218
208ecb_inline void 219ecb_inline void
209need (enc_t *enc, STRLEN len) 220need (enc_t *enc, STRLEN len)
210{ 221{
211 if (ecb_expect_false (enc->cur + len >= enc->end)) 222 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
212 { 223 {
213 STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); 224 STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
214 SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); 225 SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
215 enc->cur = SvPVX (enc->sv) + cur; 226 enc->cur = SvPVX (enc->sv) + cur;
216 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; 227 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
268encode_tag (enc_t *enc, UV tag) 279encode_tag (enc_t *enc, UV tag)
269{ 280{
270 encode_uint (enc, MAJOR_TAG, tag); 281 encode_uint (enc, MAJOR_TAG, tag);
271} 282}
272 283
284// exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
285ecb_noinline static void
286encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
287{
288 STRLEN ulen = len;
289 U8 *p, *pend = (U8 *)str + len;
290
291 for (p = (U8 *)str; p < pend; ++p)
292 ulen += *p >> 7; // count set high bits
293
294 encode_uint (enc, MAJOR_TEXT, ulen);
295
296 need (enc, ulen);
297 for (p = (U8 *)str; p < pend; ++p)
298 if (*p < 0x80)
299 *enc->cur++ = *p;
300 else
301 {
302 *enc->cur++ = 0xc0 + (*p >> 6);
303 *enc->cur++ = 0x80 + (*p & 63);
304 }
305}
306
273ecb_inline void 307ecb_inline void
274encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 308encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
275{ 309{
310 if (ecb_expect_false (upgrade_utf8))
311 if (!utf8)
312 {
313 encode_str_utf8 (enc, utf8, str, len);
314 return;
315 }
316
276 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); 317 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
277 need (enc, len); 318 need (enc, len);
278 memcpy (enc->cur, str, len); 319 memcpy (enc->cur, str, len);
279 enc->cur += len; 320 enc->cur += len;
280} 321}
281 322
282static void 323ecb_inline void
283encode_strref (enc_t *enc, int utf8, char *str, STRLEN len) 324encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
284{ 325{
285 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) 326 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
286 { 327 {
287 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1); 328 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
288 329
299 sv_setuv (*svp, enc->stringref_idx); 340 sv_setuv (*svp, enc->stringref_idx);
300 ++enc->stringref_idx; 341 ++enc->stringref_idx;
301 } 342 }
302 } 343 }
303 344
304 encode_str (enc, utf8, str, len); 345 encode_str (enc, upgrade_utf8, utf8, str, len);
305} 346}
306 347
307static void encode_sv (enc_t *enc, SV *sv); 348static void encode_sv (enc_t *enc, SV *sv);
308 349
309static void 350static void
316 357
317 ++enc->depth; 358 ++enc->depth;
318 359
319 encode_uint (enc, MAJOR_ARRAY, len + 1); 360 encode_uint (enc, MAJOR_ARRAY, len + 1);
320 361
362 if (ecb_expect_false (SvMAGICAL (av)))
321 for (i = 0; i <= len; ++i) 363 for (i = 0; i <= len; ++i)
322 { 364 {
323 SV **svp = av_fetch (av, i, 0); 365 SV **svp = av_fetch (av, i, 0);
324 encode_sv (enc, svp ? *svp : &PL_sv_undef); 366 encode_sv (enc, svp ? *svp : &PL_sv_undef);
325 } 367 }
368 else
369 for (i = 0; i <= len; ++i)
370 {
371 SV *sv = AvARRAY (av)[i];
372 encode_sv (enc, sv ? sv : &PL_sv_undef);
373 }
326 374
327 --enc->depth; 375 --enc->depth;
328} 376}
329 377
330static void 378static void
338 ++enc->depth; 386 ++enc->depth;
339 387
340 int pairs = hv_iterinit (hv); 388 int pairs = hv_iterinit (hv);
341 int mg = SvMAGICAL (hv); 389 int mg = SvMAGICAL (hv);
342 390
343 if (mg) 391 if (ecb_expect_false (mg))
344 encode_ch (enc, MAJOR_MAP | MINOR_INDEF); 392 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
345 else 393 else
346 encode_uint (enc, MAJOR_MAP, pairs); 394 encode_uint (enc, MAJOR_MAP, pairs);
347 395
348 while ((he = hv_iternext (hv))) 396 while ((he = hv_iternext (hv)))
349 { 397 {
350 if (HeKLEN (he) == HEf_SVKEY) 398 if (HeKLEN (he) == HEf_SVKEY)
351 encode_sv (enc, HeSVKEY (he)); 399 encode_sv (enc, HeSVKEY (he));
352 else 400 else
353 encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); 401 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
354 402
355 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); 403 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
356 } 404 }
357 405
358 if (mg) 406 if (ecb_expect_false (mg))
359 encode_ch (enc, MAJOR_MISC | MINOR_INDEF); 407 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
360 408
361 --enc->depth; 409 --enc->depth;
362} 410}
363 411
406 } 454 }
407 455
408 if (ecb_expect_false (SvREFCNT (sv) > 1) 456 if (ecb_expect_false (SvREFCNT (sv) > 1)
409 && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)) 457 && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
410 { 458 {
411 if (!enc->shareable) 459 if (ecb_expect_false (!enc->shareable))
412 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ()); 460 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
413 461
414 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1); 462 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
415 463
416 if (SvOK (*svp)) 464 if (SvOK (*svp))
430 if (ecb_expect_false (SvOBJECT (sv))) 478 if (ecb_expect_false (SvOBJECT (sv)))
431 { 479 {
432 HV *stash = SvSTASH (sv); 480 HV *stash = SvSTASH (sv);
433 GV *method; 481 GV *method;
434 482
483 if (enc->cbor.flags & F_FORBID_OBJECTS)
484 croak ("encountered object '%s', but forbid_objects is enabled",
485 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
435 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) 486 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
436 { 487 {
437 dSP; 488 dSP;
438 489
439 ENTER; SAVETMPS; PUSHMARK (SP); 490 ENTER; SAVETMPS;
491 PUSHMARK (SP);
440 // we re-bless the reference to get overload and other niceties right 492 // we re-bless the reference to get overload and other niceties right
441 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 493 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
442 494
443 PUTBACK; 495 PUTBACK;
444 // G_SCALAR ensures that return value is 1 496 // G_SCALAR ensures that return value is 1
457 } 509 }
458 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) 510 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
459 { 511 {
460 dSP; 512 dSP;
461 513
462 ENTER; SAVETMPS; PUSHMARK (SP); 514 ENTER; SAVETMPS;
515 SAVESTACK_POS ();
516 PUSHMARK (SP);
463 EXTEND (SP, 2); 517 EXTEND (SP, 2);
464 // we re-bless the reference to get overload and other niceties right 518 // we re-bless the reference to get overload and other niceties right
465 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 519 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
466 PUSHs (sv_cbor); 520 PUSHs (sv_cbor);
467 521
473 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) 527 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
474 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); 528 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
475 529
476 encode_tag (enc, CBOR_TAG_PERL_OBJECT); 530 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
477 encode_uint (enc, MAJOR_ARRAY, count + 1); 531 encode_uint (enc, MAJOR_ARRAY, count + 1);
478 encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); 532 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
479 533
480 while (count) 534 while (count)
481 encode_sv (enc, SP[1 - count--]); 535 encode_sv (enc, SP[1 - count--]);
482 536
483 PUTBACK; 537 PUTBACK;
509 if (ecb_expect_false (nv == (NV)(U32)nv)) 563 if (ecb_expect_false (nv == (NV)(U32)nv))
510 encode_uint (enc, MAJOR_POS_INT, (U32)nv); 564 encode_uint (enc, MAJOR_POS_INT, (U32)nv);
511 //TODO: maybe I32? 565 //TODO: maybe I32?
512 else if (ecb_expect_false (nv == (float)nv)) 566 else if (ecb_expect_false (nv == (float)nv))
513 { 567 {
568 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
569
514 uint32_t fp = ecb_float_to_binary32 (nv); 570 uint32_t fp = ecb_float_to_binary32 (nv);
515
516 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
517 571
518 if (!ecb_big_endian ()) 572 if (!ecb_big_endian ())
519 fp = ecb_bswap32 (fp); 573 fp = ecb_bswap32 (fp);
520 574
521 memcpy (enc->cur, &fp, 4); 575 memcpy (enc->cur, &fp, 4);
522 enc->cur += 4; 576 enc->cur += 4;
523 } 577 }
524 else 578 else
525 { 579 {
580 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
581
526 uint64_t fp = ecb_double_to_binary64 (nv); 582 uint64_t fp = ecb_double_to_binary64 (nv);
527
528 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
529 583
530 if (!ecb_big_endian ()) 584 if (!ecb_big_endian ())
531 fp = ecb_bswap64 (fp); 585 fp = ecb_bswap64 (fp);
532 586
533 memcpy (enc->cur, &fp, 8); 587 memcpy (enc->cur, &fp, 8);
542 596
543 if (SvPOKp (sv)) 597 if (SvPOKp (sv))
544 { 598 {
545 STRLEN len; 599 STRLEN len;
546 char *str = SvPV (sv, len); 600 char *str = SvPV (sv, len);
547 encode_strref (enc, SvUTF8 (sv), str, len); 601 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
548 } 602 }
549 else if (SvNOKp (sv)) 603 else if (SvNOKp (sv))
550 encode_nv (enc, sv); 604 encode_nv (enc, sv);
551 else if (SvIOKp (sv)) 605 else if (SvIOKp (sv))
552 { 606 {
569} 623}
570 624
571static SV * 625static SV *
572encode_cbor (SV *scalar, CBOR *cbor) 626encode_cbor (SV *scalar, CBOR *cbor)
573{ 627{
574 enc_t enc = { }; 628 enc_t enc = { 0 };
575 629
576 enc.cbor = *cbor; 630 enc.cbor = *cbor;
577 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 631 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
578 enc.cur = SvPVX (enc.sv); 632 enc.cur = SvPVX (enc.sv);
579 enc.end = SvEND (enc.sv); 633 enc.end = SvEND (enc.sv);
580 634
581 SvPOK_only (enc.sv); 635 SvPOK_only (enc.sv);
582 636
583 if (cbor->flags & F_PACK_STRINGS) 637 if (cbor->flags & F_PACK_STRINGS)
584 { 638 {
611 U32 depth; // recursion depth 665 U32 depth; // recursion depth
612 U32 maxdepth; // recursion depth limit 666 U32 maxdepth; // recursion depth limit
613 AV *shareable; 667 AV *shareable;
614 AV *stringref; 668 AV *stringref;
615 SV *decode_tagged; 669 SV *decode_tagged;
670 SV *err_sv; // optional sv for error, needs to be freed
616} dec_t; 671} dec_t;
617 672
618#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE 673// set dec->err to ERRSV
674ecb_cold static void
675err_errsv (dec_t *dec)
676{
677 if (!dec->err)
678 {
679 dec->err_sv = newSVsv (ERRSV);
619 680
620#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") 681 // chop off the trailing \n
682 SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
683 *SvEND (dec->err_sv) = 0;
621 684
685 dec->err = SvPVutf8_nolen (dec->err_sv);
686 }
687}
688
689// the following functions are used to reduce code size and help the compiler to optimise
690ecb_cold static void
691err_set (dec_t *dec, const char *reason)
692{
693 if (!dec->err)
694 dec->err = reason;
695}
696
697ecb_cold static void
698err_unexpected_end (dec_t *dec)
699{
700 err_set (dec, "unexpected end of CBOR data");
701}
702
703#define ERR_DO(do) SB do; goto fail; SE
704#define ERR(reason) ERR_DO (err_set (dec, reason))
705#define ERR_ERRSV ERR_DO (err_errsv (dec))
706
707#define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
708
622#define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED) 709#define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
623#define DEC_DEC_DEPTH --dec->depth 710#define DEC_DEC_DEPTH --dec->depth
624 711
625static UV 712static UV
626decode_uint (dec_t *dec) 713decode_uint (dec_t *dec)
627{ 714{
704 av_push (av, decode_sv (dec)); 791 av_push (av, decode_sv (dec));
705 } 792 }
706 } 793 }
707 else 794 else
708 { 795 {
709 int i, len = decode_uint (dec); 796 UV i, len = decode_uint (dec);
710 797
711 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays 798 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
712 av_fill (av, len - 1); 799 av_fill (av, len - 1);
713 800
714 for (i = 0; i < len; ++i) 801 for (i = 0; i < len; ++i)
717 804
718 DEC_DEC_DEPTH; 805 DEC_DEC_DEPTH;
719 return newRV_noinc ((SV *)av); 806 return newRV_noinc ((SV *)av);
720 807
721fail: 808fail:
722 SvREFCNT_dec (av); 809 SvREFCNT_dec_NN (av);
723 DEC_DEC_DEPTH; 810 DEC_DEC_DEPTH;
724 return &PL_sv_undef; 811 return &PL_sv_undef;
725} 812}
726 813
727static void 814static void
729{ 816{
730 // for speed reasons, we specialcase single-string 817 // for speed reasons, we specialcase single-string
731 // byte or utf-8 strings as keys, but only when !stringref 818 // byte or utf-8 strings as keys, but only when !stringref
732 819
733 if (ecb_expect_true (!dec->stringref)) 820 if (ecb_expect_true (!dec->stringref))
734 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) 821 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
735 { 822 {
736 I32 len = decode_uint (dec); 823 STRLEN len = decode_uint (dec);
737 char *key = (char *)dec->cur; 824 char *key = (char *)dec->cur;
738 825
826 WANT (len);
739 dec->cur += len; 827 dec->cur += len;
740 828
741 if (ecb_expect_false (dec->stringref))
742 av_push (dec->stringref, newSVpvn (key, len));
743
744 hv_store (hv, key, len, decode_sv (dec), 0); 829 hv_store (hv, key, len, decode_sv (dec), 0);
745 830
746 return; 831 return;
747 } 832 }
748 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) 833 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
749 { 834 {
750 I32 len = decode_uint (dec); 835 STRLEN len = decode_uint (dec);
751 char *key = (char *)dec->cur; 836 char *key = (char *)dec->cur;
752 837
838 WANT (len);
753 dec->cur += len; 839 dec->cur += len;
754 840
755 if (ecb_expect_false (dec->stringref)) 841 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
756 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1)); 842 if (!is_utf8_string (key, len))
843 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
757 844
758 hv_store (hv, key, -len, decode_sv (dec), 0); 845 hv_store (hv, key, -len, decode_sv (dec), 0);
759 846
760 return; 847 return;
761 } 848 }
762 849
763 SV *k = decode_sv (dec); 850 SV *k = decode_sv (dec);
764 SV *v = decode_sv (dec); 851 SV *v = decode_sv (dec);
765 852
853 // we leak memory if uncaught exceptions are thrown by random magical
854 // methods, and this is hopefully the only place where it can happen,
855 // so if there is a chance of an exception, take the very slow path.
856 // since catching exceptions is "undocumented/internal/forbidden" by
857 // the new p5p powers, we need to call out to a perl function :/
858 if (ecb_expect_false (SvAMAGIC (k)))
859 {
860 dSP;
861
862 ENTER; SAVETMPS;
863 PUSHMARK (SP);
864 EXTEND (SP, 3);
865 PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
866 PUSHs (sv_2mortal (k));
867 PUSHs (sv_2mortal (v));
868
869 PUTBACK;
870 call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
871 SPAGAIN;
872
873 FREETMPS; LEAVE;
874
875 if (SvTRUE (ERRSV))
876 ERR_ERRSV;
877
878 return;
879 }
880
766 hv_store_ent (hv, k, v, 0); 881 hv_store_ent (hv, k, v, 0);
767 SvREFCNT_dec (k); 882 SvREFCNT_dec_NN (k);
883
884fail:
885 ;
768} 886}
769 887
770static SV * 888static SV *
771decode_hv (dec_t *dec) 889decode_hv (dec_t *dec)
772{ 890{
791 decode_he (dec, hv); 909 decode_he (dec, hv);
792 } 910 }
793 } 911 }
794 else 912 else
795 { 913 {
796 int pairs = decode_uint (dec); 914 UV pairs = decode_uint (dec);
915
916 WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
797 917
798 while (pairs--) 918 while (pairs--)
799 decode_he (dec, hv); 919 decode_he (dec, hv);
800 } 920 }
801 921
802 DEC_DEC_DEPTH; 922 DEC_DEC_DEPTH;
803 return newRV_noinc ((SV *)hv); 923 return newRV_noinc ((SV *)hv);
804 924
805fail: 925fail:
806 SvREFCNT_dec (hv); 926 SvREFCNT_dec_NN (hv);
807 DEC_DEC_DEPTH; 927 DEC_DEC_DEPTH;
808 return &PL_sv_undef; 928 return &PL_sv_undef;
809} 929}
810 930
811static SV * 931static SV *
812decode_str (dec_t *dec, int utf8) 932decode_str (dec_t *dec, int utf8)
813{ 933{
814 SV *sv = 0; 934 SV *sv = 0;
815 935
816 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF) 936 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
817 { 937 {
818 // indefinite length strings 938 // indefinite length strings
819 ++dec->cur; 939 ++dec->cur;
820 940
821 U8 major = *dec->cur & MAJOR_MISC; 941 U8 major = *dec->cur & MAJOR_MISC;
854 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1)) 974 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
855 av_push (dec->stringref, SvREFCNT_inc_NN (sv)); 975 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
856 } 976 }
857 977
858 if (utf8) 978 if (utf8)
979 {
980 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
981 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
982 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
983
859 SvUTF8_on (sv); 984 SvUTF8_on (sv);
985 }
860 986
861 return sv; 987 return sv;
862 988
863fail: 989fail:
864 SvREFCNT_dec (sv); 990 SvREFCNT_dec (sv);
883 sv = newRV_noinc (decode_sv (dec)); 1009 sv = newRV_noinc (decode_sv (dec));
884 break; 1010 break;
885 1011
886 case CBOR_TAG_STRINGREF_NAMESPACE: 1012 case CBOR_TAG_STRINGREF_NAMESPACE:
887 { 1013 {
1014 // do not use SAVETMPS/FREETMPS, as these will
1015 // erase mortalised caches, e.g. "shareable"
888 ENTER; SAVETMPS; 1016 ENTER;
889 1017
890 SAVESPTR (dec->stringref); 1018 SAVESPTR (dec->stringref);
891 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); 1019 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
892 1020
893 sv = decode_sv (dec); 1021 sv = decode_sv (dec);
894 1022
895 FREETMPS; LEAVE; 1023 LEAVE;
896 } 1024 }
897 break; 1025 break;
898 1026
899 case CBOR_TAG_STRINGREF: 1027 case CBOR_TAG_STRINGREF:
900 { 1028 {
951 } 1079 }
952 break; 1080 break;
953 1081
954 case CBOR_TAG_PERL_OBJECT: 1082 case CBOR_TAG_PERL_OBJECT:
955 { 1083 {
1084 if (dec->cbor.flags & F_FORBID_OBJECTS)
1085 goto filter;
1086
956 sv = decode_sv (dec); 1087 sv = decode_sv (dec);
957 1088
958 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) 1089 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
959 ERR ("corrupted CBOR data (non-array perl object)"); 1090 ERR ("corrupted CBOR data (non-array perl object)");
960 1091
970 if (!method) 1101 if (!method)
971 ERR ("cannot decode perl-object (package does not have a THAW method)"); 1102 ERR ("cannot decode perl-object (package does not have a THAW method)");
972 1103
973 dSP; 1104 dSP;
974 1105
975 ENTER; SAVETMPS; PUSHMARK (SP); 1106 ENTER; SAVETMPS;
1107 PUSHMARK (SP);
976 EXTEND (SP, len + 1); 1108 EXTEND (SP, len + 1);
977 // we re-bless the reference to get overload and other niceties right 1109 // we re-bless the reference to get overload and other niceties right
978 PUSHs (*av_fetch (av, 0, 1)); 1110 PUSHs (*av_fetch (av, 0, 1));
979 PUSHs (sv_cbor); 1111 PUSHs (sv_cbor);
980 1112
988 SPAGAIN; 1120 SPAGAIN;
989 1121
990 if (SvTRUE (ERRSV)) 1122 if (SvTRUE (ERRSV))
991 { 1123 {
992 FREETMPS; LEAVE; 1124 FREETMPS; LEAVE;
993 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); 1125 ERR_ERRSV;
994 } 1126 }
995 1127
996 SvREFCNT_dec (sv); 1128 SvREFCNT_dec_NN (sv);
997 sv = SvREFCNT_inc (POPs); 1129 sv = SvREFCNT_inc (POPs);
998 1130
999 PUTBACK; 1131 PUTBACK;
1000 1132
1001 FREETMPS; LEAVE; 1133 FREETMPS; LEAVE;
1002 } 1134 }
1003 break; 1135 break;
1004 1136
1005 default: 1137 default:
1138 filter:
1006 { 1139 {
1140 SV *tag_sv = newSVuv (tag);
1141
1007 sv = decode_sv (dec); 1142 sv = decode_sv (dec);
1008 1143
1009 dSP; 1144 dSP;
1010 ENTER; SAVETMPS; PUSHMARK (SP); 1145 ENTER; SAVETMPS;
1146 SAVESTACK_POS ();
1147 PUSHMARK (SP);
1011 EXTEND (SP, 2); 1148 EXTEND (SP, 2);
1012 PUSHs (newSVuv (tag)); 1149 PUSHs (tag_sv);
1013 PUSHs (sv); 1150 PUSHs (sv);
1014 1151
1015 PUTBACK; 1152 PUTBACK;
1016 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL); 1153 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1017 SPAGAIN; 1154 SPAGAIN;
1018 1155
1019 if (SvTRUE (ERRSV)) 1156 if (SvTRUE (ERRSV))
1020 { 1157 {
1158 SvREFCNT_dec_NN (tag_sv);
1021 FREETMPS; LEAVE; 1159 FREETMPS; LEAVE;
1022 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); 1160 ERR_ERRSV;
1023 } 1161 }
1024 1162
1025 if (count) 1163 if (count)
1026 { 1164 {
1165 SvREFCNT_dec_NN (tag_sv);
1027 SvREFCNT_dec (sv); 1166 SvREFCNT_dec_NN (sv);
1028 sv = SvREFCNT_inc (POPs); 1167 sv = SvREFCNT_inc_NN (POPs);
1029 } 1168 }
1030 else 1169 else
1031 { 1170 {
1032 AV *av = newAV (); 1171 AV *av = newAV ();
1033 av_push (av, newSVuv (tag)); 1172 av_push (av, tag_sv);
1034 av_push (av, sv); 1173 av_push (av, sv);
1035 1174
1036 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 1175 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1037 ? cbor_tagged_stash 1176 ? cbor_tagged_stash
1038 : gv_stashpv ("CBOR::XS::Tagged" , 1); 1177 : gv_stashpv ("CBOR::XS::Tagged" , 1);
1124 1263
1125 return newSVnv (ecb_binary64_to_double (fp)); 1264 return newSVnv (ecb_binary64_to_double (fp));
1126 } 1265 }
1127 1266
1128 // 0..19 unassigned simple 1267 // 0..19 unassigned simple
1129 // 24 reserved + unassigned (reserved values are not encodable) 1268 // 24 reserved + unassigned simple (reserved values are not encodable)
1269 // 28-30 unassigned misc
1270 // 31 break code
1130 default: 1271 default:
1131 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1272 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1132 } 1273 }
1133 1274
1134 break; 1275 break;
1135 } 1276 }
1136 1277
1139} 1280}
1140 1281
1141static SV * 1282static SV *
1142decode_cbor (SV *string, CBOR *cbor, char **offset_return) 1283decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1143{ 1284{
1144 dec_t dec = { }; 1285 dec_t dec = { 0 };
1145 SV *sv; 1286 SV *sv;
1146 STRLEN len; 1287 STRLEN len;
1147 char *data = SvPVbyte (string, len); 1288 char *data = SvPVbyte (string, len);
1148 1289
1149 if (len > cbor->max_size && cbor->max_size) 1290 if (len > cbor->max_size && cbor->max_size)
1163 if (dec.cur != dec.end && !dec.err) 1304 if (dec.cur != dec.end && !dec.err)
1164 dec.err = "garbage after CBOR object"; 1305 dec.err = "garbage after CBOR object";
1165 1306
1166 if (dec.err) 1307 if (dec.err)
1167 { 1308 {
1309 if (dec.shareable)
1310 {
1311 // need to break cyclic links, which would all be in shareable
1312 int i;
1313 SV **svp;
1314
1315 for (i = av_len (dec.shareable) + 1; i--; )
1316 if ((svp = av_fetch (dec.shareable, i, 0)))
1317 sv_setsv (*svp, &PL_sv_undef);
1318 }
1319
1168 SvREFCNT_dec (sv); 1320 SvREFCNT_dec_NN (sv);
1321
1322 if (dec.err_sv)
1323 sv_2mortal (dec.err_sv);
1324
1169 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1325 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1170 } 1326 }
1171 1327
1172 sv = sv_2mortal (sv); 1328 sv = sv_2mortal (sv);
1173 1329
1174 return sv; 1330 return sv;
1175} 1331}
1176 1332
1333/////////////////////////////////////////////////////////////////////////////
1334// incremental parser
1335
1336#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1337
1338// returns 0 for notyet, 1 for success or error
1339static int
1340incr_parse (CBOR *self, SV *cborstr)
1341{
1342 STRLEN cur;
1343 SvPV (cborstr, cur);
1344
1345 while (ecb_expect_true (self->incr_need <= cur))
1346 {
1347 // table of integer count bytes
1348 static I8 incr_len[MINOR_MASK + 1] = {
1349 0, 0, 0, 0, 0, 0, 0, 0,
1350 0, 0, 0, 0, 0, 0, 0, 0,
1351 0, 0, 0, 0, 0, 0, 0, 0,
1352 1, 2, 4, 8,-1,-1,-1,-2
1353 };
1354
1355 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1356 U8 m = *p & MINOR_MASK;
1357 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1358 I8 ilen = incr_len[m];
1359
1360 self->incr_need = self->incr_pos + 1;
1361
1362 if (ecb_expect_false (ilen < 0))
1363 {
1364 if (m != MINOR_INDEF)
1365 return 1; // error
1366
1367 if (*p == (MAJOR_MISC | MINOR_INDEF))
1368 {
1369 if (count >= 0)
1370 return 1; // error
1371
1372 count = 1;
1373 }
1374 else
1375 {
1376 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1377 count = -1;
1378 }
1379 }
1380 else
1381 {
1382 self->incr_need += ilen;
1383 if (ecb_expect_false (self->incr_need > cur))
1384 return 0;
1385
1386 int major = *p >> MAJOR_SHIFT;
1387
1388 switch (major)
1389 {
1390 case MAJOR_TAG >> MAJOR_SHIFT:
1391 ++count; // tags merely prefix another value
1392 break;
1393
1394 case MAJOR_BYTES >> MAJOR_SHIFT:
1395 case MAJOR_TEXT >> MAJOR_SHIFT:
1396 case MAJOR_ARRAY >> MAJOR_SHIFT:
1397 case MAJOR_MAP >> MAJOR_SHIFT:
1398 {
1399 UV len;
1400
1401 if (ecb_expect_false (ilen))
1402 {
1403 len = 0;
1404
1405 do {
1406 len = (len << 8) | *++p;
1407 } while (--ilen);
1408 }
1409 else
1410 len = m;
1411
1412 switch (major)
1413 {
1414 case MAJOR_BYTES >> MAJOR_SHIFT:
1415 case MAJOR_TEXT >> MAJOR_SHIFT:
1416 self->incr_need += len;
1417 if (ecb_expect_false (self->incr_need > cur))
1418 return 0;
1419
1420 break;
1421
1422 case MAJOR_MAP >> MAJOR_SHIFT:
1423 len <<= 1;
1424 case MAJOR_ARRAY >> MAJOR_SHIFT:
1425 if (len)
1426 {
1427 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1428 count = len + 1;
1429 }
1430 break;
1431 }
1432 }
1433 }
1434 }
1435
1436 self->incr_pos = self->incr_need;
1437
1438 if (count > 0)
1439 {
1440 while (!--count)
1441 {
1442 if (!AvFILLp (self->incr_count))
1443 return 1; // done
1444
1445 SvREFCNT_dec_NN (av_pop (self->incr_count));
1446 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1447 }
1448
1449 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1450 }
1451 }
1452
1453 return 0;
1454}
1455
1456
1177///////////////////////////////////////////////////////////////////////////// 1457/////////////////////////////////////////////////////////////////////////////
1178// XS interface functions 1458// XS interface functions
1179 1459
1180MODULE = CBOR::XS PACKAGE = CBOR::XS 1460MODULE = CBOR::XS PACKAGE = CBOR::XS
1181 1461
1193 1473
1194 default_filter = newSVpv ("CBOR::XS::default_filter", 0); 1474 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1195 1475
1196 sv_cbor = newSVpv ("CBOR", 0); 1476 sv_cbor = newSVpv ("CBOR", 0);
1197 SvREADONLY_on (sv_cbor); 1477 SvREADONLY_on (sv_cbor);
1478
1479 assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1198} 1480}
1199 1481
1200PROTOTYPES: DISABLE 1482PROTOTYPES: DISABLE
1201 1483
1202void CLONE (...) 1484void CLONE (...)
1222 ALIAS: 1504 ALIAS:
1223 shrink = F_SHRINK 1505 shrink = F_SHRINK
1224 allow_unknown = F_ALLOW_UNKNOWN 1506 allow_unknown = F_ALLOW_UNKNOWN
1225 allow_sharing = F_ALLOW_SHARING 1507 allow_sharing = F_ALLOW_SHARING
1226 allow_cycles = F_ALLOW_CYCLES 1508 allow_cycles = F_ALLOW_CYCLES
1509 forbid_objects = F_FORBID_OBJECTS
1227 pack_strings = F_PACK_STRINGS 1510 pack_strings = F_PACK_STRINGS
1511 text_keys = F_TEXT_KEYS
1512 text_strings = F_TEXT_STRINGS
1513 validate_utf8 = F_VALIDATE_UTF8
1228 PPCODE: 1514 PPCODE:
1229{ 1515{
1230 if (enable) 1516 if (enable)
1231 self->flags |= ix; 1517 self->flags |= ix;
1232 else 1518 else
1239 ALIAS: 1525 ALIAS:
1240 get_shrink = F_SHRINK 1526 get_shrink = F_SHRINK
1241 get_allow_unknown = F_ALLOW_UNKNOWN 1527 get_allow_unknown = F_ALLOW_UNKNOWN
1242 get_allow_sharing = F_ALLOW_SHARING 1528 get_allow_sharing = F_ALLOW_SHARING
1243 get_allow_cycles = F_ALLOW_CYCLES 1529 get_allow_cycles = F_ALLOW_CYCLES
1530 get_forbid_objects = F_FORBID_OBJECTS
1244 get_pack_strings = F_PACK_STRINGS 1531 get_pack_strings = F_PACK_STRINGS
1532 get_text_keys = F_TEXT_KEYS
1533 get_text_strings = F_TEXT_STRINGS
1534 get_validate_utf8 = F_VALIDATE_UTF8
1245 PPCODE: 1535 PPCODE:
1246 XPUSHs (boolSV (self->flags & ix)); 1536 XPUSHs (boolSV (self->flags & ix));
1247 1537
1248void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1538void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1249 PPCODE: 1539 PPCODE:
1298 EXTEND (SP, 2); 1588 EXTEND (SP, 2);
1299 PUSHs (sv); 1589 PUSHs (sv);
1300 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1590 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1301} 1591}
1302 1592
1593void incr_parse (CBOR *self, SV *cborstr)
1594 ALIAS:
1595 incr_parse_multiple = 1
1596 PPCODE:
1597{
1598 if (SvUTF8 (cborstr))
1599 sv_utf8_downgrade (cborstr, 0);
1600
1601 if (!self->incr_count)
1602 {
1603 self->incr_count = newAV ();
1604 self->incr_pos = 0;
1605 self->incr_need = 1;
1606
1607 av_push (self->incr_count, newSViv (1));
1608 }
1609
1610 do
1611 {
1612 if (!incr_parse (self, cborstr))
1613 {
1614 if (self->incr_need > self->max_size && self->max_size)
1615 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1616 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1617
1618 break;
1619 }
1620
1621 SV *sv;
1622 char *offset;
1623
1624 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1625 XPUSHs (sv);
1626
1627 sv_chop (cborstr, offset);
1628
1629 av_clear (self->incr_count);
1630 av_push (self->incr_count, newSViv (1));
1631
1632 self->incr_pos = 0;
1633 self->incr_need = self->incr_pos + 1;
1634 }
1635 while (ix);
1636}
1637
1638void incr_reset (CBOR *self)
1639 CODE:
1640{
1641 SvREFCNT_dec (self->incr_count);
1642 self->incr_count = 0;
1643}
1644
1303void DESTROY (CBOR *self) 1645void DESTROY (CBOR *self)
1304 PPCODE: 1646 PPCODE:
1305 cbor_free (self); 1647 cbor_free (self);
1306 1648
1307PROTOTYPES: ENABLE 1649PROTOTYPES: ENABLE

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines