ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.62 by root, Sat Nov 26 02:11:52 2016 UTC vs.
Revision 1.77 by root, Fri Sep 8 05:47:10 2023 UTC

21# define HvNAMELEN(hv) HvNAMELEN_get (hv) 21# define HvNAMELEN(hv) HvNAMELEN_get (hv)
22#endif 22#endif
23#ifndef HvNAMEUTF8 23#ifndef HvNAMEUTF8
24# define HvNAMEUTF8(hv) 0 24# define HvNAMEUTF8(hv) 0
25#endif 25#endif
26#ifndef SvREFCNT_inc_NN
27# define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28#endif
26#ifndef SvREFCNT_dec_NN 29#ifndef SvREFCNT_dec_NN
27# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) 30# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
28#endif 31#endif
32
33// perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0
34#define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len)))
29 35
30// known major and minor types 36// known major and minor types
31enum cbor_type 37enum cbor_type
32{ 38{
33 MAJOR_SHIFT = 5, 39 MAJOR_SHIFT = 5,
94 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8 100 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8
95 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8 101 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
96 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 102 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
97 103
98 CBOR_TAG_MAGIC = 55799, // self-describe cbor 104 CBOR_TAG_MAGIC = 55799, // self-describe cbor
105};
106
107// known forced types, also hardcoded in CBOR.pm
108enum
109{
110 AS_CBOR = 0,
111 AS_INT = 1,
112 AS_BYTES = 2,
113 AS_TEXT = 3,
114 AS_FLOAT16 = 4,
115 AS_FLOAT32 = 5,
116 AS_FLOAT64 = 6,
117 AS_MAP = 7,
118 // possibly future enhancements: (generic) float, (generic) string
99}; 119};
100 120
101#define F_SHRINK 0x00000001UL 121#define F_SHRINK 0x00000001UL
102#define F_ALLOW_UNKNOWN 0x00000002UL 122#define F_ALLOW_UNKNOWN 0x00000002UL
103#define F_ALLOW_SHARING 0x00000004UL 123#define F_ALLOW_SHARING 0x00000004UL
184#endif 204#endif
185 } 205 }
186} 206}
187 207
188// minimum length of a string to be registered for stringref 208// minimum length of a string to be registered for stringref
189ecb_inline int 209ecb_inline STRLEN
190minimum_string_length (UV idx) 210minimum_string_length (UV idx)
191{ 211{
192 return idx <= 23 ? 3 212 return idx <= 23 ? 3
193 : idx <= 0xffU ? 4 213 : idx <= 0xffU ? 4
194 : idx <= 0xffffU ? 5 214 : idx <= 0xffffU ? 5
223 enc->cur = SvPVX (enc->sv) + cur; 243 enc->cur = SvPVX (enc->sv) + cur;
224 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; 244 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
225 } 245 }
226} 246}
227 247
248static void encode_sv (enc_t *enc, SV *sv);
249
228ecb_inline void 250ecb_inline void
229encode_ch (enc_t *enc, char ch) 251encode_ch (enc_t *enc, char ch)
230{ 252{
231 need (enc, 1); 253 need (enc, 1);
232 *enc->cur++ = ch; 254 *enc->cur++ = ch;
233} 255}
234 256
257// used for tags, integers, element counts and so on
235static void 258static void
236encode_uint (enc_t *enc, int major, UV len) 259encode_uint (enc_t *enc, int major, UV len)
237{ 260{
238 need (enc, 9); 261 need (enc, 9);
239 262
270 *enc->cur++ = len >> 8; 293 *enc->cur++ = len >> 8;
271 *enc->cur++ = len; 294 *enc->cur++ = len;
272 } 295 }
273} 296}
274 297
298// encodes a perl value into a CBOR integer
299ecb_inline void
300encode_int (enc_t *enc, SV *sv)
301{
302 if (SvIsUV (sv))
303 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
304 else if (SvIVX (sv) >= 0)
305 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
306 else
307 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
308}
309
275ecb_inline void 310ecb_inline void
276encode_tag (enc_t *enc, UV tag) 311encode_tag (enc_t *enc, UV tag)
277{ 312{
278 encode_uint (enc, MAJOR_TAG, tag); 313 encode_uint (enc, MAJOR_TAG, tag);
279} 314}
340 } 375 }
341 376
342 encode_str (enc, upgrade_utf8, utf8, str, len); 377 encode_str (enc, upgrade_utf8, utf8, str, len);
343} 378}
344 379
345static void encode_sv (enc_t *enc, SV *sv); 380ecb_inline void
381encode_float16 (enc_t *enc, NV nv)
382{
383 need (enc, 1+2);
384
385 *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
386
387 uint16_t fp = ecb_float_to_binary16 (nv);
388
389 if (!ecb_big_endian ())
390 fp = ecb_bswap16 (fp);
391
392 memcpy (enc->cur, &fp, 2);
393 enc->cur += 2;
394}
395
396ecb_inline void
397encode_float32 (enc_t *enc, NV nv)
398{
399 need (enc, 1+4);
400
401 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
402
403 uint32_t fp = ecb_float_to_binary32 (nv);
404
405 if (!ecb_big_endian ())
406 fp = ecb_bswap32 (fp);
407
408 memcpy (enc->cur, &fp, 4);
409 enc->cur += 4;
410}
411
412ecb_inline void
413encode_float64 (enc_t *enc, NV nv)
414{
415 need (enc, 1+8);
416
417 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
418
419 uint64_t fp = ecb_double_to_binary64 (nv);
420
421 if (!ecb_big_endian ())
422 fp = ecb_bswap64 (fp);
423
424 memcpy (enc->cur, &fp, 8);
425 enc->cur += 8;
426}
427
428ecb_inline void
429encode_bool (enc_t *enc, int istrue)
430{
431 encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
432}
433
434// encodes an arrayref containing key-value pairs as CBOR map
435ecb_inline void
436encode_array_as_map (enc_t *enc, SV *sv)
437{
438 if (enc->depth >= enc->cbor.max_depth)
439 croak (ERR_NESTING_EXCEEDED);
440
441 ++enc->depth;
442
443 // as_map does error checking for us, but we re-check in case
444 // things have changed.
445
446 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
447 croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)");
448
449 AV *av = (AV *)SvRV (sv);
450 int i, len = av_len (av);
451
452 if (!(len & 1))
453 croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)");
454
455 encode_uint (enc, MAJOR_MAP, (len + 1) >> 1);
456
457 for (i = 0; i <= len; ++i)
458 {
459 SV **svp = av_fetch (av, i, 0);
460 encode_sv (enc, svp ? *svp : &PL_sv_undef);
461 }
462
463 --enc->depth;
464}
465
466ecb_inline void
467encode_forced (enc_t *enc, UV type, SV *sv)
468{
469 switch (type)
470 {
471 case AS_CBOR:
472 {
473 STRLEN len;
474 char *str = SvPVbyte (sv, len);
475
476 need (enc, len);
477 memcpy (enc->cur, str, len);
478 enc->cur += len;
479 }
480 break;
481
482 case AS_BYTES:
483 {
484 STRLEN len;
485 char *str = SvPVbyte (sv, len);
486 encode_strref (enc, 0, 0, str, len);
487 }
488 break;
489
490 case AS_TEXT:
491 {
492 STRLEN len;
493 char *str = SvPVutf8 (sv, len);
494 encode_strref (enc, 1, 1, str, len);
495 }
496 break;
497
498 case AS_INT: encode_int (enc, sv); break;
499
500 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
501 case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
502 case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
503
504 case AS_MAP: encode_array_as_map (enc, sv); break;
505
506 default:
507 croak ("encountered malformed CBOR::XS::Tagged object");
508 }
509}
346 510
347static void 511static void
348encode_av (enc_t *enc, AV *av) 512encode_av (enc_t *enc, AV *av)
349{ 513{
350 int i, len = av_len (av); 514 int i, len = av_len (av);
354 518
355 ++enc->depth; 519 ++enc->depth;
356 520
357 encode_uint (enc, MAJOR_ARRAY, len + 1); 521 encode_uint (enc, MAJOR_ARRAY, len + 1);
358 522
359 if (SvMAGICAL (av)) 523 if (ecb_expect_false (SvMAGICAL (av)))
360 for (i = 0; i <= len; ++i) 524 for (i = 0; i <= len; ++i)
361 { 525 {
362 SV **svp = av_fetch (av, i, 0); 526 SV **svp = av_fetch (av, i, 0);
363 encode_sv (enc, svp ? *svp : &PL_sv_undef); 527 encode_sv (enc, svp ? *svp : &PL_sv_undef);
364 } 528 }
383 ++enc->depth; 547 ++enc->depth;
384 548
385 int pairs = hv_iterinit (hv); 549 int pairs = hv_iterinit (hv);
386 int mg = SvMAGICAL (hv); 550 int mg = SvMAGICAL (hv);
387 551
388 if (mg) 552 if (ecb_expect_false (mg))
389 encode_ch (enc, MAJOR_MAP | MINOR_INDEF); 553 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
390 else 554 else
391 encode_uint (enc, MAJOR_MAP, pairs); 555 encode_uint (enc, MAJOR_MAP, pairs);
392 556
393 while ((he = hv_iternext (hv))) 557 while ((he = hv_iternext (hv)))
398 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he)); 562 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
399 563
400 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); 564 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
401 } 565 }
402 566
403 if (mg) 567 if (ecb_expect_false (mg))
404 encode_ch (enc, MAJOR_MISC | MINOR_INDEF); 568 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
405 569
406 --enc->depth; 570 --enc->depth;
407} 571}
408 572
428 592
429 HV *stash = SvSTASH (sv); 593 HV *stash = SvSTASH (sv);
430 594
431 if (stash == boolean_stash) 595 if (stash == boolean_stash)
432 { 596 {
433 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE); 597 encode_bool (enc, SvIV (sv));
434 return; 598 return;
435 } 599 }
436 else if (stash == error_stash) 600 else if (stash == error_stash)
437 { 601 {
438 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF); 602 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
441 else if (stash == tagged_stash) 605 else if (stash == tagged_stash)
442 { 606 {
443 if (svt != SVt_PVAV) 607 if (svt != SVt_PVAV)
444 croak ("encountered CBOR::XS::Tagged object that isn't an array"); 608 croak ("encountered CBOR::XS::Tagged object that isn't an array");
445 609
610 switch (av_len ((AV *)sv))
611 {
612 case 2-1:
613 // actually a tagged value
446 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1))); 614 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
447 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); 615 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
616 break;
617
618 case 3-1:
619 // a forced type [value, type, undef]
620 encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
621 break;
622
623 default:
624 croak ("encountered malformed CBOR::XS::Tagged object");
625 }
448 626
449 return; 627 return;
450 } 628 }
451 } 629 }
452 630
453 if (ecb_expect_false (SvREFCNT (sv) > 1) 631 if (ecb_expect_false (SvREFCNT (sv) > 1)
454 && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)) 632 && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
455 { 633 {
456 if (!enc->shareable) 634 if (ecb_expect_false (!enc->shareable))
457 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ()); 635 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
458 636
459 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1); 637 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
460 638
461 if (SvOK (*svp)) 639 if (SvOK (*svp))
507 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) 685 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
508 { 686 {
509 dSP; 687 dSP;
510 688
511 ENTER; SAVETMPS; 689 ENTER; SAVETMPS;
512 SAVESTACK_POS ();
513 PUSHMARK (SP); 690 PUSHMARK (SP);
514 EXTEND (SP, 2); 691 EXTEND (SP, 2);
515 // we re-bless the reference to get overload and other niceties right 692 // we re-bless the reference to get overload and other niceties right
516 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 693 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
517 PUSHs (sv_cbor); 694 PUSHs (sv_cbor);
526 703
527 encode_tag (enc, CBOR_TAG_PERL_OBJECT); 704 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
528 encode_uint (enc, MAJOR_ARRAY, count + 1); 705 encode_uint (enc, MAJOR_ARRAY, count + 1);
529 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); 706 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
530 707
531 while (count) 708 {
709 int i;
710
711 for (i = 0; i < count; ++i)
532 encode_sv (enc, SP[1 - count--]); 712 encode_sv (enc, SP[i + 1 - count]);
713
714 SP -= count;
715 }
533 716
534 PUTBACK; 717 PUTBACK;
535 718
536 FREETMPS; LEAVE; 719 FREETMPS; LEAVE;
537 } 720 }
559 742
560 if (ecb_expect_false (nv == (NV)(U32)nv)) 743 if (ecb_expect_false (nv == (NV)(U32)nv))
561 encode_uint (enc, MAJOR_POS_INT, (U32)nv); 744 encode_uint (enc, MAJOR_POS_INT, (U32)nv);
562 //TODO: maybe I32? 745 //TODO: maybe I32?
563 else if (ecb_expect_false (nv == (float)nv)) 746 else if (ecb_expect_false (nv == (float)nv))
564 { 747 encode_float32 (enc, nv);
565 uint32_t fp = ecb_float_to_binary32 (nv);
566
567 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
568
569 if (!ecb_big_endian ())
570 fp = ecb_bswap32 (fp);
571
572 memcpy (enc->cur, &fp, 4);
573 enc->cur += 4;
574 }
575 else 748 else
576 { 749 encode_float64 (enc, nv);
577 uint64_t fp = ecb_double_to_binary64 (nv);
578
579 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
580
581 if (!ecb_big_endian ())
582 fp = ecb_bswap64 (fp);
583
584 memcpy (enc->cur, &fp, 8);
585 enc->cur += 8;
586 }
587} 750}
588 751
589static void 752static void
590encode_sv (enc_t *enc, SV *sv) 753encode_sv (enc_t *enc, SV *sv)
591{ 754{
598 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len); 761 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
599 } 762 }
600 else if (SvNOKp (sv)) 763 else if (SvNOKp (sv))
601 encode_nv (enc, sv); 764 encode_nv (enc, sv);
602 else if (SvIOKp (sv)) 765 else if (SvIOKp (sv))
603 { 766 encode_int (enc, sv);
604 if (SvIsUV (sv))
605 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
606 else if (SvIVX (sv) >= 0)
607 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
608 else
609 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
610 }
611 else if (SvROK (sv)) 767 else if (SvROK (sv))
612 encode_rv (enc, SvRV (sv)); 768 encode_rv (enc, SvRV (sv));
613 else if (!SvOK (sv)) 769 else if (!SvOK (sv))
614 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL); 770 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
615 else if (enc->cbor.flags & F_ALLOW_UNKNOWN) 771 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
622static SV * 778static SV *
623encode_cbor (SV *scalar, CBOR *cbor) 779encode_cbor (SV *scalar, CBOR *cbor)
624{ 780{
625 enc_t enc = { 0 }; 781 enc_t enc = { 0 };
626 782
627 enc.cbor = *cbor; 783 enc.cbor = *cbor;
628 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 784 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
629 enc.cur = SvPVX (enc.sv); 785 enc.cur = SvPVX (enc.sv);
630 enc.end = SvEND (enc.sv); 786 enc.end = SvEND (enc.sv);
631 787
632 SvPOK_only (enc.sv); 788 SvPOK_only (enc.sv);
633 789
634 if (cbor->flags & F_PACK_STRINGS) 790 if (cbor->flags & F_PACK_STRINGS)
635 { 791 {
693 849
694ecb_cold static void 850ecb_cold static void
695err_unexpected_end (dec_t *dec) 851err_unexpected_end (dec_t *dec)
696{ 852{
697 err_set (dec, "unexpected end of CBOR data"); 853 err_set (dec, "unexpected end of CBOR data");
698}
699
700ecb_cold static void
701err_nesting_exceeded (dec_t *dec)
702{
703 err_set (dec, ERR_NESTING_EXCEEDED);
704} 854}
705 855
706#define ERR_DO(do) SB do; goto fail; SE 856#define ERR_DO(do) SB do; goto fail; SE
707#define ERR(reason) ERR_DO (err_set (dec, reason)) 857#define ERR(reason) ERR_DO (err_set (dec, reason))
708#define ERR_ERRSV ERR_DO (err_errsv (dec)) 858#define ERR_ERRSV ERR_DO (err_errsv (dec))
783 933
784 for (;;) 934 for (;;)
785 { 935 {
786 WANT (1); 936 WANT (1);
787 937
788 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF)) 938 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
789 { 939 {
790 ++dec->cur; 940 ++dec->cur;
791 break; 941 break;
792 } 942 }
793 943
807 957
808 DEC_DEC_DEPTH; 958 DEC_DEC_DEPTH;
809 return newRV_noinc ((SV *)av); 959 return newRV_noinc ((SV *)av);
810 960
811fail: 961fail:
812 SvREFCNT_dec (av); 962 SvREFCNT_dec_NN (av);
813 DEC_DEC_DEPTH; 963 DEC_DEC_DEPTH;
814 return &PL_sv_undef; 964 return &PL_sv_undef;
815} 965}
816 966
817static void 967static void
840 990
841 WANT (len); 991 WANT (len);
842 dec->cur += len; 992 dec->cur += len;
843 993
844 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) 994 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
845 if (!is_utf8_string (key, len)) 995 if (!cbor_is_utf8_string ((U8 *)key, len))
846 ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); 996 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
847 997
848 hv_store (hv, key, -len, decode_sv (dec), 0); 998 hv_store (hv, key, -len, decode_sv (dec), 0);
849 999
850 return; 1000 return;
880 1030
881 return; 1031 return;
882 } 1032 }
883 1033
884 hv_store_ent (hv, k, v, 0); 1034 hv_store_ent (hv, k, v, 0);
885 SvREFCNT_dec (k); 1035 SvREFCNT_dec_NN (k);
886 1036
887fail: 1037fail:
888 ; 1038 ;
889} 1039}
890 1040
901 1051
902 for (;;) 1052 for (;;)
903 { 1053 {
904 WANT (1); 1054 WANT (1);
905 1055
906 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF)) 1056 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
907 { 1057 {
908 ++dec->cur; 1058 ++dec->cur;
909 break; 1059 break;
910 } 1060 }
911 1061
924 1074
925 DEC_DEC_DEPTH; 1075 DEC_DEC_DEPTH;
926 return newRV_noinc ((SV *)hv); 1076 return newRV_noinc ((SV *)hv);
927 1077
928fail: 1078fail:
929 SvREFCNT_dec (hv); 1079 SvREFCNT_dec_NN (hv);
930 DEC_DEC_DEPTH; 1080 DEC_DEC_DEPTH;
931 return &PL_sv_undef; 1081 return &PL_sv_undef;
932} 1082}
933 1083
934static SV * 1084static SV *
935decode_str (dec_t *dec, int utf8) 1085decode_str (dec_t *dec, int utf8)
936{ 1086{
937 SV *sv = 0; 1087 SV *sv = 0;
938 1088
939 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF) 1089 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
940 { 1090 {
941 // indefinite length strings 1091 // indefinite length strings
942 ++dec->cur; 1092 ++dec->cur;
943 1093
944 U8 major = *dec->cur & MAJOR_MISC; 1094 U8 major = *dec->cur & MAJOR_MISC;
979 } 1129 }
980 1130
981 if (utf8) 1131 if (utf8)
982 { 1132 {
983 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) 1133 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
984 if (!is_utf8_string (SvPVX (sv), SvCUR (sv))) 1134 if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv)))
985 ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); 1135 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
986 1136
987 SvUTF8_on (sv); 1137 SvUTF8_on (sv);
988 } 1138 }
989 1139
1012 sv = newRV_noinc (decode_sv (dec)); 1162 sv = newRV_noinc (decode_sv (dec));
1013 break; 1163 break;
1014 1164
1015 case CBOR_TAG_STRINGREF_NAMESPACE: 1165 case CBOR_TAG_STRINGREF_NAMESPACE:
1016 { 1166 {
1017 // do nmot use SAVETMPS/FREETMPS, as these will 1167 // do not use SAVETMPS/FREETMPS, as these will
1018 // erase mortalised caches, e.g. "shareable" 1168 // erase mortalised caches, e.g. "shareable"
1019 ENTER; 1169 ENTER;
1020 1170
1021 SAVESPTR (dec->stringref); 1171 SAVESPTR (dec->stringref);
1022 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); 1172 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1032 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT)) 1182 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1033 ERR ("corrupted CBOR data (stringref index not an unsigned integer)"); 1183 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1034 1184
1035 UV idx = decode_uint (dec); 1185 UV idx = decode_uint (dec);
1036 1186
1037 if (!dec->stringref || (int)idx > AvFILLp (dec->stringref)) 1187 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1038 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)"); 1188 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1039 1189
1040 sv = newSVsv (AvARRAY (dec->stringref)[idx]); 1190 sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1041 } 1191 }
1042 break; 1192 break;
1046 if (ecb_expect_false (!dec->shareable)) 1196 if (ecb_expect_false (!dec->shareable))
1047 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); 1197 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1048 1198
1049 if (dec->cbor.flags & F_ALLOW_CYCLES) 1199 if (dec->cbor.flags & F_ALLOW_CYCLES)
1050 { 1200 {
1201 // if cycles are allowed, then we store an AV as value
1202 // while it is being decoded, and gather unresolved
1203 // references in it, to be resolved after decoding.
1204 int idx, i;
1051 sv = newSV (0); 1205 AV *av = newAV ();
1052 av_push (dec->shareable, SvREFCNT_inc_NN (sv)); 1206 av_push (dec->shareable, SvREFCNT_inc_NN ((SV *)av));
1207 idx = AvFILLp (dec->shareable);
1053 1208
1054 SV *osv = decode_sv (dec); 1209 sv = decode_sv (dec);
1055 sv_setsv (sv, osv); 1210
1211 // the AV now contains \undef for all unresolved references,
1212 // so we fix them up here.
1213 for (i = 0; i <= AvFILLp (av); ++i)
1214 SvRV_set (AvARRAY (av)[i], SvREFCNT_inc_NN (SvRV (sv)));
1215
1216 // now replace the AV by a reference to the completed value
1056 SvREFCNT_dec_NN (osv); 1217 SvREFCNT_dec_NN (av);
1218 AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv);
1057 } 1219 }
1058 else 1220 else
1059 { 1221 {
1060 av_push (dec->shareable, &PL_sv_undef); 1222 av_push (dec->shareable, &PL_sv_undef);
1061 int idx = AvFILLp (dec->shareable); 1223 int idx = AvFILLp (dec->shareable);
1070 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT)) 1232 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1071 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)"); 1233 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1072 1234
1073 UV idx = decode_uint (dec); 1235 UV idx = decode_uint (dec);
1074 1236
1075 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable)) 1237 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1076 ERR ("corrupted CBOR data (sharedref index out of bounds)"); 1238 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1077 1239
1078 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); 1240 sv = AvARRAY (dec->shareable)[idx];
1079 1241
1080 if (sv == &PL_sv_undef) 1242 // reference to cycle, we create a new \undef and use that, and also
1243 // register it in the AV for later fixing
1244 if (SvTYPE (sv) == SVt_PVAV)
1245 {
1246 AV *av = (AV *)sv;
1247 sv = newRV_noinc (&PL_sv_undef);
1248 av_push (av, sv);
1249 }
1250 else if (sv == &PL_sv_undef) // not yet decoded, but cycles not allowed
1081 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled"); 1251 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1252 else // we decoded the object earlier, no cycle
1253 sv = newSVsv (sv);
1082 } 1254 }
1083 break; 1255 break;
1084 1256
1085 case CBOR_TAG_PERL_OBJECT: 1257 case CBOR_TAG_PERL_OBJECT:
1086 { 1258 {
1126 { 1298 {
1127 FREETMPS; LEAVE; 1299 FREETMPS; LEAVE;
1128 ERR_ERRSV; 1300 ERR_ERRSV;
1129 } 1301 }
1130 1302
1131 SvREFCNT_dec (sv); 1303 SvREFCNT_dec_NN (sv);
1132 sv = SvREFCNT_inc (POPs); 1304 sv = SvREFCNT_inc (POPs);
1133 1305
1134 PUTBACK; 1306 PUTBACK;
1135 1307
1136 FREETMPS; LEAVE; 1308 FREETMPS; LEAVE;
1144 1316
1145 sv = decode_sv (dec); 1317 sv = decode_sv (dec);
1146 1318
1147 dSP; 1319 dSP;
1148 ENTER; SAVETMPS; 1320 ENTER; SAVETMPS;
1149 SAVESTACK_POS ();
1150 PUSHMARK (SP); 1321 PUSHMARK (SP);
1151 EXTEND (SP, 2); 1322 EXTEND (SP, 2);
1152 PUSHs (tag_sv); 1323 PUSHs (tag_sv);
1153 PUSHs (sv); 1324 PUSHs (sv);
1154 1325
1156 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL); 1327 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1157 SPAGAIN; 1328 SPAGAIN;
1158 1329
1159 if (SvTRUE (ERRSV)) 1330 if (SvTRUE (ERRSV))
1160 { 1331 {
1161 SvREFCNT_dec (tag_sv); 1332 SvREFCNT_dec_NN (tag_sv);
1162 FREETMPS; LEAVE; 1333 FREETMPS; LEAVE;
1163 ERR_ERRSV; 1334 ERR_ERRSV;
1164 } 1335 }
1165 1336
1166 if (count) 1337 if (count)
1167 { 1338 {
1168 SvREFCNT_dec (tag_sv); 1339 SvREFCNT_dec_NN (tag_sv);
1169 SvREFCNT_dec (sv); 1340 SvREFCNT_dec_NN (sv);
1170 sv = SvREFCNT_inc (POPs); 1341 sv = SvREFCNT_inc_NN (TOPs);
1342 SP -= count;
1171 } 1343 }
1172 else 1344 else
1173 { 1345 {
1174 AV *av = newAV (); 1346 AV *av = newAV ();
1175 av_push (av, tag_sv); 1347 av_push (av, tag_sv);
1318 for (i = av_len (dec.shareable) + 1; i--; ) 1490 for (i = av_len (dec.shareable) + 1; i--; )
1319 if ((svp = av_fetch (dec.shareable, i, 0))) 1491 if ((svp = av_fetch (dec.shareable, i, 0)))
1320 sv_setsv (*svp, &PL_sv_undef); 1492 sv_setsv (*svp, &PL_sv_undef);
1321 } 1493 }
1322 1494
1323 SvREFCNT_dec (sv); 1495 SvREFCNT_dec_NN (sv);
1324 1496
1325 if (dec.err_sv) 1497 if (dec.err_sv)
1326 sv_2mortal (dec.err_sv); 1498 sv_2mortal (dec.err_sv);
1327 1499
1328 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1500 croak ("%s, at offset %ld (octet 0x%02x)", dec.err, (long)(dec.cur - (U8 *)data), (int)(uint8_t)*dec.cur);
1329 } 1501 }
1330 1502
1331 sv = sv_2mortal (sv); 1503 sv = sv_2mortal (sv);
1332 1504
1333 return sv; 1505 return sv;
1422 1594
1423 break; 1595 break;
1424 1596
1425 case MAJOR_MAP >> MAJOR_SHIFT: 1597 case MAJOR_MAP >> MAJOR_SHIFT:
1426 len <<= 1; 1598 len <<= 1;
1599 /* FALLTHROUGH */
1427 case MAJOR_ARRAY >> MAJOR_SHIFT: 1600 case MAJOR_ARRAY >> MAJOR_SHIFT:
1428 if (len) 1601 if (len)
1429 { 1602 {
1430 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest 1603 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1431 count = len + 1; 1604 count = len + 1;
1671 cbor_init (&cbor); 1844 cbor_init (&cbor);
1672 PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN; 1845 PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1673 XPUSHs (cborstr); 1846 XPUSHs (cborstr);
1674} 1847}
1675 1848
1849#ifdef __AFL_COMPILER
1850
1851void
1852afl_init ()
1853 CODE:
1854 __AFL_INIT ();
1855
1856int
1857afl_loop (unsigned int count = 10000)
1858 CODE:
1859 RETVAL = __AFL_LOOP (count);
1860 OUTPUT:
1861 RETVAL
1862
1863#endif
1864

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines