ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.61 by root, Sat Nov 26 02:10:19 2016 UTC vs.
Revision 1.79 by root, Fri Sep 8 06:05:01 2023 UTC

21# define HvNAMELEN(hv) HvNAMELEN_get (hv) 21# define HvNAMELEN(hv) HvNAMELEN_get (hv)
22#endif 22#endif
23#ifndef HvNAMEUTF8 23#ifndef HvNAMEUTF8
24# define HvNAMEUTF8(hv) 0 24# define HvNAMEUTF8(hv) 0
25#endif 25#endif
26#ifndef SvREFCNT_inc_NN
27# define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28#endif
26#ifndef SvREFCNT_dec_NN 29#ifndef SvREFCNT_dec_NN
27# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) 30# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
28#endif 31#endif
32
33// perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0
34#define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len)))
29 35
30// known major and minor types 36// known major and minor types
31enum cbor_type 37enum cbor_type
32{ 38{
33 MAJOR_SHIFT = 5, 39 MAJOR_SHIFT = 5,
94 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8 100 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8
95 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8 101 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
96 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 102 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
97 103
98 CBOR_TAG_MAGIC = 55799, // self-describe cbor 104 CBOR_TAG_MAGIC = 55799, // self-describe cbor
105};
106
107// known forced types, also hardcoded in CBOR.pm
108enum
109{
110 AS_CBOR = 0,
111 AS_INT = 1,
112 AS_BYTES = 2,
113 AS_TEXT = 3,
114 AS_FLOAT16 = 4,
115 AS_FLOAT32 = 5,
116 AS_FLOAT64 = 6,
117 AS_MAP = 7,
118 // possibly future enhancements: (generic) float, (generic) string
99}; 119};
100 120
101#define F_SHRINK 0x00000001UL 121#define F_SHRINK 0x00000001UL
102#define F_ALLOW_UNKNOWN 0x00000002UL 122#define F_ALLOW_UNKNOWN 0x00000002UL
103#define F_ALLOW_SHARING 0x00000004UL 123#define F_ALLOW_SHARING 0x00000004UL
184#endif 204#endif
185 } 205 }
186} 206}
187 207
188// minimum length of a string to be registered for stringref 208// minimum length of a string to be registered for stringref
189ecb_inline int 209ecb_inline STRLEN
190minimum_string_length (UV idx) 210minimum_string_length (UV idx)
191{ 211{
192
193 return idx <= 23 ? 3 212 return idx <= 23 ? 3
194 : idx <= 0xffU ? 4 213 : idx <= 0xffU ? 4
195 : idx <= 0xffffU ? 5 214 : idx <= 0xffffU ? 5
196 : idx <= 0xffffffffU ? 7 215 : idx <= 0xffffffffU ? 7
197 : 11; 216 : 11;
224 enc->cur = SvPVX (enc->sv) + cur; 243 enc->cur = SvPVX (enc->sv) + cur;
225 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; 244 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
226 } 245 }
227} 246}
228 247
248static void encode_sv (enc_t *enc, SV *sv);
249
229ecb_inline void 250ecb_inline void
230encode_ch (enc_t *enc, char ch) 251encode_ch (enc_t *enc, char ch)
231{ 252{
232 need (enc, 1); 253 need (enc, 1);
233 *enc->cur++ = ch; 254 *enc->cur++ = ch;
234} 255}
235 256
257// used for tags, integers, element counts and so on
236static void 258static void
237encode_uint (enc_t *enc, int major, UV len) 259encode_uint (enc_t *enc, int major, UV len)
238{ 260{
239 need (enc, 9); 261 need (enc, 9);
240 262
271 *enc->cur++ = len >> 8; 293 *enc->cur++ = len >> 8;
272 *enc->cur++ = len; 294 *enc->cur++ = len;
273 } 295 }
274} 296}
275 297
298// encodes a perl value into a CBOR integer
299ecb_inline void
300encode_int (enc_t *enc, SV *sv)
301{
302 if (SvIsUV (sv))
303 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
304 else if (SvIVX (sv) >= 0)
305 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
306 else
307 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
308}
309
276ecb_inline void 310ecb_inline void
277encode_tag (enc_t *enc, UV tag) 311encode_tag (enc_t *enc, UV tag)
278{ 312{
279 encode_uint (enc, MAJOR_TAG, tag); 313 encode_uint (enc, MAJOR_TAG, tag);
280} 314}
341 } 375 }
342 376
343 encode_str (enc, upgrade_utf8, utf8, str, len); 377 encode_str (enc, upgrade_utf8, utf8, str, len);
344} 378}
345 379
346static void encode_sv (enc_t *enc, SV *sv); 380ecb_inline void
381encode_float16 (enc_t *enc, NV nv)
382{
383 need (enc, 1+2);
384
385 *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
386
387 uint16_t fp = ecb_float_to_binary16 (nv);
388
389 if (!ecb_big_endian ())
390 fp = ecb_bswap16 (fp);
391
392 memcpy (enc->cur, &fp, 2);
393 enc->cur += 2;
394}
395
396ecb_inline void
397encode_float32 (enc_t *enc, NV nv)
398{
399 need (enc, 1+4);
400
401 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
402
403 uint32_t fp = ecb_float_to_binary32 (nv);
404
405 if (!ecb_big_endian ())
406 fp = ecb_bswap32 (fp);
407
408 memcpy (enc->cur, &fp, 4);
409 enc->cur += 4;
410}
411
412ecb_inline void
413encode_float64 (enc_t *enc, NV nv)
414{
415 need (enc, 1+8);
416
417 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
418
419 uint64_t fp = ecb_double_to_binary64 (nv);
420
421 if (!ecb_big_endian ())
422 fp = ecb_bswap64 (fp);
423
424 memcpy (enc->cur, &fp, 8);
425 enc->cur += 8;
426}
427
428ecb_inline void
429encode_bool (enc_t *enc, int istrue)
430{
431 encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
432}
433
434// encodes an arrayref containing key-value pairs as CBOR map
435ecb_inline void
436encode_array_as_map (enc_t *enc, SV *sv)
437{
438 if (enc->depth >= enc->cbor.max_depth)
439 croak (ERR_NESTING_EXCEEDED);
440
441 ++enc->depth;
442
443 // as_map does error checking for us, but we re-check in case
444 // things have changed.
445
446 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
447 croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)");
448
449 AV *av = (AV *)SvRV (sv);
450 int i, len = av_len (av);
451
452 if (!(len & 1))
453 croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)");
454
455 encode_uint (enc, MAJOR_MAP, (len + 1) >> 1);
456
457 for (i = 0; i <= len; ++i)
458 {
459 SV **svp = av_fetch (av, i, 0);
460 encode_sv (enc, svp ? *svp : &PL_sv_undef);
461 }
462
463 --enc->depth;
464}
465
466ecb_inline void
467encode_forced (enc_t *enc, UV type, SV *sv)
468{
469 switch (type)
470 {
471 case AS_CBOR:
472 {
473 STRLEN len;
474 char *str = SvPVbyte (sv, len);
475
476 need (enc, len);
477 memcpy (enc->cur, str, len);
478 enc->cur += len;
479 }
480 break;
481
482 case AS_BYTES:
483 {
484 STRLEN len;
485 char *str = SvPVbyte (sv, len);
486 encode_strref (enc, 0, 0, str, len);
487 }
488 break;
489
490 case AS_TEXT:
491 {
492 STRLEN len;
493 char *str = SvPVutf8 (sv, len);
494 encode_strref (enc, 1, 1, str, len);
495 }
496 break;
497
498 case AS_INT: encode_int (enc, sv); break;
499
500 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
501 case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
502 case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
503
504 case AS_MAP: encode_array_as_map (enc, sv); break;
505
506 default:
507 croak ("encountered malformed CBOR::XS::Tagged object");
508 }
509}
347 510
348static void 511static void
349encode_av (enc_t *enc, AV *av) 512encode_av (enc_t *enc, AV *av)
350{ 513{
351 int i, len = av_len (av); 514 int i, len = av_len (av);
355 518
356 ++enc->depth; 519 ++enc->depth;
357 520
358 encode_uint (enc, MAJOR_ARRAY, len + 1); 521 encode_uint (enc, MAJOR_ARRAY, len + 1);
359 522
360 if (SvMAGICAL (av)) 523 if (ecb_expect_false (SvMAGICAL (av)))
361 for (i = 0; i <= len; ++i) 524 for (i = 0; i <= len; ++i)
362 { 525 {
363 SV **svp = av_fetch (av, i, 0); 526 SV **svp = av_fetch (av, i, 0);
364 encode_sv (enc, svp ? *svp : &PL_sv_undef); 527 encode_sv (enc, svp ? *svp : &PL_sv_undef);
365 } 528 }
384 ++enc->depth; 547 ++enc->depth;
385 548
386 int pairs = hv_iterinit (hv); 549 int pairs = hv_iterinit (hv);
387 int mg = SvMAGICAL (hv); 550 int mg = SvMAGICAL (hv);
388 551
389 if (mg) 552 if (ecb_expect_false (mg))
390 encode_ch (enc, MAJOR_MAP | MINOR_INDEF); 553 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
391 else 554 else
392 encode_uint (enc, MAJOR_MAP, pairs); 555 encode_uint (enc, MAJOR_MAP, pairs);
393 556
394 while ((he = hv_iternext (hv))) 557 while ((he = hv_iternext (hv)))
399 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he)); 562 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
400 563
401 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); 564 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
402 } 565 }
403 566
404 if (mg) 567 if (ecb_expect_false (mg))
405 encode_ch (enc, MAJOR_MISC | MINOR_INDEF); 568 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
406 569
407 --enc->depth; 570 --enc->depth;
408} 571}
409 572
429 592
430 HV *stash = SvSTASH (sv); 593 HV *stash = SvSTASH (sv);
431 594
432 if (stash == boolean_stash) 595 if (stash == boolean_stash)
433 { 596 {
434 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE); 597 encode_bool (enc, SvIV (sv));
435 return; 598 return;
436 } 599 }
437 else if (stash == error_stash) 600 else if (stash == error_stash)
438 { 601 {
439 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF); 602 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
442 else if (stash == tagged_stash) 605 else if (stash == tagged_stash)
443 { 606 {
444 if (svt != SVt_PVAV) 607 if (svt != SVt_PVAV)
445 croak ("encountered CBOR::XS::Tagged object that isn't an array"); 608 croak ("encountered CBOR::XS::Tagged object that isn't an array");
446 609
610 switch (av_len ((AV *)sv))
611 {
612 case 2-1:
613 // actually a tagged value
447 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1))); 614 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
448 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); 615 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
616 break;
617
618 case 3-1:
619 // a forced type [value, type, undef]
620 encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
621 break;
622
623 default:
624 croak ("encountered malformed CBOR::XS::Tagged object");
625 }
449 626
450 return; 627 return;
451 } 628 }
452 } 629 }
453 630
454 if (ecb_expect_false (SvREFCNT (sv) > 1) 631 if (ecb_expect_false (SvREFCNT (sv) > 1)
455 && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)) 632 && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
456 { 633 {
457 if (!enc->shareable) 634 if (ecb_expect_false (!enc->shareable))
458 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ()); 635 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
459 636
460 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1); 637 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
461 638
462 if (SvOK (*svp)) 639 if (SvOK (*svp))
508 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) 685 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
509 { 686 {
510 dSP; 687 dSP;
511 688
512 ENTER; SAVETMPS; 689 ENTER; SAVETMPS;
513 SAVESTACK_POS ();
514 PUSHMARK (SP); 690 PUSHMARK (SP);
515 EXTEND (SP, 2); 691 EXTEND (SP, 2);
516 // we re-bless the reference to get overload and other niceties right 692 // we re-bless the reference to get overload and other niceties right
517 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 693 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
518 PUSHs (sv_cbor); 694 PUSHs (sv_cbor);
527 703
528 encode_tag (enc, CBOR_TAG_PERL_OBJECT); 704 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
529 encode_uint (enc, MAJOR_ARRAY, count + 1); 705 encode_uint (enc, MAJOR_ARRAY, count + 1);
530 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); 706 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
531 707
532 while (count) 708 {
709 int i;
710
711 for (i = 0; i < count; ++i)
533 encode_sv (enc, SP[1 - count--]); 712 encode_sv (enc, SP[i + 1 - count]);
713
714 SP -= count;
715 }
534 716
535 PUTBACK; 717 PUTBACK;
536 718
537 FREETMPS; LEAVE; 719 FREETMPS; LEAVE;
538 } 720 }
560 742
561 if (ecb_expect_false (nv == (NV)(U32)nv)) 743 if (ecb_expect_false (nv == (NV)(U32)nv))
562 encode_uint (enc, MAJOR_POS_INT, (U32)nv); 744 encode_uint (enc, MAJOR_POS_INT, (U32)nv);
563 //TODO: maybe I32? 745 //TODO: maybe I32?
564 else if (ecb_expect_false (nv == (float)nv)) 746 else if (ecb_expect_false (nv == (float)nv))
565 { 747 encode_float32 (enc, nv);
566 uint32_t fp = ecb_float_to_binary32 (nv);
567
568 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
569
570 if (!ecb_big_endian ())
571 fp = ecb_bswap32 (fp);
572
573 memcpy (enc->cur, &fp, 4);
574 enc->cur += 4;
575 }
576 else 748 else
577 { 749 encode_float64 (enc, nv);
578 uint64_t fp = ecb_double_to_binary64 (nv);
579
580 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
581
582 if (!ecb_big_endian ())
583 fp = ecb_bswap64 (fp);
584
585 memcpy (enc->cur, &fp, 8);
586 enc->cur += 8;
587 }
588} 750}
589 751
590static void 752static void
591encode_sv (enc_t *enc, SV *sv) 753encode_sv (enc_t *enc, SV *sv)
592{ 754{
599 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len); 761 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
600 } 762 }
601 else if (SvNOKp (sv)) 763 else if (SvNOKp (sv))
602 encode_nv (enc, sv); 764 encode_nv (enc, sv);
603 else if (SvIOKp (sv)) 765 else if (SvIOKp (sv))
604 { 766 encode_int (enc, sv);
605 if (SvIsUV (sv))
606 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
607 else if (SvIVX (sv) >= 0)
608 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
609 else
610 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
611 }
612 else if (SvROK (sv)) 767 else if (SvROK (sv))
613 encode_rv (enc, SvRV (sv)); 768 encode_rv (enc, SvRV (sv));
614 else if (!SvOK (sv)) 769 else if (!SvOK (sv))
615 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL); 770 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
616 else if (enc->cbor.flags & F_ALLOW_UNKNOWN) 771 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
623static SV * 778static SV *
624encode_cbor (SV *scalar, CBOR *cbor) 779encode_cbor (SV *scalar, CBOR *cbor)
625{ 780{
626 enc_t enc = { 0 }; 781 enc_t enc = { 0 };
627 782
628 enc.cbor = *cbor; 783 enc.cbor = *cbor;
629 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 784 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
630 enc.cur = SvPVX (enc.sv); 785 enc.cur = SvPVX (enc.sv);
631 enc.end = SvEND (enc.sv); 786 enc.end = SvEND (enc.sv);
632 787
633 SvPOK_only (enc.sv); 788 SvPOK_only (enc.sv);
634 789
635 if (cbor->flags & F_PACK_STRINGS) 790 if (cbor->flags & F_PACK_STRINGS)
636 { 791 {
694 849
695ecb_cold static void 850ecb_cold static void
696err_unexpected_end (dec_t *dec) 851err_unexpected_end (dec_t *dec)
697{ 852{
698 err_set (dec, "unexpected end of CBOR data"); 853 err_set (dec, "unexpected end of CBOR data");
699}
700
701ecb_cold static void
702err_nesting_exceeded (dec_t *dec)
703{
704 err_set (dec, ERR_NESTING_EXCEEDED);
705} 854}
706 855
707#define ERR_DO(do) SB do; goto fail; SE 856#define ERR_DO(do) SB do; goto fail; SE
708#define ERR(reason) ERR_DO (err_set (dec, reason)) 857#define ERR(reason) ERR_DO (err_set (dec, reason))
709#define ERR_ERRSV ERR_DO (err_errsv (dec)) 858#define ERR_ERRSV ERR_DO (err_errsv (dec))
784 933
785 for (;;) 934 for (;;)
786 { 935 {
787 WANT (1); 936 WANT (1);
788 937
789 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF)) 938 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
790 { 939 {
791 ++dec->cur; 940 ++dec->cur;
792 break; 941 break;
793 } 942 }
794 943
808 957
809 DEC_DEC_DEPTH; 958 DEC_DEC_DEPTH;
810 return newRV_noinc ((SV *)av); 959 return newRV_noinc ((SV *)av);
811 960
812fail: 961fail:
813 SvREFCNT_dec (av); 962 SvREFCNT_dec_NN (av);
814 DEC_DEC_DEPTH; 963 DEC_DEC_DEPTH;
815 return &PL_sv_undef; 964 return &PL_sv_undef;
816} 965}
817 966
818static void 967static void
841 990
842 WANT (len); 991 WANT (len);
843 dec->cur += len; 992 dec->cur += len;
844 993
845 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) 994 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
846 if (!is_utf8_string (key, len)) 995 if (!cbor_is_utf8_string ((U8 *)key, len))
847 ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); 996 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
848 997
849 hv_store (hv, key, -len, decode_sv (dec), 0); 998 hv_store (hv, key, -len, decode_sv (dec), 0);
850 999
851 return; 1000 return;
881 1030
882 return; 1031 return;
883 } 1032 }
884 1033
885 hv_store_ent (hv, k, v, 0); 1034 hv_store_ent (hv, k, v, 0);
886 SvREFCNT_dec (k); 1035 SvREFCNT_dec_NN (k);
887 1036
888fail: 1037fail:
889 ; 1038 ;
890} 1039}
891 1040
902 1051
903 for (;;) 1052 for (;;)
904 { 1053 {
905 WANT (1); 1054 WANT (1);
906 1055
907 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF)) 1056 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
908 { 1057 {
909 ++dec->cur; 1058 ++dec->cur;
910 break; 1059 break;
911 } 1060 }
912 1061
925 1074
926 DEC_DEC_DEPTH; 1075 DEC_DEC_DEPTH;
927 return newRV_noinc ((SV *)hv); 1076 return newRV_noinc ((SV *)hv);
928 1077
929fail: 1078fail:
930 SvREFCNT_dec (hv); 1079 SvREFCNT_dec_NN (hv);
931 DEC_DEC_DEPTH; 1080 DEC_DEC_DEPTH;
932 return &PL_sv_undef; 1081 return &PL_sv_undef;
933} 1082}
934 1083
935static SV * 1084static SV *
936decode_str (dec_t *dec, int utf8) 1085decode_str (dec_t *dec, int utf8)
937{ 1086{
938 SV *sv = 0; 1087 SV *sv = 0;
939 1088
940 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF) 1089 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
941 { 1090 {
942 // indefinite length strings 1091 // indefinite length strings
943 ++dec->cur; 1092 ++dec->cur;
944 1093
945 U8 major = *dec->cur & MAJOR_MISC; 1094 U8 major = *dec->cur & MAJOR_MISC;
980 } 1129 }
981 1130
982 if (utf8) 1131 if (utf8)
983 { 1132 {
984 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) 1133 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
985 if (!is_utf8_string (SvPVX (sv), SvCUR (sv))) 1134 if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv)))
986 ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); 1135 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
987 1136
988 SvUTF8_on (sv); 1137 SvUTF8_on (sv);
989 } 1138 }
990 1139
1013 sv = newRV_noinc (decode_sv (dec)); 1162 sv = newRV_noinc (decode_sv (dec));
1014 break; 1163 break;
1015 1164
1016 case CBOR_TAG_STRINGREF_NAMESPACE: 1165 case CBOR_TAG_STRINGREF_NAMESPACE:
1017 { 1166 {
1018 // do nmot use SAVETMPS/FREETMPS, as these will 1167 // do not use SAVETMPS/FREETMPS, as these will
1019 // erase mortalised caches, e.g. "shareable" 1168 // erase mortalised caches, e.g. "shareable"
1020 ENTER; 1169 ENTER;
1021 1170
1022 SAVESPTR (dec->stringref); 1171 SAVESPTR (dec->stringref);
1023 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); 1172 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1033 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT)) 1182 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1034 ERR ("corrupted CBOR data (stringref index not an unsigned integer)"); 1183 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1035 1184
1036 UV idx = decode_uint (dec); 1185 UV idx = decode_uint (dec);
1037 1186
1038 if (!dec->stringref || (int)idx > AvFILLp (dec->stringref)) 1187 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1039 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)"); 1188 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1040 1189
1041 sv = newSVsv (AvARRAY (dec->stringref)[idx]); 1190 sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1042 } 1191 }
1043 break; 1192 break;
1047 if (ecb_expect_false (!dec->shareable)) 1196 if (ecb_expect_false (!dec->shareable))
1048 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); 1197 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1049 1198
1050 if (dec->cbor.flags & F_ALLOW_CYCLES) 1199 if (dec->cbor.flags & F_ALLOW_CYCLES)
1051 { 1200 {
1201 // if cycles are allowed, then we store an AV as value
1202 // while it is being decoded, and gather unresolved
1203 // references in it, to be resolved after decoding.
1204 int idx, i;
1052 sv = newSV (0); 1205 AV *av = newAV ();
1053 av_push (dec->shareable, SvREFCNT_inc_NN (sv)); 1206 av_push (dec->shareable, (SV *)av);
1207 idx = AvFILLp (dec->shareable);
1054 1208
1055 SV *osv = decode_sv (dec); 1209 sv = decode_sv (dec);
1056 sv_setsv (sv, osv); 1210
1211 // the AV now contains \undef for all unresolved references,
1212 // so we fix them up here.
1213 for (i = 0; i <= AvFILLp (av); ++i)
1214 SvRV_set (AvARRAY (av)[i], SvREFCNT_inc_NN (SvRV (sv)));
1215
1216 // now replace the AV by a reference to the completed value
1057 SvREFCNT_dec_NN (osv); 1217 SvREFCNT_dec_NN ((SV *)av);
1218 AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv);
1058 } 1219 }
1059 else 1220 else
1060 { 1221 {
1061 av_push (dec->shareable, &PL_sv_undef); 1222 av_push (dec->shareable, &PL_sv_undef);
1062 int idx = AvFILLp (dec->shareable); 1223 int idx = AvFILLp (dec->shareable);
1063 sv = decode_sv (dec); 1224 sv = decode_sv (dec);
1064 av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv)); 1225 AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv);
1065 } 1226 }
1066 } 1227 }
1067 break; 1228 break;
1068 1229
1069 case CBOR_TAG_VALUE_SHAREDREF: 1230 case CBOR_TAG_VALUE_SHAREDREF:
1071 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT)) 1232 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1072 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)"); 1233 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1073 1234
1074 UV idx = decode_uint (dec); 1235 UV idx = decode_uint (dec);
1075 1236
1076 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable)) 1237 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1077 ERR ("corrupted CBOR data (sharedref index out of bounds)"); 1238 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1078 1239
1079 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); 1240 sv = AvARRAY (dec->shareable)[idx];
1080 1241
1081 if (sv == &PL_sv_undef) 1242 // reference to cycle, we create a new \undef and use that, and also
1243 // register it in the AV for later fixing
1244 if (SvTYPE (sv) == SVt_PVAV)
1245 {
1246 AV *av = (AV *)sv;
1247 sv = newRV_noinc (&PL_sv_undef);
1248 av_push (av, SvREFCNT_inc_NN (sv));
1249 }
1250 else if (sv == &PL_sv_undef) // not yet decoded, but cycles not allowed
1082 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled"); 1251 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1252 else // we decoded the object earlier, no cycle
1253 sv = newSVsv (sv);
1083 } 1254 }
1084 break; 1255 break;
1085 1256
1086 case CBOR_TAG_PERL_OBJECT: 1257 case CBOR_TAG_PERL_OBJECT:
1087 { 1258 {
1127 { 1298 {
1128 FREETMPS; LEAVE; 1299 FREETMPS; LEAVE;
1129 ERR_ERRSV; 1300 ERR_ERRSV;
1130 } 1301 }
1131 1302
1132 SvREFCNT_dec (sv); 1303 SvREFCNT_dec_NN (sv);
1133 sv = SvREFCNT_inc (POPs); 1304 sv = SvREFCNT_inc (POPs);
1134 1305
1135 PUTBACK; 1306 PUTBACK;
1136 1307
1137 FREETMPS; LEAVE; 1308 FREETMPS; LEAVE;
1145 1316
1146 sv = decode_sv (dec); 1317 sv = decode_sv (dec);
1147 1318
1148 dSP; 1319 dSP;
1149 ENTER; SAVETMPS; 1320 ENTER; SAVETMPS;
1150 SAVESTACK_POS ();
1151 PUSHMARK (SP); 1321 PUSHMARK (SP);
1152 EXTEND (SP, 2); 1322 EXTEND (SP, 2);
1153 PUSHs (tag_sv); 1323 PUSHs (tag_sv);
1154 PUSHs (sv); 1324 PUSHs (sv);
1155 1325
1157 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL); 1327 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1158 SPAGAIN; 1328 SPAGAIN;
1159 1329
1160 if (SvTRUE (ERRSV)) 1330 if (SvTRUE (ERRSV))
1161 { 1331 {
1162 SvREFCNT_dec (tag_sv); 1332 SvREFCNT_dec_NN (tag_sv);
1163 FREETMPS; LEAVE; 1333 FREETMPS; LEAVE;
1164 ERR_ERRSV; 1334 ERR_ERRSV;
1165 } 1335 }
1166 1336
1167 if (count) 1337 if (count)
1168 { 1338 {
1169 SvREFCNT_dec (tag_sv); 1339 SvREFCNT_dec_NN (tag_sv);
1170 SvREFCNT_dec (sv); 1340 SvREFCNT_dec_NN (sv);
1171 sv = SvREFCNT_inc (POPs); 1341 sv = SvREFCNT_inc_NN (TOPs);
1342 SP -= count;
1172 } 1343 }
1173 else 1344 else
1174 { 1345 {
1175 AV *av = newAV (); 1346 AV *av = newAV ();
1176 av_push (av, tag_sv); 1347 av_push (av, tag_sv);
1319 for (i = av_len (dec.shareable) + 1; i--; ) 1490 for (i = av_len (dec.shareable) + 1; i--; )
1320 if ((svp = av_fetch (dec.shareable, i, 0))) 1491 if ((svp = av_fetch (dec.shareable, i, 0)))
1321 sv_setsv (*svp, &PL_sv_undef); 1492 sv_setsv (*svp, &PL_sv_undef);
1322 } 1493 }
1323 1494
1324 SvREFCNT_dec (sv); 1495 SvREFCNT_dec_NN (sv);
1325 1496
1326 if (dec.err_sv) 1497 if (dec.err_sv)
1327 sv_2mortal (dec.err_sv); 1498 sv_2mortal (dec.err_sv);
1328 1499
1329 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1500 croak ("%s, at offset %ld (octet 0x%02x)", dec.err, (long)(dec.cur - (U8 *)data), (int)(uint8_t)*dec.cur);
1330 } 1501 }
1331 1502
1332 sv = sv_2mortal (sv); 1503 sv = sv_2mortal (sv);
1333 1504
1334 return sv; 1505 return sv;
1423 1594
1424 break; 1595 break;
1425 1596
1426 case MAJOR_MAP >> MAJOR_SHIFT: 1597 case MAJOR_MAP >> MAJOR_SHIFT:
1427 len <<= 1; 1598 len <<= 1;
1599 /* FALLTHROUGH */
1428 case MAJOR_ARRAY >> MAJOR_SHIFT: 1600 case MAJOR_ARRAY >> MAJOR_SHIFT:
1429 if (len) 1601 if (len)
1430 { 1602 {
1431 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest 1603 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1432 count = len + 1; 1604 count = len + 1;
1672 cbor_init (&cbor); 1844 cbor_init (&cbor);
1673 PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN; 1845 PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1674 XPUSHs (cborstr); 1846 XPUSHs (cborstr);
1675} 1847}
1676 1848
1849#ifdef __AFL_COMPILER
1850
1851void
1852afl_init ()
1853 CODE:
1854 __AFL_INIT ();
1855
1856int
1857afl_loop (unsigned int count = 10000)
1858 CODE:
1859 RETVAL = __AFL_LOOP (count);
1860 OUTPUT:
1861 RETVAL
1862
1863#endif
1864

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines