ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.36 by root, Sat Nov 30 17:37:45 2013 UTC vs.
Revision 1.58 by root, Fri Nov 25 13:27:29 2016 UTC

7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#define ECB_NO_THREADS 1
12#include "ecb.h" 13#include "ecb.h"
13 14
14// compatibility with perl <5.18 15// compatibility with perl <5.18
15#ifndef HvNAMELEN_get 16#ifndef HvNAMELEN_get
16# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) 17# define HvNAMELEN_get(hv) strlen (HvNAME (hv))
97}; 98};
98 99
99#define F_SHRINK 0x00000001UL 100#define F_SHRINK 0x00000001UL
100#define F_ALLOW_UNKNOWN 0x00000002UL 101#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL 102#define F_ALLOW_SHARING 0x00000004UL
103#define F_ALLOW_CYCLES 0x00000008UL
102#define F_PACK_STRINGS 0x00000008UL 104#define F_PACK_STRINGS 0x00000010UL
105#define F_TEXT_KEYS 0x00000020UL
106#define F_TEXT_STRINGS 0x00000040UL
107#define F_VALIDATE_UTF8 0x00000080UL
103 108
104#define INIT_SIZE 32 // initial scalar size to be allocated 109#define INIT_SIZE 32 // initial scalar size to be allocated
105 110
106#define SB do { 111#define SB do {
107#define SE } while (0) 112#define SE } while (0)
126typedef struct { 131typedef struct {
127 U32 flags; 132 U32 flags;
128 U32 max_depth; 133 U32 max_depth;
129 STRLEN max_size; 134 STRLEN max_size;
130 SV *filter; 135 SV *filter;
136
137 // for the incremental parser
138 STRLEN incr_pos; // the current offset into the text
139 STRLEN incr_need; // minimum bytes needed to decode
140 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
131} CBOR; 141} CBOR;
132 142
133ecb_inline void 143ecb_inline void
134cbor_init (CBOR *cbor) 144cbor_init (CBOR *cbor)
135{ 145{
139 149
140ecb_inline void 150ecb_inline void
141cbor_free (CBOR *cbor) 151cbor_free (CBOR *cbor)
142{ 152{
143 SvREFCNT_dec (cbor->filter); 153 SvREFCNT_dec (cbor->filter);
154 SvREFCNT_dec (cbor->incr_count);
144} 155}
145 156
146///////////////////////////////////////////////////////////////////////////// 157/////////////////////////////////////////////////////////////////////////////
147// utility functions 158// utility functions
148 159
267encode_tag (enc_t *enc, UV tag) 278encode_tag (enc_t *enc, UV tag)
268{ 279{
269 encode_uint (enc, MAJOR_TAG, tag); 280 encode_uint (enc, MAJOR_TAG, tag);
270} 281}
271 282
283// exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
284ecb_noinline static void
285encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
286{
287 STRLEN ulen = len;
288 U8 *p, *pend = (U8 *)str + len;
289
290 for (p = (U8 *)str; p < pend; ++p)
291 ulen += *p >> 7; // count set high bits
292
293 encode_uint (enc, MAJOR_TEXT, ulen);
294
295 need (enc, ulen);
296 for (p = (U8 *)str; p < pend; ++p)
297 if (*p < 0x80)
298 *enc->cur++ = *p;
299 else
300 {
301 *enc->cur++ = 0xc0 + (*p >> 6);
302 *enc->cur++ = 0x80 + (*p & 63);
303 }
304}
305
272ecb_inline void 306ecb_inline void
273encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 307encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
274{ 308{
309 if (ecb_expect_false (upgrade_utf8))
310 if (!utf8)
311 {
312 encode_str_utf8 (enc, utf8, str, len);
313 return;
314 }
315
275 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); 316 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
276 need (enc, len); 317 need (enc, len);
277 memcpy (enc->cur, str, len); 318 memcpy (enc->cur, str, len);
278 enc->cur += len; 319 enc->cur += len;
279} 320}
280 321
281static void 322ecb_inline void
282encode_strref (enc_t *enc, int utf8, char *str, STRLEN len) 323encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
283{ 324{
284 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) 325 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
285 { 326 {
286 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1); 327 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
287 328
298 sv_setuv (*svp, enc->stringref_idx); 339 sv_setuv (*svp, enc->stringref_idx);
299 ++enc->stringref_idx; 340 ++enc->stringref_idx;
300 } 341 }
301 } 342 }
302 343
303 encode_str (enc, utf8, str, len); 344 encode_str (enc, upgrade_utf8, utf8, str, len);
304} 345}
305 346
306static void encode_sv (enc_t *enc, SV *sv); 347static void encode_sv (enc_t *enc, SV *sv);
307 348
308static void 349static void
315 356
316 ++enc->depth; 357 ++enc->depth;
317 358
318 encode_uint (enc, MAJOR_ARRAY, len + 1); 359 encode_uint (enc, MAJOR_ARRAY, len + 1);
319 360
361 if (SvMAGICAL (av))
320 for (i = 0; i <= len; ++i) 362 for (i = 0; i <= len; ++i)
321 { 363 {
322 SV **svp = av_fetch (av, i, 0); 364 SV **svp = av_fetch (av, i, 0);
323 encode_sv (enc, svp ? *svp : &PL_sv_undef); 365 encode_sv (enc, svp ? *svp : &PL_sv_undef);
324 } 366 }
367 else
368 for (i = 0; i <= len; ++i)
369 {
370 SV *sv = AvARRAY (av)[i];
371 encode_sv (enc, sv ? sv : &PL_sv_undef);
372 }
325 373
326 --enc->depth; 374 --enc->depth;
327} 375}
328 376
329static void 377static void
347 while ((he = hv_iternext (hv))) 395 while ((he = hv_iternext (hv)))
348 { 396 {
349 if (HeKLEN (he) == HEf_SVKEY) 397 if (HeKLEN (he) == HEf_SVKEY)
350 encode_sv (enc, HeSVKEY (he)); 398 encode_sv (enc, HeSVKEY (he));
351 else 399 else
352 encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); 400 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
353 401
354 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); 402 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
355 } 403 }
356 404
357 if (mg) 405 if (mg)
433 481
434 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) 482 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
435 { 483 {
436 dSP; 484 dSP;
437 485
438 ENTER; SAVETMPS; PUSHMARK (SP); 486 ENTER; SAVETMPS;
487 PUSHMARK (SP);
439 // we re-bless the reference to get overload and other niceties right 488 // we re-bless the reference to get overload and other niceties right
440 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 489 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
441 490
442 PUTBACK; 491 PUTBACK;
443 // G_SCALAR ensures that return value is 1 492 // G_SCALAR ensures that return value is 1
456 } 505 }
457 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) 506 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
458 { 507 {
459 dSP; 508 dSP;
460 509
461 ENTER; SAVETMPS; PUSHMARK (SP); 510 ENTER; SAVETMPS;
511 SAVESTACK_POS ();
512 PUSHMARK (SP);
462 EXTEND (SP, 2); 513 EXTEND (SP, 2);
463 // we re-bless the reference to get overload and other niceties right 514 // we re-bless the reference to get overload and other niceties right
464 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 515 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
465 PUSHs (sv_cbor); 516 PUSHs (sv_cbor);
466 517
472 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) 523 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
473 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); 524 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
474 525
475 encode_tag (enc, CBOR_TAG_PERL_OBJECT); 526 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
476 encode_uint (enc, MAJOR_ARRAY, count + 1); 527 encode_uint (enc, MAJOR_ARRAY, count + 1);
477 encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); 528 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
478 529
479 while (count) 530 while (count)
480 encode_sv (enc, SP[1 - count--]); 531 encode_sv (enc, SP[1 - count--]);
481 532
482 PUTBACK; 533 PUTBACK;
541 592
542 if (SvPOKp (sv)) 593 if (SvPOKp (sv))
543 { 594 {
544 STRLEN len; 595 STRLEN len;
545 char *str = SvPV (sv, len); 596 char *str = SvPV (sv, len);
546 encode_strref (enc, SvUTF8 (sv), str, len); 597 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
547 } 598 }
548 else if (SvNOKp (sv)) 599 else if (SvNOKp (sv))
549 encode_nv (enc, sv); 600 encode_nv (enc, sv);
550 else if (SvIOKp (sv)) 601 else if (SvIOKp (sv))
551 { 602 {
568} 619}
569 620
570static SV * 621static SV *
571encode_cbor (SV *scalar, CBOR *cbor) 622encode_cbor (SV *scalar, CBOR *cbor)
572{ 623{
573 enc_t enc = { }; 624 enc_t enc = { 0 };
574 625
575 enc.cbor = *cbor; 626 enc.cbor = *cbor;
576 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 627 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
577 enc.cur = SvPVX (enc.sv); 628 enc.cur = SvPVX (enc.sv);
578 enc.end = SvEND (enc.sv); 629 enc.end = SvEND (enc.sv);
614 SV *decode_tagged; 665 SV *decode_tagged;
615} dec_t; 666} dec_t;
616 667
617#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE 668#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
618 669
619#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") 670#define WANT(len) if (ecb_expect_false ((UV)(dec->end - dec->cur) < (UV)len)) ERR ("unexpected end of CBOR data")
620 671
621#define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED) 672#define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
622#define DEC_DEC_DEPTH --dec->depth 673#define DEC_DEC_DEPTH --dec->depth
623 674
624static UV 675static UV
625decode_uint (dec_t *dec) 676decode_uint (dec_t *dec)
626{ 677{
703 av_push (av, decode_sv (dec)); 754 av_push (av, decode_sv (dec));
704 } 755 }
705 } 756 }
706 else 757 else
707 { 758 {
708 int i, len = decode_uint (dec); 759 UV i, len = decode_uint (dec);
709 760
710 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays 761 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
711 av_fill (av, len - 1); 762 av_fill (av, len - 1);
712 763
713 for (i = 0; i < len; ++i) 764 for (i = 0; i < len; ++i)
728{ 779{
729 // for speed reasons, we specialcase single-string 780 // for speed reasons, we specialcase single-string
730 // byte or utf-8 strings as keys, but only when !stringref 781 // byte or utf-8 strings as keys, but only when !stringref
731 782
732 if (ecb_expect_true (!dec->stringref)) 783 if (ecb_expect_true (!dec->stringref))
733 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) 784 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
734 { 785 {
735 I32 len = decode_uint (dec); 786 STRLEN len = decode_uint (dec);
736 char *key = (char *)dec->cur; 787 char *key = (char *)dec->cur;
737 788
789 WANT (len);
738 dec->cur += len; 790 dec->cur += len;
739 791
740 if (ecb_expect_false (dec->stringref))
741 av_push (dec->stringref, newSVpvn (key, len));
742
743 hv_store (hv, key, len, decode_sv (dec), 0); 792 hv_store (hv, key, len, decode_sv (dec), 0);
744 793
745 return; 794 return;
746 } 795 }
747 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) 796 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
748 { 797 {
749 I32 len = decode_uint (dec); 798 STRLEN len = decode_uint (dec);
750 char *key = (char *)dec->cur; 799 char *key = (char *)dec->cur;
751 800
801 WANT (len);
752 dec->cur += len; 802 dec->cur += len;
753 803
754 if (ecb_expect_false (dec->stringref)) 804 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
755 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1)); 805 if (!is_utf8_string (key, len))
806 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
756 807
757 hv_store (hv, key, -len, decode_sv (dec), 0); 808 hv_store (hv, key, -len, decode_sv (dec), 0);
758 809
759 return; 810 return;
760 } 811 }
762 SV *k = decode_sv (dec); 813 SV *k = decode_sv (dec);
763 SV *v = decode_sv (dec); 814 SV *v = decode_sv (dec);
764 815
765 hv_store_ent (hv, k, v, 0); 816 hv_store_ent (hv, k, v, 0);
766 SvREFCNT_dec (k); 817 SvREFCNT_dec (k);
818
819fail:
820 ;
767} 821}
768 822
769static SV * 823static SV *
770decode_hv (dec_t *dec) 824decode_hv (dec_t *dec)
771{ 825{
790 decode_he (dec, hv); 844 decode_he (dec, hv);
791 } 845 }
792 } 846 }
793 else 847 else
794 { 848 {
795 int pairs = decode_uint (dec); 849 UV pairs = decode_uint (dec);
850
851 WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
796 852
797 while (pairs--) 853 while (pairs--)
798 decode_he (dec, hv); 854 decode_he (dec, hv);
799 } 855 }
800 856
853 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1)) 909 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
854 av_push (dec->stringref, SvREFCNT_inc_NN (sv)); 910 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
855 } 911 }
856 912
857 if (utf8) 913 if (utf8)
914 {
915 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
916 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
917 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
918
858 SvUTF8_on (sv); 919 SvUTF8_on (sv);
920 }
859 921
860 return sv; 922 return sv;
861 923
862fail: 924fail:
863 SvREFCNT_dec (sv); 925 SvREFCNT_dec (sv);
882 sv = newRV_noinc (decode_sv (dec)); 944 sv = newRV_noinc (decode_sv (dec));
883 break; 945 break;
884 946
885 case CBOR_TAG_STRINGREF_NAMESPACE: 947 case CBOR_TAG_STRINGREF_NAMESPACE:
886 { 948 {
949 // do not use SAVETMPS/FREETMPS, as these will
950 // erase mortalised caches, e.g. "shareable"
887 ENTER; SAVETMPS; 951 ENTER;
888 952
889 SAVESPTR (dec->stringref); 953 SAVESPTR (dec->stringref);
890 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); 954 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
891 955
892 sv = decode_sv (dec); 956 sv = decode_sv (dec);
893 957
894 FREETMPS; LEAVE; 958 LEAVE;
895 } 959 }
896 break; 960 break;
897 961
898 case CBOR_TAG_STRINGREF: 962 case CBOR_TAG_STRINGREF:
899 { 963 {
912 case CBOR_TAG_VALUE_SHAREABLE: 976 case CBOR_TAG_VALUE_SHAREABLE:
913 { 977 {
914 if (ecb_expect_false (!dec->shareable)) 978 if (ecb_expect_false (!dec->shareable))
915 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); 979 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
916 980
981 if (dec->cbor.flags & F_ALLOW_CYCLES)
982 {
917 sv = newSV (0); 983 sv = newSV (0);
918 av_push (dec->shareable, SvREFCNT_inc_NN (sv)); 984 av_push (dec->shareable, SvREFCNT_inc_NN (sv));
919 985
920 SV *osv = decode_sv (dec); 986 SV *osv = decode_sv (dec);
921 sv_setsv (sv, osv); 987 sv_setsv (sv, osv);
922 SvREFCNT_dec_NN (osv); 988 SvREFCNT_dec_NN (osv);
989 }
990 else
991 {
992 av_push (dec->shareable, &PL_sv_undef);
993 int idx = AvFILLp (dec->shareable);
994 sv = decode_sv (dec);
995 av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
996 }
923 } 997 }
924 break; 998 break;
925 999
926 case CBOR_TAG_VALUE_SHAREDREF: 1000 case CBOR_TAG_VALUE_SHAREDREF:
927 { 1001 {
932 1006
933 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable)) 1007 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
934 ERR ("corrupted CBOR data (sharedref index out of bounds)"); 1008 ERR ("corrupted CBOR data (sharedref index out of bounds)");
935 1009
936 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); 1010 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1011
1012 if (sv == &PL_sv_undef)
1013 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
937 } 1014 }
938 break; 1015 break;
939 1016
940 case CBOR_TAG_PERL_OBJECT: 1017 case CBOR_TAG_PERL_OBJECT:
941 { 1018 {
956 if (!method) 1033 if (!method)
957 ERR ("cannot decode perl-object (package does not have a THAW method)"); 1034 ERR ("cannot decode perl-object (package does not have a THAW method)");
958 1035
959 dSP; 1036 dSP;
960 1037
961 ENTER; SAVETMPS; PUSHMARK (SP); 1038 ENTER; SAVETMPS;
1039 PUSHMARK (SP);
962 EXTEND (SP, len + 1); 1040 EXTEND (SP, len + 1);
963 // we re-bless the reference to get overload and other niceties right 1041 // we re-bless the reference to get overload and other niceties right
964 PUSHs (*av_fetch (av, 0, 1)); 1042 PUSHs (*av_fetch (av, 0, 1));
965 PUSHs (sv_cbor); 1043 PUSHs (sv_cbor);
966 1044
988 } 1066 }
989 break; 1067 break;
990 1068
991 default: 1069 default:
992 { 1070 {
1071 SV *tag_sv = newSVuv (tag);
1072
993 sv = decode_sv (dec); 1073 sv = decode_sv (dec);
994 1074
995 dSP; 1075 dSP;
996 ENTER; SAVETMPS; PUSHMARK (SP); 1076 ENTER; SAVETMPS;
1077 SAVESTACK_POS ();
1078 PUSHMARK (SP);
997 EXTEND (SP, 2); 1079 EXTEND (SP, 2);
998 PUSHs (newSVuv (tag)); 1080 PUSHs (tag_sv);
999 PUSHs (sv); 1081 PUSHs (sv);
1000 1082
1001 PUTBACK; 1083 PUTBACK;
1002 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL); 1084 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1003 SPAGAIN; 1085 SPAGAIN;
1004 1086
1005 if (SvTRUE (ERRSV)) 1087 if (SvTRUE (ERRSV))
1006 { 1088 {
1089 SvREFCNT_dec (tag_sv);
1007 FREETMPS; LEAVE; 1090 FREETMPS; LEAVE;
1008 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); 1091 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1009 } 1092 }
1010 1093
1011 if (count) 1094 if (count)
1012 { 1095 {
1096 SvREFCNT_dec (tag_sv);
1013 SvREFCNT_dec (sv); 1097 SvREFCNT_dec (sv);
1014 sv = SvREFCNT_inc (POPs); 1098 sv = SvREFCNT_inc (POPs);
1015 } 1099 }
1016 else 1100 else
1017 { 1101 {
1018 AV *av = newAV (); 1102 AV *av = newAV ();
1019 av_push (av, newSVuv (tag)); 1103 av_push (av, tag_sv);
1020 av_push (av, sv); 1104 av_push (av, sv);
1021 1105
1022 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 1106 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1023 ? cbor_tagged_stash 1107 ? cbor_tagged_stash
1024 : gv_stashpv ("CBOR::XS::Tagged" , 1); 1108 : gv_stashpv ("CBOR::XS::Tagged" , 1);
1110 1194
1111 return newSVnv (ecb_binary64_to_double (fp)); 1195 return newSVnv (ecb_binary64_to_double (fp));
1112 } 1196 }
1113 1197
1114 // 0..19 unassigned simple 1198 // 0..19 unassigned simple
1115 // 24 reserved + unassigned (reserved values are not encodable) 1199 // 24 reserved + unassigned simple (reserved values are not encodable)
1200 // 28-30 unassigned misc
1201 // 31 break code
1116 default: 1202 default:
1117 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1203 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1118 } 1204 }
1119 1205
1120 break; 1206 break;
1121 } 1207 }
1122 1208
1125} 1211}
1126 1212
1127static SV * 1213static SV *
1128decode_cbor (SV *string, CBOR *cbor, char **offset_return) 1214decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1129{ 1215{
1130 dec_t dec = { }; 1216 dec_t dec = { 0 };
1131 SV *sv; 1217 SV *sv;
1132 STRLEN len; 1218 STRLEN len;
1133 char *data = SvPVbyte (string, len); 1219 char *data = SvPVbyte (string, len);
1134 1220
1135 if (len > cbor->max_size && cbor->max_size) 1221 if (len > cbor->max_size && cbor->max_size)
1149 if (dec.cur != dec.end && !dec.err) 1235 if (dec.cur != dec.end && !dec.err)
1150 dec.err = "garbage after CBOR object"; 1236 dec.err = "garbage after CBOR object";
1151 1237
1152 if (dec.err) 1238 if (dec.err)
1153 { 1239 {
1240 if (dec.shareable)
1241 {
1242 // need to break cyclic links, which would all be in shareable
1243 int i;
1244 SV **svp;
1245
1246 for (i = av_len (dec.shareable) + 1; i--; )
1247 if ((svp = av_fetch (dec.shareable, i, 0)))
1248 sv_setsv (*svp, &PL_sv_undef);
1249 }
1250
1154 SvREFCNT_dec (sv); 1251 SvREFCNT_dec (sv);
1155 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1252 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1156 } 1253 }
1157 1254
1158 sv = sv_2mortal (sv); 1255 sv = sv_2mortal (sv);
1159 1256
1160 return sv; 1257 return sv;
1161} 1258}
1162 1259
1260/////////////////////////////////////////////////////////////////////////////
1261// incremental parser
1262
1263#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1264
1265// returns 0 for notyet, 1 for success or error
1266static int
1267incr_parse (CBOR *self, SV *cborstr)
1268{
1269 STRLEN cur;
1270 SvPV (cborstr, cur);
1271
1272 while (ecb_expect_true (self->incr_need <= cur))
1273 {
1274 // table of integer count bytes
1275 static I8 incr_len[MINOR_MASK + 1] = {
1276 0, 0, 0, 0, 0, 0, 0, 0,
1277 0, 0, 0, 0, 0, 0, 0, 0,
1278 0, 0, 0, 0, 0, 0, 0, 0,
1279 1, 2, 4, 8,-1,-1,-1,-2
1280 };
1281
1282 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1283 U8 m = *p & MINOR_MASK;
1284 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1285 I8 ilen = incr_len[m];
1286
1287 self->incr_need = self->incr_pos + 1;
1288
1289 if (ecb_expect_false (ilen < 0))
1290 {
1291 if (m != MINOR_INDEF)
1292 return 1; // error
1293
1294 if (*p == (MAJOR_MISC | MINOR_INDEF))
1295 {
1296 if (count >= 0)
1297 return 1; // error
1298
1299 count = 1;
1300 }
1301 else
1302 {
1303 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1304 count = -1;
1305 }
1306 }
1307 else
1308 {
1309 self->incr_need += ilen;
1310 if (ecb_expect_false (self->incr_need > cur))
1311 return 0;
1312
1313 int major = *p >> MAJOR_SHIFT;
1314
1315 switch (major)
1316 {
1317 case MAJOR_TAG >> MAJOR_SHIFT:
1318 ++count; // tags merely prefix another value
1319 break;
1320
1321 case MAJOR_BYTES >> MAJOR_SHIFT:
1322 case MAJOR_TEXT >> MAJOR_SHIFT:
1323 case MAJOR_ARRAY >> MAJOR_SHIFT:
1324 case MAJOR_MAP >> MAJOR_SHIFT:
1325 {
1326 UV len;
1327
1328 if (ecb_expect_false (ilen))
1329 {
1330 len = 0;
1331
1332 do {
1333 len = (len << 8) | *++p;
1334 } while (--ilen);
1335 }
1336 else
1337 len = m;
1338
1339 switch (major)
1340 {
1341 case MAJOR_BYTES >> MAJOR_SHIFT:
1342 case MAJOR_TEXT >> MAJOR_SHIFT:
1343 self->incr_need += len;
1344 if (ecb_expect_false (self->incr_need > cur))
1345 return 0;
1346
1347 break;
1348
1349 case MAJOR_MAP >> MAJOR_SHIFT:
1350 len <<= 1;
1351 case MAJOR_ARRAY >> MAJOR_SHIFT:
1352 if (len)
1353 {
1354 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1355 count = len + 1;
1356 }
1357 break;
1358 }
1359 }
1360 }
1361 }
1362
1363 self->incr_pos = self->incr_need;
1364
1365 if (count > 0)
1366 {
1367 while (!--count)
1368 {
1369 if (!AvFILLp (self->incr_count))
1370 return 1; // done
1371
1372 SvREFCNT_dec_NN (av_pop (self->incr_count));
1373 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1374 }
1375
1376 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1377 }
1378 }
1379
1380 return 0;
1381}
1382
1383
1163///////////////////////////////////////////////////////////////////////////// 1384/////////////////////////////////////////////////////////////////////////////
1164// XS interface functions 1385// XS interface functions
1165 1386
1166MODULE = CBOR::XS PACKAGE = CBOR::XS 1387MODULE = CBOR::XS PACKAGE = CBOR::XS
1167 1388
1179 1400
1180 default_filter = newSVpv ("CBOR::XS::default_filter", 0); 1401 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1181 1402
1182 sv_cbor = newSVpv ("CBOR", 0); 1403 sv_cbor = newSVpv ("CBOR", 0);
1183 SvREADONLY_on (sv_cbor); 1404 SvREADONLY_on (sv_cbor);
1405
1406 assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1184} 1407}
1185 1408
1186PROTOTYPES: DISABLE 1409PROTOTYPES: DISABLE
1187 1410
1188void CLONE (...) 1411void CLONE (...)
1207void shrink (CBOR *self, int enable = 1) 1430void shrink (CBOR *self, int enable = 1)
1208 ALIAS: 1431 ALIAS:
1209 shrink = F_SHRINK 1432 shrink = F_SHRINK
1210 allow_unknown = F_ALLOW_UNKNOWN 1433 allow_unknown = F_ALLOW_UNKNOWN
1211 allow_sharing = F_ALLOW_SHARING 1434 allow_sharing = F_ALLOW_SHARING
1435 allow_cycles = F_ALLOW_CYCLES
1212 pack_strings = F_PACK_STRINGS 1436 pack_strings = F_PACK_STRINGS
1437 text_keys = F_TEXT_KEYS
1438 text_strings = F_TEXT_STRINGS
1439 validate_utf8 = F_VALIDATE_UTF8
1213 PPCODE: 1440 PPCODE:
1214{ 1441{
1215 if (enable) 1442 if (enable)
1216 self->flags |= ix; 1443 self->flags |= ix;
1217 else 1444 else
1223void get_shrink (CBOR *self) 1450void get_shrink (CBOR *self)
1224 ALIAS: 1451 ALIAS:
1225 get_shrink = F_SHRINK 1452 get_shrink = F_SHRINK
1226 get_allow_unknown = F_ALLOW_UNKNOWN 1453 get_allow_unknown = F_ALLOW_UNKNOWN
1227 get_allow_sharing = F_ALLOW_SHARING 1454 get_allow_sharing = F_ALLOW_SHARING
1455 get_allow_cycles = F_ALLOW_CYCLES
1228 get_pack_strings = F_PACK_STRINGS 1456 get_pack_strings = F_PACK_STRINGS
1457 get_text_keys = F_TEXT_KEYS
1458 get_text_strings = F_TEXT_STRINGS
1459 get_validate_utf8 = F_VALIDATE_UTF8
1229 PPCODE: 1460 PPCODE:
1230 XPUSHs (boolSV (self->flags & ix)); 1461 XPUSHs (boolSV (self->flags & ix));
1231 1462
1232void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1463void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1233 PPCODE: 1464 PPCODE:
1282 EXTEND (SP, 2); 1513 EXTEND (SP, 2);
1283 PUSHs (sv); 1514 PUSHs (sv);
1284 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1515 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1285} 1516}
1286 1517
1518void incr_parse (CBOR *self, SV *cborstr)
1519 ALIAS:
1520 incr_parse_multiple = 1
1521 PPCODE:
1522{
1523 if (SvUTF8 (cborstr))
1524 sv_utf8_downgrade (cborstr, 0);
1525
1526 if (!self->incr_count)
1527 {
1528 self->incr_count = newAV ();
1529 self->incr_pos = 0;
1530 self->incr_need = 1;
1531
1532 av_push (self->incr_count, newSViv (1));
1533 }
1534
1535 do
1536 {
1537 if (!incr_parse (self, cborstr))
1538 {
1539 if (self->incr_need > self->max_size && self->max_size)
1540 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1541 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1542
1543 break;
1544 }
1545
1546 SV *sv;
1547 char *offset;
1548
1549 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1550 XPUSHs (sv);
1551
1552 sv_chop (cborstr, offset);
1553
1554 av_clear (self->incr_count);
1555 av_push (self->incr_count, newSViv (1));
1556
1557 self->incr_pos = 0;
1558 self->incr_need = self->incr_pos + 1;
1559 }
1560 while (ix);
1561}
1562
1563void incr_reset (CBOR *self)
1564 CODE:
1565{
1566 SvREFCNT_dec (self->incr_count);
1567 self->incr_count = 0;
1568}
1569
1287void DESTROY (CBOR *self) 1570void DESTROY (CBOR *self)
1288 PPCODE: 1571 PPCODE:
1289 cbor_free (self); 1572 cbor_free (self);
1290 1573
1291PROTOTYPES: ENABLE 1574PROTOTYPES: ENABLE

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines