ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.37 by root, Sat Nov 30 18:13:53 2013 UTC vs.
Revision 1.58 by root, Fri Nov 25 13:27:29 2016 UTC

7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#define ECB_NO_THREADS 1
12#include "ecb.h" 13#include "ecb.h"
13 14
14// compatibility with perl <5.18 15// compatibility with perl <5.18
15#ifndef HvNAMELEN_get 16#ifndef HvNAMELEN_get
16# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) 17# define HvNAMELEN_get(hv) strlen (HvNAME (hv))
99#define F_SHRINK 0x00000001UL 100#define F_SHRINK 0x00000001UL
100#define F_ALLOW_UNKNOWN 0x00000002UL 101#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL 102#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL 103#define F_ALLOW_CYCLES 0x00000008UL
103#define F_PACK_STRINGS 0x00000010UL 104#define F_PACK_STRINGS 0x00000010UL
105#define F_TEXT_KEYS 0x00000020UL
106#define F_TEXT_STRINGS 0x00000040UL
107#define F_VALIDATE_UTF8 0x00000080UL
104 108
105#define INIT_SIZE 32 // initial scalar size to be allocated 109#define INIT_SIZE 32 // initial scalar size to be allocated
106 110
107#define SB do { 111#define SB do {
108#define SE } while (0) 112#define SE } while (0)
127typedef struct { 131typedef struct {
128 U32 flags; 132 U32 flags;
129 U32 max_depth; 133 U32 max_depth;
130 STRLEN max_size; 134 STRLEN max_size;
131 SV *filter; 135 SV *filter;
136
137 // for the incremental parser
138 STRLEN incr_pos; // the current offset into the text
139 STRLEN incr_need; // minimum bytes needed to decode
140 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
132} CBOR; 141} CBOR;
133 142
134ecb_inline void 143ecb_inline void
135cbor_init (CBOR *cbor) 144cbor_init (CBOR *cbor)
136{ 145{
140 149
141ecb_inline void 150ecb_inline void
142cbor_free (CBOR *cbor) 151cbor_free (CBOR *cbor)
143{ 152{
144 SvREFCNT_dec (cbor->filter); 153 SvREFCNT_dec (cbor->filter);
154 SvREFCNT_dec (cbor->incr_count);
145} 155}
146 156
147///////////////////////////////////////////////////////////////////////////// 157/////////////////////////////////////////////////////////////////////////////
148// utility functions 158// utility functions
149 159
268encode_tag (enc_t *enc, UV tag) 278encode_tag (enc_t *enc, UV tag)
269{ 279{
270 encode_uint (enc, MAJOR_TAG, tag); 280 encode_uint (enc, MAJOR_TAG, tag);
271} 281}
272 282
283// exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
284ecb_noinline static void
285encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
286{
287 STRLEN ulen = len;
288 U8 *p, *pend = (U8 *)str + len;
289
290 for (p = (U8 *)str; p < pend; ++p)
291 ulen += *p >> 7; // count set high bits
292
293 encode_uint (enc, MAJOR_TEXT, ulen);
294
295 need (enc, ulen);
296 for (p = (U8 *)str; p < pend; ++p)
297 if (*p < 0x80)
298 *enc->cur++ = *p;
299 else
300 {
301 *enc->cur++ = 0xc0 + (*p >> 6);
302 *enc->cur++ = 0x80 + (*p & 63);
303 }
304}
305
273ecb_inline void 306ecb_inline void
274encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 307encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
275{ 308{
309 if (ecb_expect_false (upgrade_utf8))
310 if (!utf8)
311 {
312 encode_str_utf8 (enc, utf8, str, len);
313 return;
314 }
315
276 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); 316 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
277 need (enc, len); 317 need (enc, len);
278 memcpy (enc->cur, str, len); 318 memcpy (enc->cur, str, len);
279 enc->cur += len; 319 enc->cur += len;
280} 320}
281 321
282static void 322ecb_inline void
283encode_strref (enc_t *enc, int utf8, char *str, STRLEN len) 323encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
284{ 324{
285 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) 325 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
286 { 326 {
287 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1); 327 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
288 328
299 sv_setuv (*svp, enc->stringref_idx); 339 sv_setuv (*svp, enc->stringref_idx);
300 ++enc->stringref_idx; 340 ++enc->stringref_idx;
301 } 341 }
302 } 342 }
303 343
304 encode_str (enc, utf8, str, len); 344 encode_str (enc, upgrade_utf8, utf8, str, len);
305} 345}
306 346
307static void encode_sv (enc_t *enc, SV *sv); 347static void encode_sv (enc_t *enc, SV *sv);
308 348
309static void 349static void
316 356
317 ++enc->depth; 357 ++enc->depth;
318 358
319 encode_uint (enc, MAJOR_ARRAY, len + 1); 359 encode_uint (enc, MAJOR_ARRAY, len + 1);
320 360
361 if (SvMAGICAL (av))
321 for (i = 0; i <= len; ++i) 362 for (i = 0; i <= len; ++i)
322 { 363 {
323 SV **svp = av_fetch (av, i, 0); 364 SV **svp = av_fetch (av, i, 0);
324 encode_sv (enc, svp ? *svp : &PL_sv_undef); 365 encode_sv (enc, svp ? *svp : &PL_sv_undef);
325 } 366 }
367 else
368 for (i = 0; i <= len; ++i)
369 {
370 SV *sv = AvARRAY (av)[i];
371 encode_sv (enc, sv ? sv : &PL_sv_undef);
372 }
326 373
327 --enc->depth; 374 --enc->depth;
328} 375}
329 376
330static void 377static void
348 while ((he = hv_iternext (hv))) 395 while ((he = hv_iternext (hv)))
349 { 396 {
350 if (HeKLEN (he) == HEf_SVKEY) 397 if (HeKLEN (he) == HEf_SVKEY)
351 encode_sv (enc, HeSVKEY (he)); 398 encode_sv (enc, HeSVKEY (he));
352 else 399 else
353 encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); 400 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
354 401
355 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); 402 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
356 } 403 }
357 404
358 if (mg) 405 if (mg)
434 481
435 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) 482 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
436 { 483 {
437 dSP; 484 dSP;
438 485
439 ENTER; SAVETMPS; PUSHMARK (SP); 486 ENTER; SAVETMPS;
487 PUSHMARK (SP);
440 // we re-bless the reference to get overload and other niceties right 488 // we re-bless the reference to get overload and other niceties right
441 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 489 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
442 490
443 PUTBACK; 491 PUTBACK;
444 // G_SCALAR ensures that return value is 1 492 // G_SCALAR ensures that return value is 1
457 } 505 }
458 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) 506 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
459 { 507 {
460 dSP; 508 dSP;
461 509
462 ENTER; SAVETMPS; PUSHMARK (SP); 510 ENTER; SAVETMPS;
511 SAVESTACK_POS ();
512 PUSHMARK (SP);
463 EXTEND (SP, 2); 513 EXTEND (SP, 2);
464 // we re-bless the reference to get overload and other niceties right 514 // we re-bless the reference to get overload and other niceties right
465 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 515 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
466 PUSHs (sv_cbor); 516 PUSHs (sv_cbor);
467 517
473 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) 523 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
474 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); 524 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
475 525
476 encode_tag (enc, CBOR_TAG_PERL_OBJECT); 526 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
477 encode_uint (enc, MAJOR_ARRAY, count + 1); 527 encode_uint (enc, MAJOR_ARRAY, count + 1);
478 encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); 528 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
479 529
480 while (count) 530 while (count)
481 encode_sv (enc, SP[1 - count--]); 531 encode_sv (enc, SP[1 - count--]);
482 532
483 PUTBACK; 533 PUTBACK;
542 592
543 if (SvPOKp (sv)) 593 if (SvPOKp (sv))
544 { 594 {
545 STRLEN len; 595 STRLEN len;
546 char *str = SvPV (sv, len); 596 char *str = SvPV (sv, len);
547 encode_strref (enc, SvUTF8 (sv), str, len); 597 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
548 } 598 }
549 else if (SvNOKp (sv)) 599 else if (SvNOKp (sv))
550 encode_nv (enc, sv); 600 encode_nv (enc, sv);
551 else if (SvIOKp (sv)) 601 else if (SvIOKp (sv))
552 { 602 {
569} 619}
570 620
571static SV * 621static SV *
572encode_cbor (SV *scalar, CBOR *cbor) 622encode_cbor (SV *scalar, CBOR *cbor)
573{ 623{
574 enc_t enc = { }; 624 enc_t enc = { 0 };
575 625
576 enc.cbor = *cbor; 626 enc.cbor = *cbor;
577 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 627 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
578 enc.cur = SvPVX (enc.sv); 628 enc.cur = SvPVX (enc.sv);
579 enc.end = SvEND (enc.sv); 629 enc.end = SvEND (enc.sv);
615 SV *decode_tagged; 665 SV *decode_tagged;
616} dec_t; 666} dec_t;
617 667
618#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE 668#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
619 669
620#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") 670#define WANT(len) if (ecb_expect_false ((UV)(dec->end - dec->cur) < (UV)len)) ERR ("unexpected end of CBOR data")
621 671
622#define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED) 672#define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
623#define DEC_DEC_DEPTH --dec->depth 673#define DEC_DEC_DEPTH --dec->depth
624 674
625static UV 675static UV
626decode_uint (dec_t *dec) 676decode_uint (dec_t *dec)
627{ 677{
704 av_push (av, decode_sv (dec)); 754 av_push (av, decode_sv (dec));
705 } 755 }
706 } 756 }
707 else 757 else
708 { 758 {
709 int i, len = decode_uint (dec); 759 UV i, len = decode_uint (dec);
710 760
711 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays 761 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
712 av_fill (av, len - 1); 762 av_fill (av, len - 1);
713 763
714 for (i = 0; i < len; ++i) 764 for (i = 0; i < len; ++i)
729{ 779{
730 // for speed reasons, we specialcase single-string 780 // for speed reasons, we specialcase single-string
731 // byte or utf-8 strings as keys, but only when !stringref 781 // byte or utf-8 strings as keys, but only when !stringref
732 782
733 if (ecb_expect_true (!dec->stringref)) 783 if (ecb_expect_true (!dec->stringref))
734 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) 784 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
735 { 785 {
736 I32 len = decode_uint (dec); 786 STRLEN len = decode_uint (dec);
737 char *key = (char *)dec->cur; 787 char *key = (char *)dec->cur;
738 788
789 WANT (len);
739 dec->cur += len; 790 dec->cur += len;
740 791
741 if (ecb_expect_false (dec->stringref))
742 av_push (dec->stringref, newSVpvn (key, len));
743
744 hv_store (hv, key, len, decode_sv (dec), 0); 792 hv_store (hv, key, len, decode_sv (dec), 0);
745 793
746 return; 794 return;
747 } 795 }
748 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) 796 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
749 { 797 {
750 I32 len = decode_uint (dec); 798 STRLEN len = decode_uint (dec);
751 char *key = (char *)dec->cur; 799 char *key = (char *)dec->cur;
752 800
801 WANT (len);
753 dec->cur += len; 802 dec->cur += len;
754 803
755 if (ecb_expect_false (dec->stringref)) 804 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
756 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1)); 805 if (!is_utf8_string (key, len))
806 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
757 807
758 hv_store (hv, key, -len, decode_sv (dec), 0); 808 hv_store (hv, key, -len, decode_sv (dec), 0);
759 809
760 return; 810 return;
761 } 811 }
763 SV *k = decode_sv (dec); 813 SV *k = decode_sv (dec);
764 SV *v = decode_sv (dec); 814 SV *v = decode_sv (dec);
765 815
766 hv_store_ent (hv, k, v, 0); 816 hv_store_ent (hv, k, v, 0);
767 SvREFCNT_dec (k); 817 SvREFCNT_dec (k);
818
819fail:
820 ;
768} 821}
769 822
770static SV * 823static SV *
771decode_hv (dec_t *dec) 824decode_hv (dec_t *dec)
772{ 825{
791 decode_he (dec, hv); 844 decode_he (dec, hv);
792 } 845 }
793 } 846 }
794 else 847 else
795 { 848 {
796 int pairs = decode_uint (dec); 849 UV pairs = decode_uint (dec);
850
851 WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
797 852
798 while (pairs--) 853 while (pairs--)
799 decode_he (dec, hv); 854 decode_he (dec, hv);
800 } 855 }
801 856
854 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1)) 909 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
855 av_push (dec->stringref, SvREFCNT_inc_NN (sv)); 910 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
856 } 911 }
857 912
858 if (utf8) 913 if (utf8)
914 {
915 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
916 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
917 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
918
859 SvUTF8_on (sv); 919 SvUTF8_on (sv);
920 }
860 921
861 return sv; 922 return sv;
862 923
863fail: 924fail:
864 SvREFCNT_dec (sv); 925 SvREFCNT_dec (sv);
883 sv = newRV_noinc (decode_sv (dec)); 944 sv = newRV_noinc (decode_sv (dec));
884 break; 945 break;
885 946
886 case CBOR_TAG_STRINGREF_NAMESPACE: 947 case CBOR_TAG_STRINGREF_NAMESPACE:
887 { 948 {
949 // do not use SAVETMPS/FREETMPS, as these will
950 // erase mortalised caches, e.g. "shareable"
888 ENTER; SAVETMPS; 951 ENTER;
889 952
890 SAVESPTR (dec->stringref); 953 SAVESPTR (dec->stringref);
891 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); 954 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
892 955
893 sv = decode_sv (dec); 956 sv = decode_sv (dec);
894 957
895 FREETMPS; LEAVE; 958 LEAVE;
896 } 959 }
897 break; 960 break;
898 961
899 case CBOR_TAG_STRINGREF: 962 case CBOR_TAG_STRINGREF:
900 { 963 {
970 if (!method) 1033 if (!method)
971 ERR ("cannot decode perl-object (package does not have a THAW method)"); 1034 ERR ("cannot decode perl-object (package does not have a THAW method)");
972 1035
973 dSP; 1036 dSP;
974 1037
975 ENTER; SAVETMPS; PUSHMARK (SP); 1038 ENTER; SAVETMPS;
1039 PUSHMARK (SP);
976 EXTEND (SP, len + 1); 1040 EXTEND (SP, len + 1);
977 // we re-bless the reference to get overload and other niceties right 1041 // we re-bless the reference to get overload and other niceties right
978 PUSHs (*av_fetch (av, 0, 1)); 1042 PUSHs (*av_fetch (av, 0, 1));
979 PUSHs (sv_cbor); 1043 PUSHs (sv_cbor);
980 1044
1002 } 1066 }
1003 break; 1067 break;
1004 1068
1005 default: 1069 default:
1006 { 1070 {
1071 SV *tag_sv = newSVuv (tag);
1072
1007 sv = decode_sv (dec); 1073 sv = decode_sv (dec);
1008 1074
1009 dSP; 1075 dSP;
1010 ENTER; SAVETMPS; PUSHMARK (SP); 1076 ENTER; SAVETMPS;
1077 SAVESTACK_POS ();
1078 PUSHMARK (SP);
1011 EXTEND (SP, 2); 1079 EXTEND (SP, 2);
1012 PUSHs (newSVuv (tag)); 1080 PUSHs (tag_sv);
1013 PUSHs (sv); 1081 PUSHs (sv);
1014 1082
1015 PUTBACK; 1083 PUTBACK;
1016 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL); 1084 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1017 SPAGAIN; 1085 SPAGAIN;
1018 1086
1019 if (SvTRUE (ERRSV)) 1087 if (SvTRUE (ERRSV))
1020 { 1088 {
1089 SvREFCNT_dec (tag_sv);
1021 FREETMPS; LEAVE; 1090 FREETMPS; LEAVE;
1022 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); 1091 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1023 } 1092 }
1024 1093
1025 if (count) 1094 if (count)
1026 { 1095 {
1096 SvREFCNT_dec (tag_sv);
1027 SvREFCNT_dec (sv); 1097 SvREFCNT_dec (sv);
1028 sv = SvREFCNT_inc (POPs); 1098 sv = SvREFCNT_inc (POPs);
1029 } 1099 }
1030 else 1100 else
1031 { 1101 {
1032 AV *av = newAV (); 1102 AV *av = newAV ();
1033 av_push (av, newSVuv (tag)); 1103 av_push (av, tag_sv);
1034 av_push (av, sv); 1104 av_push (av, sv);
1035 1105
1036 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 1106 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1037 ? cbor_tagged_stash 1107 ? cbor_tagged_stash
1038 : gv_stashpv ("CBOR::XS::Tagged" , 1); 1108 : gv_stashpv ("CBOR::XS::Tagged" , 1);
1124 1194
1125 return newSVnv (ecb_binary64_to_double (fp)); 1195 return newSVnv (ecb_binary64_to_double (fp));
1126 } 1196 }
1127 1197
1128 // 0..19 unassigned simple 1198 // 0..19 unassigned simple
1129 // 24 reserved + unassigned (reserved values are not encodable) 1199 // 24 reserved + unassigned simple (reserved values are not encodable)
1200 // 28-30 unassigned misc
1201 // 31 break code
1130 default: 1202 default:
1131 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1203 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1132 } 1204 }
1133 1205
1134 break; 1206 break;
1135 } 1207 }
1136 1208
1139} 1211}
1140 1212
1141static SV * 1213static SV *
1142decode_cbor (SV *string, CBOR *cbor, char **offset_return) 1214decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1143{ 1215{
1144 dec_t dec = { }; 1216 dec_t dec = { 0 };
1145 SV *sv; 1217 SV *sv;
1146 STRLEN len; 1218 STRLEN len;
1147 char *data = SvPVbyte (string, len); 1219 char *data = SvPVbyte (string, len);
1148 1220
1149 if (len > cbor->max_size && cbor->max_size) 1221 if (len > cbor->max_size && cbor->max_size)
1163 if (dec.cur != dec.end && !dec.err) 1235 if (dec.cur != dec.end && !dec.err)
1164 dec.err = "garbage after CBOR object"; 1236 dec.err = "garbage after CBOR object";
1165 1237
1166 if (dec.err) 1238 if (dec.err)
1167 { 1239 {
1240 if (dec.shareable)
1241 {
1242 // need to break cyclic links, which would all be in shareable
1243 int i;
1244 SV **svp;
1245
1246 for (i = av_len (dec.shareable) + 1; i--; )
1247 if ((svp = av_fetch (dec.shareable, i, 0)))
1248 sv_setsv (*svp, &PL_sv_undef);
1249 }
1250
1168 SvREFCNT_dec (sv); 1251 SvREFCNT_dec (sv);
1169 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1252 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1170 } 1253 }
1171 1254
1172 sv = sv_2mortal (sv); 1255 sv = sv_2mortal (sv);
1173 1256
1174 return sv; 1257 return sv;
1175} 1258}
1176 1259
1260/////////////////////////////////////////////////////////////////////////////
1261// incremental parser
1262
1263#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1264
1265// returns 0 for notyet, 1 for success or error
1266static int
1267incr_parse (CBOR *self, SV *cborstr)
1268{
1269 STRLEN cur;
1270 SvPV (cborstr, cur);
1271
1272 while (ecb_expect_true (self->incr_need <= cur))
1273 {
1274 // table of integer count bytes
1275 static I8 incr_len[MINOR_MASK + 1] = {
1276 0, 0, 0, 0, 0, 0, 0, 0,
1277 0, 0, 0, 0, 0, 0, 0, 0,
1278 0, 0, 0, 0, 0, 0, 0, 0,
1279 1, 2, 4, 8,-1,-1,-1,-2
1280 };
1281
1282 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1283 U8 m = *p & MINOR_MASK;
1284 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1285 I8 ilen = incr_len[m];
1286
1287 self->incr_need = self->incr_pos + 1;
1288
1289 if (ecb_expect_false (ilen < 0))
1290 {
1291 if (m != MINOR_INDEF)
1292 return 1; // error
1293
1294 if (*p == (MAJOR_MISC | MINOR_INDEF))
1295 {
1296 if (count >= 0)
1297 return 1; // error
1298
1299 count = 1;
1300 }
1301 else
1302 {
1303 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1304 count = -1;
1305 }
1306 }
1307 else
1308 {
1309 self->incr_need += ilen;
1310 if (ecb_expect_false (self->incr_need > cur))
1311 return 0;
1312
1313 int major = *p >> MAJOR_SHIFT;
1314
1315 switch (major)
1316 {
1317 case MAJOR_TAG >> MAJOR_SHIFT:
1318 ++count; // tags merely prefix another value
1319 break;
1320
1321 case MAJOR_BYTES >> MAJOR_SHIFT:
1322 case MAJOR_TEXT >> MAJOR_SHIFT:
1323 case MAJOR_ARRAY >> MAJOR_SHIFT:
1324 case MAJOR_MAP >> MAJOR_SHIFT:
1325 {
1326 UV len;
1327
1328 if (ecb_expect_false (ilen))
1329 {
1330 len = 0;
1331
1332 do {
1333 len = (len << 8) | *++p;
1334 } while (--ilen);
1335 }
1336 else
1337 len = m;
1338
1339 switch (major)
1340 {
1341 case MAJOR_BYTES >> MAJOR_SHIFT:
1342 case MAJOR_TEXT >> MAJOR_SHIFT:
1343 self->incr_need += len;
1344 if (ecb_expect_false (self->incr_need > cur))
1345 return 0;
1346
1347 break;
1348
1349 case MAJOR_MAP >> MAJOR_SHIFT:
1350 len <<= 1;
1351 case MAJOR_ARRAY >> MAJOR_SHIFT:
1352 if (len)
1353 {
1354 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1355 count = len + 1;
1356 }
1357 break;
1358 }
1359 }
1360 }
1361 }
1362
1363 self->incr_pos = self->incr_need;
1364
1365 if (count > 0)
1366 {
1367 while (!--count)
1368 {
1369 if (!AvFILLp (self->incr_count))
1370 return 1; // done
1371
1372 SvREFCNT_dec_NN (av_pop (self->incr_count));
1373 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1374 }
1375
1376 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1377 }
1378 }
1379
1380 return 0;
1381}
1382
1383
1177///////////////////////////////////////////////////////////////////////////// 1384/////////////////////////////////////////////////////////////////////////////
1178// XS interface functions 1385// XS interface functions
1179 1386
1180MODULE = CBOR::XS PACKAGE = CBOR::XS 1387MODULE = CBOR::XS PACKAGE = CBOR::XS
1181 1388
1193 1400
1194 default_filter = newSVpv ("CBOR::XS::default_filter", 0); 1401 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1195 1402
1196 sv_cbor = newSVpv ("CBOR", 0); 1403 sv_cbor = newSVpv ("CBOR", 0);
1197 SvREADONLY_on (sv_cbor); 1404 SvREADONLY_on (sv_cbor);
1405
1406 assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1198} 1407}
1199 1408
1200PROTOTYPES: DISABLE 1409PROTOTYPES: DISABLE
1201 1410
1202void CLONE (...) 1411void CLONE (...)
1223 shrink = F_SHRINK 1432 shrink = F_SHRINK
1224 allow_unknown = F_ALLOW_UNKNOWN 1433 allow_unknown = F_ALLOW_UNKNOWN
1225 allow_sharing = F_ALLOW_SHARING 1434 allow_sharing = F_ALLOW_SHARING
1226 allow_cycles = F_ALLOW_CYCLES 1435 allow_cycles = F_ALLOW_CYCLES
1227 pack_strings = F_PACK_STRINGS 1436 pack_strings = F_PACK_STRINGS
1437 text_keys = F_TEXT_KEYS
1438 text_strings = F_TEXT_STRINGS
1439 validate_utf8 = F_VALIDATE_UTF8
1228 PPCODE: 1440 PPCODE:
1229{ 1441{
1230 if (enable) 1442 if (enable)
1231 self->flags |= ix; 1443 self->flags |= ix;
1232 else 1444 else
1240 get_shrink = F_SHRINK 1452 get_shrink = F_SHRINK
1241 get_allow_unknown = F_ALLOW_UNKNOWN 1453 get_allow_unknown = F_ALLOW_UNKNOWN
1242 get_allow_sharing = F_ALLOW_SHARING 1454 get_allow_sharing = F_ALLOW_SHARING
1243 get_allow_cycles = F_ALLOW_CYCLES 1455 get_allow_cycles = F_ALLOW_CYCLES
1244 get_pack_strings = F_PACK_STRINGS 1456 get_pack_strings = F_PACK_STRINGS
1457 get_text_keys = F_TEXT_KEYS
1458 get_text_strings = F_TEXT_STRINGS
1459 get_validate_utf8 = F_VALIDATE_UTF8
1245 PPCODE: 1460 PPCODE:
1246 XPUSHs (boolSV (self->flags & ix)); 1461 XPUSHs (boolSV (self->flags & ix));
1247 1462
1248void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1463void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1249 PPCODE: 1464 PPCODE:
1298 EXTEND (SP, 2); 1513 EXTEND (SP, 2);
1299 PUSHs (sv); 1514 PUSHs (sv);
1300 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1515 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1301} 1516}
1302 1517
1518void incr_parse (CBOR *self, SV *cborstr)
1519 ALIAS:
1520 incr_parse_multiple = 1
1521 PPCODE:
1522{
1523 if (SvUTF8 (cborstr))
1524 sv_utf8_downgrade (cborstr, 0);
1525
1526 if (!self->incr_count)
1527 {
1528 self->incr_count = newAV ();
1529 self->incr_pos = 0;
1530 self->incr_need = 1;
1531
1532 av_push (self->incr_count, newSViv (1));
1533 }
1534
1535 do
1536 {
1537 if (!incr_parse (self, cborstr))
1538 {
1539 if (self->incr_need > self->max_size && self->max_size)
1540 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1541 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1542
1543 break;
1544 }
1545
1546 SV *sv;
1547 char *offset;
1548
1549 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1550 XPUSHs (sv);
1551
1552 sv_chop (cborstr, offset);
1553
1554 av_clear (self->incr_count);
1555 av_push (self->incr_count, newSViv (1));
1556
1557 self->incr_pos = 0;
1558 self->incr_need = self->incr_pos + 1;
1559 }
1560 while (ix);
1561}
1562
1563void incr_reset (CBOR *self)
1564 CODE:
1565{
1566 SvREFCNT_dec (self->incr_count);
1567 self->incr_count = 0;
1568}
1569
1303void DESTROY (CBOR *self) 1570void DESTROY (CBOR *self)
1304 PPCODE: 1571 PPCODE:
1305 cbor_free (self); 1572 cbor_free (self);
1306 1573
1307PROTOTYPES: ENABLE 1574PROTOTYPES: ENABLE

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines