… | |
… | |
100 | #define F_SHRINK 0x00000001UL |
100 | #define F_SHRINK 0x00000001UL |
101 | #define F_ALLOW_UNKNOWN 0x00000002UL |
101 | #define F_ALLOW_UNKNOWN 0x00000002UL |
102 | #define F_ALLOW_SHARING 0x00000004UL |
102 | #define F_ALLOW_SHARING 0x00000004UL |
103 | #define F_ALLOW_CYCLES 0x00000008UL |
103 | #define F_ALLOW_CYCLES 0x00000008UL |
104 | #define F_PACK_STRINGS 0x00000010UL |
104 | #define F_PACK_STRINGS 0x00000010UL |
|
|
105 | #define F_TEXT_KEYS 0x00000020UL |
105 | #define F_UTF8_STRINGS 0x00000020UL |
106 | #define F_TEXT_STRINGS 0x00000040UL |
106 | #define F_VALIDATE_UTF8 0x00000040UL |
107 | #define F_VALIDATE_UTF8 0x00000080UL |
107 | |
108 | |
108 | #define INIT_SIZE 32 // initial scalar size to be allocated |
109 | #define INIT_SIZE 32 // initial scalar size to be allocated |
109 | |
110 | |
110 | #define SB do { |
111 | #define SB do { |
111 | #define SE } while (0) |
112 | #define SE } while (0) |
… | |
… | |
277 | encode_tag (enc_t *enc, UV tag) |
278 | encode_tag (enc_t *enc, UV tag) |
278 | { |
279 | { |
279 | encode_uint (enc, MAJOR_TAG, tag); |
280 | encode_uint (enc, MAJOR_TAG, tag); |
280 | } |
281 | } |
281 | |
282 | |
|
|
283 | // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded |
|
|
284 | ecb_noinline static void |
|
|
285 | encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len) |
|
|
286 | { |
|
|
287 | STRLEN ulen = len; |
|
|
288 | U8 *p, *pend = (U8 *)str + len; |
|
|
289 | |
|
|
290 | for (p = (U8 *)str; p < pend; ++p) |
|
|
291 | ulen += *p >> 7; // count set high bits |
|
|
292 | |
|
|
293 | encode_uint (enc, MAJOR_TEXT, ulen); |
|
|
294 | |
|
|
295 | need (enc, ulen); |
|
|
296 | for (p = (U8 *)str; p < pend; ++p) |
|
|
297 | if (*p < 0x80) |
|
|
298 | *enc->cur++ = *p; |
|
|
299 | else |
|
|
300 | { |
|
|
301 | *enc->cur++ = 0xc0 + (*p >> 6); |
|
|
302 | *enc->cur++ = 0x80 + (*p & 63); |
|
|
303 | } |
|
|
304 | } |
|
|
305 | |
282 | ecb_inline void |
306 | ecb_inline void |
283 | encode_str (enc_t *enc, int utf8, char *str, STRLEN len) |
307 | encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) |
284 | { |
308 | { |
285 | if (ecb_expect_false (enc->cbor.flags & F_UTF8_STRINGS)) |
309 | if (ecb_expect_false (upgrade_utf8)) |
286 | if (!utf8) |
310 | if (!utf8) |
287 | { |
311 | { |
288 | // exceptional path for byte strings that need to be utf8-encoded |
312 | encode_str_utf8 (enc, utf8, str, len); |
289 | STRLEN ulen = len; |
|
|
290 | U8 *p, *pend = (U8 *)str + len; |
|
|
291 | |
|
|
292 | for (p = (U8 *)str; p < pend; ++p) |
|
|
293 | ulen += *p >> 7; // count set high bits |
|
|
294 | |
|
|
295 | encode_uint (enc, MAJOR_TEXT, ulen); |
|
|
296 | |
|
|
297 | need (enc, ulen); |
|
|
298 | for (p = (U8 *)str; p < pend; ++p) |
|
|
299 | if (*p < 0x80) |
|
|
300 | *enc->cur++ = *p; |
|
|
301 | else |
|
|
302 | { |
|
|
303 | *enc->cur++ = 0xc0 + (*p >> 6); |
|
|
304 | *enc->cur++ = 0x80 + (*p & 63); |
|
|
305 | } |
|
|
306 | |
|
|
307 | return; |
313 | return; |
308 | } |
314 | } |
309 | |
315 | |
310 | encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); |
316 | encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); |
311 | need (enc, len); |
317 | need (enc, len); |
312 | memcpy (enc->cur, str, len); |
318 | memcpy (enc->cur, str, len); |
313 | enc->cur += len; |
319 | enc->cur += len; |
314 | } |
320 | } |
315 | |
321 | |
316 | static void |
322 | ecb_inline void |
317 | encode_strref (enc_t *enc, int utf8, char *str, STRLEN len) |
323 | encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) |
318 | { |
324 | { |
319 | if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) |
325 | if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) |
320 | { |
326 | { |
321 | SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1); |
327 | SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1); |
322 | |
328 | |
… | |
… | |
333 | sv_setuv (*svp, enc->stringref_idx); |
339 | sv_setuv (*svp, enc->stringref_idx); |
334 | ++enc->stringref_idx; |
340 | ++enc->stringref_idx; |
335 | } |
341 | } |
336 | } |
342 | } |
337 | |
343 | |
338 | encode_str (enc, utf8, str, len); |
344 | encode_str (enc, upgrade_utf8, utf8, str, len); |
339 | } |
345 | } |
340 | |
346 | |
341 | static void encode_sv (enc_t *enc, SV *sv); |
347 | static void encode_sv (enc_t *enc, SV *sv); |
342 | |
348 | |
343 | static void |
349 | static void |
… | |
… | |
389 | while ((he = hv_iternext (hv))) |
395 | while ((he = hv_iternext (hv))) |
390 | { |
396 | { |
391 | if (HeKLEN (he) == HEf_SVKEY) |
397 | if (HeKLEN (he) == HEf_SVKEY) |
392 | encode_sv (enc, HeSVKEY (he)); |
398 | encode_sv (enc, HeSVKEY (he)); |
393 | else |
399 | else |
394 | encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); |
400 | encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he)); |
395 | |
401 | |
396 | encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); |
402 | encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); |
397 | } |
403 | } |
398 | |
404 | |
399 | if (mg) |
405 | if (mg) |
… | |
… | |
517 | if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) |
523 | if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) |
518 | croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); |
524 | croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); |
519 | |
525 | |
520 | encode_tag (enc, CBOR_TAG_PERL_OBJECT); |
526 | encode_tag (enc, CBOR_TAG_PERL_OBJECT); |
521 | encode_uint (enc, MAJOR_ARRAY, count + 1); |
527 | encode_uint (enc, MAJOR_ARRAY, count + 1); |
522 | encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); |
528 | encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); |
523 | |
529 | |
524 | while (count) |
530 | while (count) |
525 | encode_sv (enc, SP[1 - count--]); |
531 | encode_sv (enc, SP[1 - count--]); |
526 | |
532 | |
527 | PUTBACK; |
533 | PUTBACK; |
… | |
… | |
586 | |
592 | |
587 | if (SvPOKp (sv)) |
593 | if (SvPOKp (sv)) |
588 | { |
594 | { |
589 | STRLEN len; |
595 | STRLEN len; |
590 | char *str = SvPV (sv, len); |
596 | char *str = SvPV (sv, len); |
591 | encode_strref (enc, SvUTF8 (sv), str, len); |
597 | encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len); |
592 | } |
598 | } |
593 | else if (SvNOKp (sv)) |
599 | else if (SvNOKp (sv)) |
594 | encode_nv (enc, sv); |
600 | encode_nv (enc, sv); |
595 | else if (SvIOKp (sv)) |
601 | else if (SvIOKp (sv)) |
596 | { |
602 | { |
… | |
… | |
661 | |
667 | |
662 | #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE |
668 | #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE |
663 | |
669 | |
664 | #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") |
670 | #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") |
665 | |
671 | |
666 | #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED) |
672 | #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED) |
667 | #define DEC_DEC_DEPTH --dec->depth |
673 | #define DEC_DEC_DEPTH --dec->depth |
668 | |
674 | |
669 | static UV |
675 | static UV |
670 | decode_uint (dec_t *dec) |
676 | decode_uint (dec_t *dec) |
671 | { |
677 | { |
… | |
… | |
775 | // byte or utf-8 strings as keys, but only when !stringref |
781 | // byte or utf-8 strings as keys, but only when !stringref |
776 | |
782 | |
777 | if (ecb_expect_true (!dec->stringref)) |
783 | if (ecb_expect_true (!dec->stringref)) |
778 | if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) |
784 | if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) |
779 | { |
785 | { |
780 | I32 len = decode_uint (dec); |
786 | STRLEN len = decode_uint (dec); |
781 | char *key = (char *)dec->cur; |
787 | char *key = (char *)dec->cur; |
782 | |
788 | |
783 | WANT (len); |
789 | WANT (len); |
784 | dec->cur += len; |
790 | dec->cur += len; |
785 | |
791 | |
… | |
… | |
787 | |
793 | |
788 | return; |
794 | return; |
789 | } |
795 | } |
790 | else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) |
796 | else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) |
791 | { |
797 | { |
792 | I32 len = decode_uint (dec); |
798 | STRLEN len = decode_uint (dec); |
793 | char *key = (char *)dec->cur; |
799 | char *key = (char *)dec->cur; |
794 | |
800 | |
795 | WANT (len); |
801 | WANT (len); |
796 | dec->cur += len; |
802 | dec->cur += len; |
797 | |
803 | |
… | |
… | |
936 | sv = newRV_noinc (decode_sv (dec)); |
942 | sv = newRV_noinc (decode_sv (dec)); |
937 | break; |
943 | break; |
938 | |
944 | |
939 | case CBOR_TAG_STRINGREF_NAMESPACE: |
945 | case CBOR_TAG_STRINGREF_NAMESPACE: |
940 | { |
946 | { |
|
|
947 | // do not use SAVETMPS/FREETMPS, as these will |
|
|
948 | // erase mortalised caches, e.g. "shareable" |
941 | ENTER; SAVETMPS; |
949 | ENTER; |
942 | |
950 | |
943 | SAVESPTR (dec->stringref); |
951 | SAVESPTR (dec->stringref); |
944 | dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); |
952 | dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); |
945 | |
953 | |
946 | sv = decode_sv (dec); |
954 | sv = decode_sv (dec); |
947 | |
955 | |
948 | FREETMPS; LEAVE; |
956 | LEAVE; |
949 | } |
957 | } |
950 | break; |
958 | break; |
951 | |
959 | |
952 | case CBOR_TAG_STRINGREF: |
960 | case CBOR_TAG_STRINGREF: |
953 | { |
961 | { |
… | |
… | |
1386 | |
1394 | |
1387 | default_filter = newSVpv ("CBOR::XS::default_filter", 0); |
1395 | default_filter = newSVpv ("CBOR::XS::default_filter", 0); |
1388 | |
1396 | |
1389 | sv_cbor = newSVpv ("CBOR", 0); |
1397 | sv_cbor = newSVpv ("CBOR", 0); |
1390 | SvREADONLY_on (sv_cbor); |
1398 | SvREADONLY_on (sv_cbor); |
|
|
1399 | |
|
|
1400 | assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1)); |
1391 | } |
1401 | } |
1392 | |
1402 | |
1393 | PROTOTYPES: DISABLE |
1403 | PROTOTYPES: DISABLE |
1394 | |
1404 | |
1395 | void CLONE (...) |
1405 | void CLONE (...) |
… | |
… | |
1416 | shrink = F_SHRINK |
1426 | shrink = F_SHRINK |
1417 | allow_unknown = F_ALLOW_UNKNOWN |
1427 | allow_unknown = F_ALLOW_UNKNOWN |
1418 | allow_sharing = F_ALLOW_SHARING |
1428 | allow_sharing = F_ALLOW_SHARING |
1419 | allow_cycles = F_ALLOW_CYCLES |
1429 | allow_cycles = F_ALLOW_CYCLES |
1420 | pack_strings = F_PACK_STRINGS |
1430 | pack_strings = F_PACK_STRINGS |
|
|
1431 | text_keys = F_TEXT_KEYS |
1421 | utf8_strings = F_UTF8_STRINGS |
1432 | text_strings = F_TEXT_STRINGS |
1422 | validate_utf8 = F_VALIDATE_UTF8 |
1433 | validate_utf8 = F_VALIDATE_UTF8 |
1423 | PPCODE: |
1434 | PPCODE: |
1424 | { |
1435 | { |
1425 | if (enable) |
1436 | if (enable) |
1426 | self->flags |= ix; |
1437 | self->flags |= ix; |
… | |
… | |
1435 | get_shrink = F_SHRINK |
1446 | get_shrink = F_SHRINK |
1436 | get_allow_unknown = F_ALLOW_UNKNOWN |
1447 | get_allow_unknown = F_ALLOW_UNKNOWN |
1437 | get_allow_sharing = F_ALLOW_SHARING |
1448 | get_allow_sharing = F_ALLOW_SHARING |
1438 | get_allow_cycles = F_ALLOW_CYCLES |
1449 | get_allow_cycles = F_ALLOW_CYCLES |
1439 | get_pack_strings = F_PACK_STRINGS |
1450 | get_pack_strings = F_PACK_STRINGS |
|
|
1451 | get_text_keys = F_TEXT_KEYS |
|
|
1452 | get_text_strings = F_TEXT_STRINGS |
1440 | get_validate_utf8 = F_VALIDATE_UTF8 |
1453 | get_validate_utf8 = F_VALIDATE_UTF8 |
1441 | PPCODE: |
1454 | PPCODE: |
1442 | XPUSHs (boolSV (self->flags & ix)); |
1455 | XPUSHs (boolSV (self->flags & ix)); |
1443 | |
1456 | |
1444 | void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) |
1457 | void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) |