… | |
… | |
28 | #endif |
28 | #endif |
// compatibility fallback: when the perl headers do not provide
// SvREFCNT_dec_NN, map it to plain SvREFCNT_dec
#if !defined (SvREFCNT_dec_NN)
# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
#endif
32 | |
32 | |
|
|
// perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0,
// so an empty string is accepted without calling perl at all.
// str may be any character pointer type (char * or U8 *): the cast to the
// const U8 * expected by is_utf8_string is done here, so callers do not have to.
// note that len is evaluated twice.
#define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((const U8 *)(str), (len)))
|
|
35 | |
33 | // known major and minor types |
36 | // known major and minor types |
34 | enum cbor_type |
37 | enum cbor_type |
35 | { |
38 | { |
36 | MAJOR_SHIFT = 5, |
39 | MAJOR_SHIFT = 5, |
37 | MINOR_MASK = 0x1f, |
40 | MINOR_MASK = 0x1f, |
… | |
… | |
97 | CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8 |
100 | CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8 |
98 | CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8 |
101 | CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8 |
99 | CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 |
102 | CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 |
100 | |
103 | |
101 | CBOR_TAG_MAGIC = 55799, // self-describe cbor |
104 | CBOR_TAG_MAGIC = 55799, // self-describe cbor |
|
|
105 | }; |
|
|
106 | |
|
|
// forced encoding types, dispatched on by encode_forced
// the numeric values are also hardcoded in CBOR.pm, so they must not change
enum
{
  AS_CBOR    = 0, // value is copied into the output verbatim
  AS_INT     = 1, // value is encoded as integer
  AS_BYTES   = 2, // value is encoded as byte string
  AS_TEXT    = 3, // value is encoded as utf-8 text string
  AS_FLOAT16 = 4, // value is encoded as half precision float
  AS_FLOAT32 = 5, // value is encoded as single precision float
  AS_FLOAT64 = 6, // value is encoded as double precision float
  AS_MAP     = 7, // arrayref of key-value pairs is encoded as map
  // possibly future enhancements: (generic) float, (generic) string
};
103 | |
120 | |
104 | #define F_SHRINK 0x00000001UL |
121 | #define F_SHRINK 0x00000001UL |
105 | #define F_ALLOW_UNKNOWN 0x00000002UL |
122 | #define F_ALLOW_UNKNOWN 0x00000002UL |
106 | #define F_ALLOW_SHARING 0x00000004UL |
123 | #define F_ALLOW_SHARING 0x00000004UL |
… | |
… | |
187 | #endif |
204 | #endif |
188 | } |
205 | } |
189 | } |
206 | } |
190 | |
207 | |
191 | // minimum length of a string to be registered for stringref |
208 | // minimum length of a string to be registered for stringref |
192 | ecb_inline int |
209 | ecb_inline STRLEN |
193 | minimum_string_length (UV idx) |
210 | minimum_string_length (UV idx) |
194 | { |
211 | { |
195 | return idx <= 23 ? 3 |
212 | return idx <= 23 ? 3 |
196 | : idx <= 0xffU ? 4 |
213 | : idx <= 0xffU ? 4 |
197 | : idx <= 0xffffU ? 5 |
214 | : idx <= 0xffffU ? 5 |
… | |
… | |
226 | enc->cur = SvPVX (enc->sv) + cur; |
243 | enc->cur = SvPVX (enc->sv) + cur; |
227 | enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; |
244 | enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; |
228 | } |
245 | } |
229 | } |
246 | } |
230 | |
247 | |
|
|
248 | static void encode_sv (enc_t *enc, SV *sv); |
|
|
249 | |
231 | ecb_inline void |
250 | ecb_inline void |
232 | encode_ch (enc_t *enc, char ch) |
251 | encode_ch (enc_t *enc, char ch) |
233 | { |
252 | { |
234 | need (enc, 1); |
253 | need (enc, 1); |
235 | *enc->cur++ = ch; |
254 | *enc->cur++ = ch; |
236 | } |
255 | } |
237 | |
256 | |
|
|
257 | // used for tags, integers, element counts and so on |
238 | static void |
258 | static void |
239 | encode_uint (enc_t *enc, int major, UV len) |
259 | encode_uint (enc_t *enc, int major, UV len) |
240 | { |
260 | { |
241 | need (enc, 9); |
261 | need (enc, 9); |
242 | |
262 | |
… | |
… | |
273 | *enc->cur++ = len >> 8; |
293 | *enc->cur++ = len >> 8; |
274 | *enc->cur++ = len; |
294 | *enc->cur++ = len; |
275 | } |
295 | } |
276 | } |
296 | } |
277 | |
297 | |
|
|
298 | // encodes a perl value into a CBOR integer |
|
|
299 | ecb_inline void |
|
|
300 | encode_int (enc_t *enc, SV *sv) |
|
|
301 | { |
|
|
302 | if (SvIsUV (sv)) |
|
|
303 | encode_uint (enc, MAJOR_POS_INT, SvUVX (sv)); |
|
|
304 | else if (SvIVX (sv) >= 0) |
|
|
305 | encode_uint (enc, MAJOR_POS_INT, SvIVX (sv)); |
|
|
306 | else |
|
|
307 | encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1)); |
|
|
308 | } |
|
|
309 | |
278 | ecb_inline void |
310 | ecb_inline void |
279 | encode_tag (enc_t *enc, UV tag) |
311 | encode_tag (enc_t *enc, UV tag) |
280 | { |
312 | { |
281 | encode_uint (enc, MAJOR_TAG, tag); |
313 | encode_uint (enc, MAJOR_TAG, tag); |
282 | } |
314 | } |
… | |
… | |
343 | } |
375 | } |
344 | |
376 | |
345 | encode_str (enc, upgrade_utf8, utf8, str, len); |
377 | encode_str (enc, upgrade_utf8, utf8, str, len); |
346 | } |
378 | } |
347 | |
379 | |
348 | static void encode_sv (enc_t *enc, SV *sv); |
380 | ecb_inline void |
|
|
381 | encode_float16 (enc_t *enc, NV nv) |
|
|
382 | { |
|
|
383 | need (enc, 1+2); |
|
|
384 | |
|
|
385 | *enc->cur++ = MAJOR_MISC | MISC_FLOAT16; |
|
|
386 | |
|
|
387 | uint16_t fp = ecb_float_to_binary16 (nv); |
|
|
388 | |
|
|
389 | if (!ecb_big_endian ()) |
|
|
390 | fp = ecb_bswap16 (fp); |
|
|
391 | |
|
|
392 | memcpy (enc->cur, &fp, 2); |
|
|
393 | enc->cur += 2; |
|
|
394 | } |
|
|
395 | |
|
|
396 | ecb_inline void |
|
|
397 | encode_float32 (enc_t *enc, NV nv) |
|
|
398 | { |
|
|
399 | need (enc, 1+4); |
|
|
400 | |
|
|
401 | *enc->cur++ = MAJOR_MISC | MISC_FLOAT32; |
|
|
402 | |
|
|
403 | uint32_t fp = ecb_float_to_binary32 (nv); |
|
|
404 | |
|
|
405 | if (!ecb_big_endian ()) |
|
|
406 | fp = ecb_bswap32 (fp); |
|
|
407 | |
|
|
408 | memcpy (enc->cur, &fp, 4); |
|
|
409 | enc->cur += 4; |
|
|
410 | } |
|
|
411 | |
|
|
412 | ecb_inline void |
|
|
413 | encode_float64 (enc_t *enc, NV nv) |
|
|
414 | { |
|
|
415 | need (enc, 1+8); |
|
|
416 | |
|
|
417 | *enc->cur++ = MAJOR_MISC | MISC_FLOAT64; |
|
|
418 | |
|
|
419 | uint64_t fp = ecb_double_to_binary64 (nv); |
|
|
420 | |
|
|
421 | if (!ecb_big_endian ()) |
|
|
422 | fp = ecb_bswap64 (fp); |
|
|
423 | |
|
|
424 | memcpy (enc->cur, &fp, 8); |
|
|
425 | enc->cur += 8; |
|
|
426 | } |
|
|
427 | |
|
|
428 | ecb_inline void |
|
|
429 | encode_bool (enc_t *enc, int istrue) |
|
|
430 | { |
|
|
431 | encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE); |
|
|
432 | } |
|
|
433 | |
|
|
434 | // encodes an arrayref containing key-value pairs as CBOR map |
|
|
435 | ecb_inline void |
|
|
436 | encode_array_as_map (enc_t *enc, SV *sv) |
|
|
437 | { |
|
|
438 | if (enc->depth >= enc->cbor.max_depth) |
|
|
439 | croak (ERR_NESTING_EXCEEDED); |
|
|
440 | |
|
|
441 | ++enc->depth; |
|
|
442 | |
|
|
443 | // as_map does error checking for us, but we re-check in case |
|
|
444 | // things have changed. |
|
|
445 | |
|
|
446 | if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) |
|
|
447 | croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)"); |
|
|
448 | |
|
|
449 | AV *av = (AV *)SvRV (sv); |
|
|
450 | int i, len = av_len (av); |
|
|
451 | |
|
|
452 | if (!(len & 1)) |
|
|
453 | croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)"); |
|
|
454 | |
|
|
455 | encode_uint (enc, MAJOR_MAP, (len + 1) >> 1); |
|
|
456 | |
|
|
457 | for (i = 0; i <= len; ++i) |
|
|
458 | { |
|
|
459 | SV **svp = av_fetch (av, i, 0); |
|
|
460 | encode_sv (enc, svp ? *svp : &PL_sv_undef); |
|
|
461 | } |
|
|
462 | |
|
|
463 | --enc->depth; |
|
|
464 | } |
|
|
465 | |
|
|
466 | ecb_inline void |
|
|
467 | encode_forced (enc_t *enc, UV type, SV *sv) |
|
|
468 | { |
|
|
469 | switch (type) |
|
|
470 | { |
|
|
471 | case AS_CBOR: |
|
|
472 | { |
|
|
473 | STRLEN len; |
|
|
474 | char *str = SvPVbyte (sv, len); |
|
|
475 | |
|
|
476 | need (enc, len); |
|
|
477 | memcpy (enc->cur, str, len); |
|
|
478 | enc->cur += len; |
|
|
479 | } |
|
|
480 | break; |
|
|
481 | |
|
|
482 | case AS_BYTES: |
|
|
483 | { |
|
|
484 | STRLEN len; |
|
|
485 | char *str = SvPVbyte (sv, len); |
|
|
486 | encode_strref (enc, 0, 0, str, len); |
|
|
487 | } |
|
|
488 | break; |
|
|
489 | |
|
|
490 | case AS_TEXT: |
|
|
491 | { |
|
|
492 | STRLEN len; |
|
|
493 | char *str = SvPVutf8 (sv, len); |
|
|
494 | encode_strref (enc, 1, 1, str, len); |
|
|
495 | } |
|
|
496 | break; |
|
|
497 | |
|
|
498 | case AS_INT: encode_int (enc, sv); break; |
|
|
499 | |
|
|
500 | case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break; |
|
|
501 | case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break; |
|
|
502 | case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break; |
|
|
503 | |
|
|
504 | case AS_MAP: encode_array_as_map (enc, sv); break; |
|
|
505 | |
|
|
506 | default: |
|
|
507 | croak ("encountered malformed CBOR::XS::Tagged object"); |
|
|
508 | } |
|
|
509 | } |
349 | |
510 | |
350 | static void |
511 | static void |
351 | encode_av (enc_t *enc, AV *av) |
512 | encode_av (enc_t *enc, AV *av) |
352 | { |
513 | { |
353 | int i, len = av_len (av); |
514 | int i, len = av_len (av); |
… | |
… | |
431 | |
592 | |
432 | HV *stash = SvSTASH (sv); |
593 | HV *stash = SvSTASH (sv); |
433 | |
594 | |
434 | if (stash == boolean_stash) |
595 | if (stash == boolean_stash) |
435 | { |
596 | { |
436 | encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE); |
597 | encode_bool (enc, SvIV (sv)); |
437 | return; |
598 | return; |
438 | } |
599 | } |
439 | else if (stash == error_stash) |
600 | else if (stash == error_stash) |
440 | { |
601 | { |
441 | encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF); |
602 | encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF); |
… | |
… | |
444 | else if (stash == tagged_stash) |
605 | else if (stash == tagged_stash) |
445 | { |
606 | { |
446 | if (svt != SVt_PVAV) |
607 | if (svt != SVt_PVAV) |
447 | croak ("encountered CBOR::XS::Tagged object that isn't an array"); |
608 | croak ("encountered CBOR::XS::Tagged object that isn't an array"); |
448 | |
609 | |
|
|
610 | switch (av_len ((AV *)sv)) |
|
|
611 | { |
|
|
612 | case 2-1: |
|
|
613 | // actually a tagged value |
449 | encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1))); |
614 | encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1))); |
450 | encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); |
615 | encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); |
|
|
616 | break; |
|
|
617 | |
|
|
618 | case 3-1: |
|
|
619 | // a forced type [value, type, undef] |
|
|
620 | encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1)); |
|
|
621 | break; |
|
|
622 | |
|
|
623 | default: |
|
|
624 | croak ("encountered malformed CBOR::XS::Tagged object"); |
|
|
625 | } |
451 | |
626 | |
452 | return; |
627 | return; |
453 | } |
628 | } |
454 | } |
629 | } |
455 | |
630 | |
… | |
… | |
567 | |
742 | |
568 | if (ecb_expect_false (nv == (NV)(U32)nv)) |
743 | if (ecb_expect_false (nv == (NV)(U32)nv)) |
569 | encode_uint (enc, MAJOR_POS_INT, (U32)nv); |
744 | encode_uint (enc, MAJOR_POS_INT, (U32)nv); |
570 | //TODO: maybe I32? |
745 | //TODO: maybe I32? |
571 | else if (ecb_expect_false (nv == (float)nv)) |
746 | else if (ecb_expect_false (nv == (float)nv)) |
572 | { |
747 | encode_float32 (enc, nv); |
573 | *enc->cur++ = MAJOR_MISC | MISC_FLOAT32; |
|
|
574 | |
|
|
575 | uint32_t fp = ecb_float_to_binary32 (nv); |
|
|
576 | |
|
|
577 | if (!ecb_big_endian ()) |
|
|
578 | fp = ecb_bswap32 (fp); |
|
|
579 | |
|
|
580 | memcpy (enc->cur, &fp, 4); |
|
|
581 | enc->cur += 4; |
|
|
582 | } |
|
|
583 | else |
748 | else |
584 | { |
749 | encode_float64 (enc, nv); |
585 | *enc->cur++ = MAJOR_MISC | MISC_FLOAT64; |
|
|
586 | |
|
|
587 | uint64_t fp = ecb_double_to_binary64 (nv); |
|
|
588 | |
|
|
589 | if (!ecb_big_endian ()) |
|
|
590 | fp = ecb_bswap64 (fp); |
|
|
591 | |
|
|
592 | memcpy (enc->cur, &fp, 8); |
|
|
593 | enc->cur += 8; |
|
|
594 | } |
|
|
595 | } |
750 | } |
596 | |
751 | |
597 | static void |
752 | static void |
598 | encode_sv (enc_t *enc, SV *sv) |
753 | encode_sv (enc_t *enc, SV *sv) |
599 | { |
754 | { |
… | |
… | |
606 | encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len); |
761 | encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len); |
607 | } |
762 | } |
608 | else if (SvNOKp (sv)) |
763 | else if (SvNOKp (sv)) |
609 | encode_nv (enc, sv); |
764 | encode_nv (enc, sv); |
610 | else if (SvIOKp (sv)) |
765 | else if (SvIOKp (sv)) |
611 | { |
766 | encode_int (enc, sv); |
612 | if (SvIsUV (sv)) |
|
|
613 | encode_uint (enc, MAJOR_POS_INT, SvUVX (sv)); |
|
|
614 | else if (SvIVX (sv) >= 0) |
|
|
615 | encode_uint (enc, MAJOR_POS_INT, SvIVX (sv)); |
|
|
616 | else |
|
|
617 | encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1)); |
|
|
618 | } |
|
|
619 | else if (SvROK (sv)) |
767 | else if (SvROK (sv)) |
620 | encode_rv (enc, SvRV (sv)); |
768 | encode_rv (enc, SvRV (sv)); |
621 | else if (!SvOK (sv)) |
769 | else if (!SvOK (sv)) |
622 | encode_ch (enc, MAJOR_MISC | SIMPLE_NULL); |
770 | encode_ch (enc, MAJOR_MISC | SIMPLE_NULL); |
623 | else if (enc->cbor.flags & F_ALLOW_UNKNOWN) |
771 | else if (enc->cbor.flags & F_ALLOW_UNKNOWN) |
… | |
… | |
842 | |
990 | |
843 | WANT (len); |
991 | WANT (len); |
844 | dec->cur += len; |
992 | dec->cur += len; |
845 | |
993 | |
846 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
994 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
847 | if (!is_utf8_string (key, len)) |
995 | if (!cbor_is_utf8_string ((U8 *)key, len)) |
848 | ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); |
996 | ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); |
849 | |
997 | |
850 | hv_store (hv, key, -len, decode_sv (dec), 0); |
998 | hv_store (hv, key, -len, decode_sv (dec), 0); |
851 | |
999 | |
852 | return; |
1000 | return; |
… | |
… | |
981 | } |
1129 | } |
982 | |
1130 | |
983 | if (utf8) |
1131 | if (utf8) |
984 | { |
1132 | { |
985 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
1133 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
986 | if (!is_utf8_string (SvPVX (sv), SvCUR (sv))) |
1134 | if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv))) |
987 | ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); |
1135 | ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); |
988 | |
1136 | |
989 | SvUTF8_on (sv); |
1137 | SvUTF8_on (sv); |
990 | } |
1138 | } |
991 | |
1139 | |
… | |
… | |
1075 | UV idx = decode_uint (dec); |
1223 | UV idx = decode_uint (dec); |
1076 | |
1224 | |
1077 | if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) |
1225 | if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) |
1078 | ERR ("corrupted CBOR data (sharedref index out of bounds)"); |
1226 | ERR ("corrupted CBOR data (sharedref index out of bounds)"); |
1079 | |
1227 | |
1080 | sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); |
1228 | sv = newRV_inc (AvARRAY (dec->shareable)[idx]); |
1081 | |
1229 | |
1082 | if (sv == &PL_sv_undef) |
1230 | if (sv == &PL_sv_undef) |
1083 | ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled"); |
1231 | ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled"); |
1084 | } |
1232 | } |
1085 | break; |
1233 | break; |
… | |
… | |
1325 | SvREFCNT_dec_NN (sv); |
1473 | SvREFCNT_dec_NN (sv); |
1326 | |
1474 | |
1327 | if (dec.err_sv) |
1475 | if (dec.err_sv) |
1328 | sv_2mortal (dec.err_sv); |
1476 | sv_2mortal (dec.err_sv); |
1329 | |
1477 | |
1330 | croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); |
1478 | croak ("%s, at offset %ld (octet 0x%02x)", dec.err, (long)(dec.cur - (U8 *)data), (int)(uint8_t)*dec.cur); |
1331 | } |
1479 | } |
1332 | |
1480 | |
1333 | sv = sv_2mortal (sv); |
1481 | sv = sv_2mortal (sv); |
1334 | |
1482 | |
1335 | return sv; |
1483 | return sv; |
… | |
… | |
1424 | |
1572 | |
1425 | break; |
1573 | break; |
1426 | |
1574 | |
1427 | case MAJOR_MAP >> MAJOR_SHIFT: |
1575 | case MAJOR_MAP >> MAJOR_SHIFT: |
1428 | len <<= 1; |
1576 | len <<= 1; |
|
|
1577 | /* FALLTHROUGH */ |
1429 | case MAJOR_ARRAY >> MAJOR_SHIFT: |
1578 | case MAJOR_ARRAY >> MAJOR_SHIFT: |
1430 | if (len) |
1579 | if (len) |
1431 | { |
1580 | { |
1432 | av_push (self->incr_count, newSViv (len + 1)); //TODO: nest |
1581 | av_push (self->incr_count, newSViv (len + 1)); //TODO: nest |
1433 | count = len + 1; |
1582 | count = len + 1; |