… | |
… | |
28 | #endif |
28 | #endif |
29 | #ifndef SvREFCNT_dec_NN |
29 | #ifndef SvREFCNT_dec_NN |
30 | # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) |
30 | # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) |
31 | #endif |
31 | #endif |
32 | |
32 | |
|
|
33 | // perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0 |
|
|
34 | #define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len))) |
|
|
35 | |
33 | // known major and minor types |
36 | // known major and minor types |
34 | enum cbor_type |
37 | enum cbor_type |
35 | { |
38 | { |
36 | MAJOR_SHIFT = 5, |
39 | MAJOR_SHIFT = 5, |
37 | MINOR_MASK = 0x1f, |
40 | MINOR_MASK = 0x1f, |
… | |
… | |
109 | AS_BYTES = 2, |
112 | AS_BYTES = 2, |
110 | AS_TEXT = 3, |
113 | AS_TEXT = 3, |
111 | AS_FLOAT16 = 4, |
114 | AS_FLOAT16 = 4, |
112 | AS_FLOAT32 = 5, |
115 | AS_FLOAT32 = 5, |
113 | AS_FLOAT64 = 6, |
116 | AS_FLOAT64 = 6, |
|
|
117 | AS_MAP = 7, |
114 | // possibly future enhancements: (generic) float, (generic) string |
118 | // possibly future enhancements: (generic) float, (generic) string |
115 | }; |
119 | }; |
116 | |
120 | |
117 | #define F_SHRINK 0x00000001UL |
121 | #define F_SHRINK 0x00000001UL |
118 | #define F_ALLOW_UNKNOWN 0x00000002UL |
122 | #define F_ALLOW_UNKNOWN 0x00000002UL |
119 | #define F_ALLOW_SHARING 0x00000004UL |
123 | #define F_ALLOW_SHARING 0x00000004UL |
120 | #define F_ALLOW_CYCLES 0x00000008UL |
124 | #define F_ALLOW_CYCLES 0x00000008UL |
|
|
125 | #define F_ALLOW_WEAK_CYCLES 0x00000010UL |
121 | #define F_FORBID_OBJECTS 0x00000010UL |
126 | #define F_FORBID_OBJECTS 0x00000020UL |
122 | #define F_PACK_STRINGS 0x00000020UL |
127 | #define F_PACK_STRINGS 0x00000040UL |
123 | #define F_TEXT_KEYS 0x00000040UL |
128 | #define F_TEXT_KEYS 0x00000080UL |
124 | #define F_TEXT_STRINGS 0x00000080UL |
129 | #define F_TEXT_STRINGS 0x00000100UL |
125 | #define F_VALIDATE_UTF8 0x00000100UL |
130 | #define F_VALIDATE_UTF8 0x00000200UL |
126 | |
131 | |
127 | #define INIT_SIZE 32 // initial scalar size to be allocated |
132 | #define INIT_SIZE 32 // initial scalar size to be allocated |
128 | |
133 | |
129 | #define SB do { |
134 | #define SB do { |
130 | #define SE } while (0) |
135 | #define SE } while (0) |
… | |
… | |
200 | #endif |
205 | #endif |
201 | } |
206 | } |
202 | } |
207 | } |
203 | |
208 | |
204 | // minimum length of a string to be registered for stringref |
209 | // minimum length of a string to be registered for stringref |
205 | ecb_inline int |
210 | ecb_inline STRLEN |
206 | minimum_string_length (UV idx) |
211 | minimum_string_length (UV idx) |
207 | { |
212 | { |
208 | return idx <= 23 ? 3 |
213 | return idx <= 23 ? 3 |
209 | : idx <= 0xffU ? 4 |
214 | : idx <= 0xffU ? 4 |
210 | : idx <= 0xffffU ? 5 |
215 | : idx <= 0xffffU ? 5 |
… | |
… | |
238 | SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); |
243 | SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); |
239 | enc->cur = SvPVX (enc->sv) + cur; |
244 | enc->cur = SvPVX (enc->sv) + cur; |
240 | enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; |
245 | enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; |
241 | } |
246 | } |
242 | } |
247 | } |
|
|
248 | |
|
|
249 | static void encode_sv (enc_t *enc, SV *sv); |
243 | |
250 | |
244 | ecb_inline void |
251 | ecb_inline void |
245 | encode_ch (enc_t *enc, char ch) |
252 | encode_ch (enc_t *enc, char ch) |
246 | { |
253 | { |
247 | need (enc, 1); |
254 | need (enc, 1); |
… | |
… | |
423 | encode_bool (enc_t *enc, int istrue) |
430 | encode_bool (enc_t *enc, int istrue) |
424 | { |
431 | { |
425 | encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE); |
432 | encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE); |
426 | } |
433 | } |
427 | |
434 | |
|
|
435 | // encodes an arrayref containing key-value pairs as CBOR map |
|
|
436 | ecb_inline void |
|
|
437 | encode_array_as_map (enc_t *enc, SV *sv) |
|
|
438 | { |
|
|
439 | if (enc->depth >= enc->cbor.max_depth) |
|
|
440 | croak (ERR_NESTING_EXCEEDED); |
|
|
441 | |
|
|
442 | ++enc->depth; |
|
|
443 | |
|
|
444 | // as_map does error checking for us, but we re-check in case |
|
|
445 | // things have changed. |
|
|
446 | |
|
|
447 | if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) |
|
|
448 | croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)"); |
|
|
449 | |
|
|
450 | AV *av = (AV *)SvRV (sv); |
|
|
451 | int i, len = av_len (av); |
|
|
452 | |
|
|
453 | if (!(len & 1)) |
|
|
454 | croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)"); |
|
|
455 | |
|
|
456 | encode_uint (enc, MAJOR_MAP, (len + 1) >> 1); |
|
|
457 | |
|
|
458 | for (i = 0; i <= len; ++i) |
|
|
459 | { |
|
|
460 | SV **svp = av_fetch (av, i, 0); |
|
|
461 | encode_sv (enc, svp ? *svp : &PL_sv_undef); |
|
|
462 | } |
|
|
463 | |
|
|
464 | --enc->depth; |
|
|
465 | } |
|
|
466 | |
428 | ecb_inline void |
467 | ecb_inline void |
429 | encode_forced (enc_t *enc, UV type, SV *sv) |
468 | encode_forced (enc_t *enc, UV type, SV *sv) |
430 | { |
469 | { |
431 | switch (type) |
470 | switch (type) |
432 | { |
471 | { |
… | |
… | |
461 | |
500 | |
462 | case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break; |
501 | case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break; |
463 | case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break; |
502 | case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break; |
464 | case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break; |
503 | case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break; |
465 | |
504 | |
|
|
505 | case AS_MAP: encode_array_as_map (enc, sv); break; |
|
|
506 | |
466 | default: |
507 | default: |
467 | croak ("encountered malformed CBOR::XS::Tagged object"); |
508 | croak ("encountered malformed CBOR::XS::Tagged object"); |
468 | } |
509 | } |
469 | } |
510 | } |
470 | |
|
|
471 | static void encode_sv (enc_t *enc, SV *sv); |
|
|
472 | |
511 | |
473 | static void |
512 | static void |
474 | encode_av (enc_t *enc, AV *av) |
513 | encode_av (enc_t *enc, AV *av) |
475 | { |
514 | { |
476 | int i, len = av_len (av); |
515 | int i, len = av_len (av); |
… | |
… | |
952 | |
991 | |
953 | WANT (len); |
992 | WANT (len); |
954 | dec->cur += len; |
993 | dec->cur += len; |
955 | |
994 | |
956 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
995 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
957 | if (!is_utf8_string (key, len)) |
996 | if (!cbor_is_utf8_string ((U8 *)key, len)) |
958 | ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); |
997 | ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); |
959 | |
998 | |
960 | hv_store (hv, key, -len, decode_sv (dec), 0); |
999 | hv_store (hv, key, -len, decode_sv (dec), 0); |
961 | |
1000 | |
962 | return; |
1001 | return; |
… | |
… | |
1091 | } |
1130 | } |
1092 | |
1131 | |
1093 | if (utf8) |
1132 | if (utf8) |
1094 | { |
1133 | { |
1095 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
1134 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
1096 | if (!is_utf8_string (SvPVX (sv), SvCUR (sv))) |
1135 | if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv))) |
1097 | ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); |
1136 | ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); |
1098 | |
1137 | |
1099 | SvUTF8_on (sv); |
1138 | SvUTF8_on (sv); |
1100 | } |
1139 | } |
1101 | |
1140 | |
… | |
… | |
1156 | case CBOR_TAG_VALUE_SHAREABLE: |
1195 | case CBOR_TAG_VALUE_SHAREABLE: |
1157 | { |
1196 | { |
1158 | if (ecb_expect_false (!dec->shareable)) |
1197 | if (ecb_expect_false (!dec->shareable)) |
1159 | dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); |
1198 | dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); |
1160 | |
1199 | |
1161 | if (dec->cbor.flags & F_ALLOW_CYCLES) |
1200 | if (ecb_expect_false (dec->cbor.flags & (F_ALLOW_CYCLES | F_ALLOW_WEAK_CYCLES))) |
1162 | { |
1201 | { |
|
|
1202 | // if cycles are allowed, then we store an AV as value |
|
|
1203 | // while it is being decoded, and gather unresolved |
|
|
1204 | // references in it, to be resolved after decoding. |
|
|
1205 | int idx, i; |
1163 | sv = newSV (0); |
1206 | AV *av = newAV (); |
1164 | av_push (dec->shareable, SvREFCNT_inc_NN (sv)); |
1207 | av_push (dec->shareable, (SV *)av); |
|
|
1208 | idx = AvFILLp (dec->shareable); |
1165 | |
1209 | |
1166 | SV *osv = decode_sv (dec); |
1210 | sv = decode_sv (dec); |
1167 | sv_setsv (sv, osv); |
1211 | |
|
|
1212 | // the AV now contains \undef for all unresolved references, |
|
|
1213 | // so we fix them up here. |
|
|
1214 | for (i = 0; i <= AvFILLp (av); ++i) |
|
|
1215 | SvRV_set (AvARRAY (av)[i], SvREFCNT_inc_NN (SvRV (sv))); |
|
|
1216 | |
|
|
1217 | // weaken all recursive references |
|
|
1218 | if (dec->cbor.flags & F_ALLOW_WEAK_CYCLES) |
|
|
1219 | for (i = 0; i <= AvFILLp (av); ++i) |
|
|
1220 | sv_rvweaken (AvARRAY (av)[i]); |
|
|
1221 | |
|
|
1222 | // now replace the AV by a reference to the completed value |
1168 | SvREFCNT_dec_NN (osv); |
1223 | SvREFCNT_dec_NN ((SV *)av); |
|
|
1224 | AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv); |
1169 | } |
1225 | } |
1170 | else |
1226 | else |
1171 | { |
1227 | { |
1172 | av_push (dec->shareable, &PL_sv_undef); |
1228 | av_push (dec->shareable, &PL_sv_undef); |
1173 | int idx = AvFILLp (dec->shareable); |
1229 | int idx = AvFILLp (dec->shareable); |
1174 | sv = decode_sv (dec); |
1230 | sv = decode_sv (dec); |
1175 | av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv)); |
1231 | AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv); |
1176 | } |
1232 | } |
1177 | } |
1233 | } |
1178 | break; |
1234 | break; |
1179 | |
1235 | |
1180 | case CBOR_TAG_VALUE_SHAREDREF: |
1236 | case CBOR_TAG_VALUE_SHAREDREF: |
… | |
… | |
1185 | UV idx = decode_uint (dec); |
1241 | UV idx = decode_uint (dec); |
1186 | |
1242 | |
1187 | if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) |
1243 | if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) |
1188 | ERR ("corrupted CBOR data (sharedref index out of bounds)"); |
1244 | ERR ("corrupted CBOR data (sharedref index out of bounds)"); |
1189 | |
1245 | |
1190 | sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); |
1246 | sv = AvARRAY (dec->shareable)[idx]; |
1191 | |
1247 | |
1192 | if (sv == &PL_sv_undef) |
1248 | // reference to cycle, we create a new \undef and use that, and also |
|
|
1249 | // register it in the AV for later fixing |
|
|
1250 | if (ecb_expect_false (SvTYPE (sv) == SVt_PVAV)) |
|
|
1251 | { |
|
|
1252 | AV *av = (AV *)sv; |
|
|
1253 | sv = newRV_noinc (&PL_sv_undef); |
|
|
1254 | av_push (av, SvREFCNT_inc_NN (sv)); |
|
|
1255 | } |
|
|
1256 | else if (ecb_expect_false (sv == &PL_sv_undef)) // not yet decoded, but cycles not allowed |
1193 | ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled"); |
1257 | ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled"); |
|
|
1258 | else // we decoded the object earlier, no cycle |
|
|
1259 | sv = newSVsv (sv); |
1194 | } |
1260 | } |
1195 | break; |
1261 | break; |
1196 | |
1262 | |
1197 | case CBOR_TAG_PERL_OBJECT: |
1263 | case CBOR_TAG_PERL_OBJECT: |
1198 | { |
1264 | { |
… | |
… | |
1435 | SvREFCNT_dec_NN (sv); |
1501 | SvREFCNT_dec_NN (sv); |
1436 | |
1502 | |
1437 | if (dec.err_sv) |
1503 | if (dec.err_sv) |
1438 | sv_2mortal (dec.err_sv); |
1504 | sv_2mortal (dec.err_sv); |
1439 | |
1505 | |
1440 | croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); |
1506 | croak ("%s, at offset %ld (octet 0x%02x)", dec.err, (long)(dec.cur - (U8 *)data), (int)(uint8_t)*dec.cur); |
1441 | } |
1507 | } |
1442 | |
1508 | |
1443 | sv = sv_2mortal (sv); |
1509 | sv = sv_2mortal (sv); |
1444 | |
1510 | |
1445 | return sv; |
1511 | return sv; |
… | |
… | |
1534 | |
1600 | |
1535 | break; |
1601 | break; |
1536 | |
1602 | |
1537 | case MAJOR_MAP >> MAJOR_SHIFT: |
1603 | case MAJOR_MAP >> MAJOR_SHIFT: |
1538 | len <<= 1; |
1604 | len <<= 1; |
|
|
1605 | /* FALLTHROUGH */ |
1539 | case MAJOR_ARRAY >> MAJOR_SHIFT: |
1606 | case MAJOR_ARRAY >> MAJOR_SHIFT: |
1540 | if (len) |
1607 | if (len) |
1541 | { |
1608 | { |
1542 | av_push (self->incr_count, newSViv (len + 1)); //TODO: nest |
1609 | av_push (self->incr_count, newSViv (len + 1)); //TODO: nest |
1543 | count = len + 1; |
1610 | count = len + 1; |
… | |
… | |
1615 | ))); |
1682 | ))); |
1616 | } |
1683 | } |
1617 | |
1684 | |
1618 | void shrink (CBOR *self, int enable = 1) |
1685 | void shrink (CBOR *self, int enable = 1) |
1619 | ALIAS: |
1686 | ALIAS: |
1620 | shrink = F_SHRINK |
1687 | shrink = F_SHRINK |
1621 | allow_unknown = F_ALLOW_UNKNOWN |
1688 | allow_unknown = F_ALLOW_UNKNOWN |
1622 | allow_sharing = F_ALLOW_SHARING |
1689 | allow_sharing = F_ALLOW_SHARING |
1623 | allow_cycles = F_ALLOW_CYCLES |
1690 | allow_cycles = F_ALLOW_CYCLES |
|
|
1691 | allow_weak_cycles = F_ALLOW_WEAK_CYCLES |
1624 | forbid_objects = F_FORBID_OBJECTS |
1692 | forbid_objects = F_FORBID_OBJECTS |
1625 | pack_strings = F_PACK_STRINGS |
1693 | pack_strings = F_PACK_STRINGS |
1626 | text_keys = F_TEXT_KEYS |
1694 | text_keys = F_TEXT_KEYS |
1627 | text_strings = F_TEXT_STRINGS |
1695 | text_strings = F_TEXT_STRINGS |
1628 | validate_utf8 = F_VALIDATE_UTF8 |
1696 | validate_utf8 = F_VALIDATE_UTF8 |
1629 | PPCODE: |
1697 | PPCODE: |
1630 | { |
1698 | { |
1631 | if (enable) |
1699 | if (enable) |
1632 | self->flags |= ix; |
1700 | self->flags |= ix; |
1633 | else |
1701 | else |
… | |
… | |
1636 | XPUSHs (ST (0)); |
1704 | XPUSHs (ST (0)); |
1637 | } |
1705 | } |
1638 | |
1706 | |
1639 | void get_shrink (CBOR *self) |
1707 | void get_shrink (CBOR *self) |
1640 | ALIAS: |
1708 | ALIAS: |
1641 | get_shrink = F_SHRINK |
1709 | get_shrink = F_SHRINK |
1642 | get_allow_unknown = F_ALLOW_UNKNOWN |
1710 | get_allow_unknown = F_ALLOW_UNKNOWN |
1643 | get_allow_sharing = F_ALLOW_SHARING |
1711 | get_allow_sharing = F_ALLOW_SHARING |
1644 | get_allow_cycles = F_ALLOW_CYCLES |
1712 | get_allow_cycles = F_ALLOW_CYCLES |
|
|
1713 | get_allow_weak_cycles = F_ALLOW_WEAK_CYCLES |
1645 | get_forbid_objects = F_FORBID_OBJECTS |
1714 | get_forbid_objects = F_FORBID_OBJECTS |
1646 | get_pack_strings = F_PACK_STRINGS |
1715 | get_pack_strings = F_PACK_STRINGS |
1647 | get_text_keys = F_TEXT_KEYS |
1716 | get_text_keys = F_TEXT_KEYS |
1648 | get_text_strings = F_TEXT_STRINGS |
1717 | get_text_strings = F_TEXT_STRINGS |
1649 | get_validate_utf8 = F_VALIDATE_UTF8 |
1718 | get_validate_utf8 = F_VALIDATE_UTF8 |
1650 | PPCODE: |
1719 | PPCODE: |
1651 | XPUSHs (boolSV (self->flags & ix)); |
1720 | XPUSHs (boolSV (self->flags & ix)); |
1652 | |
1721 | |
1653 | void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) |
1722 | void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) |
1654 | PPCODE: |
1723 | PPCODE: |