… | |
… | |
28 | #endif |
28 | #endif |
29 | #ifndef SvREFCNT_dec_NN |
29 | #ifndef SvREFCNT_dec_NN |
30 | # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) |
30 | # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) |
31 | #endif |
31 | #endif |
32 | |
32 | |
|
|
33 | // perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0 |
|
|
34 | #define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len))) |
|
|
35 | |
33 | // known major and minor types |
36 | // known major and minor types |
34 | enum cbor_type |
37 | enum cbor_type |
35 | { |
38 | { |
36 | MAJOR_SHIFT = 5, |
39 | MAJOR_SHIFT = 5, |
37 | MINOR_MASK = 0x1f, |
40 | MINOR_MASK = 0x1f, |
… | |
… | |
113 | AS_FLOAT64 = 6, |
116 | AS_FLOAT64 = 6, |
114 | AS_MAP = 7, |
117 | AS_MAP = 7, |
115 | // possibly future enhancements: (generic) float, (generic) string |
118 | // possibly future enhancements: (generic) float, (generic) string |
116 | }; |
119 | }; |
117 | |
120 | |
118 | #define F_SHRINK 0x00000001UL |
121 | #define F_SHRINK 0x00000001UL |
119 | #define F_ALLOW_UNKNOWN 0x00000002UL |
122 | #define F_ALLOW_UNKNOWN 0x00000002UL |
120 | #define F_ALLOW_SHARING 0x00000004UL |
123 | #define F_ALLOW_SHARING 0x00000004UL |
121 | #define F_ALLOW_CYCLES 0x00000008UL |
124 | #define F_ALLOW_CYCLES 0x00000008UL |
|
|
125 | #define F_ALLOW_WEAK_CYCLES 0x00000010UL |
122 | #define F_FORBID_OBJECTS 0x00000010UL |
126 | #define F_FORBID_OBJECTS 0x00000020UL |
123 | #define F_PACK_STRINGS 0x00000020UL |
127 | #define F_PACK_STRINGS 0x00000040UL |
124 | #define F_TEXT_KEYS 0x00000040UL |
128 | #define F_TEXT_KEYS 0x00000080UL |
125 | #define F_TEXT_STRINGS 0x00000080UL |
129 | #define F_TEXT_STRINGS 0x00000100UL |
126 | #define F_VALIDATE_UTF8 0x00000100UL |
130 | #define F_VALIDATE_UTF8 0x00000200UL |
127 | |
131 | |
128 | #define INIT_SIZE 32 // initial scalar size to be allocated |
132 | #define INIT_SIZE 32 // initial scalar size to be allocated |
129 | |
133 | |
130 | #define SB do { |
134 | #define SB do { |
131 | #define SE } while (0) |
135 | #define SE } while (0) |
… | |
… | |
987 | |
991 | |
988 | WANT (len); |
992 | WANT (len); |
989 | dec->cur += len; |
993 | dec->cur += len; |
990 | |
994 | |
991 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
995 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
992 | if (!is_utf8_string ((U8 *)key, len)) |
996 | if (!cbor_is_utf8_string ((U8 *)key, len)) |
993 | ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); |
997 | ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); |
994 | |
998 | |
995 | hv_store (hv, key, -len, decode_sv (dec), 0); |
999 | hv_store (hv, key, -len, decode_sv (dec), 0); |
996 | |
1000 | |
997 | return; |
1001 | return; |
… | |
… | |
1126 | } |
1130 | } |
1127 | |
1131 | |
1128 | if (utf8) |
1132 | if (utf8) |
1129 | { |
1133 | { |
1130 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
1134 | if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) |
1131 | if (!is_utf8_string (SvPVX (sv), SvCUR (sv))) |
1135 | if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv))) |
1132 | ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); |
1136 | ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); |
1133 | |
1137 | |
1134 | SvUTF8_on (sv); |
1138 | SvUTF8_on (sv); |
1135 | } |
1139 | } |
1136 | |
1140 | |
… | |
… | |
1191 | case CBOR_TAG_VALUE_SHAREABLE: |
1195 | case CBOR_TAG_VALUE_SHAREABLE: |
1192 | { |
1196 | { |
1193 | if (ecb_expect_false (!dec->shareable)) |
1197 | if (ecb_expect_false (!dec->shareable)) |
1194 | dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); |
1198 | dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); |
1195 | |
1199 | |
1196 | if (dec->cbor.flags & F_ALLOW_CYCLES) |
1200 | if (ecb_expect_false (dec->cbor.flags & (F_ALLOW_CYCLES | F_ALLOW_WEAK_CYCLES))) |
1197 | { |
1201 | { |
|
|
1202 | // if cycles are allowed, then we store an AV as value |
|
|
1203 | // while it is being decoded, and gather unresolved |
|
|
1204 | // references in it, to be resolved after decoding. |
|
|
1205 | int idx, i; |
1198 | sv = newSV (0); |
1206 | AV *av = newAV (); |
1199 | av_push (dec->shareable, SvREFCNT_inc_NN (sv)); |
1207 | av_push (dec->shareable, (SV *)av); |
|
|
1208 | idx = AvFILLp (dec->shareable); |
1200 | |
1209 | |
1201 | SV *osv = decode_sv (dec); |
1210 | sv = decode_sv (dec); |
1202 | sv_setsv (sv, osv); |
1211 | |
|
|
1212 | // the AV now contains \undef for all unresolved references, |
|
|
1213 | // so we fix them up here. |
|
|
1214 | for (i = 0; i <= AvFILLp (av); ++i) |
|
|
1215 | SvRV_set (AvARRAY (av)[i], SvREFCNT_inc_NN (SvRV (sv))); |
|
|
1216 | |
|
|
1217 | // weaken all recursive references |
|
|
1218 | if (dec->cbor.flags & F_ALLOW_WEAK_CYCLES) |
|
|
1219 | for (i = 0; i <= AvFILLp (av); ++i) |
|
|
1220 | sv_rvweaken (AvARRAY (av)[i]); |
|
|
1221 | |
|
|
1222 | // now replace the AV by a reference to the completed value |
1203 | SvREFCNT_dec_NN (osv); |
1223 | SvREFCNT_dec_NN ((SV *)av); |
|
|
1224 | AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv); |
1204 | } |
1225 | } |
1205 | else |
1226 | else |
1206 | { |
1227 | { |
1207 | av_push (dec->shareable, &PL_sv_undef); |
1228 | av_push (dec->shareable, &PL_sv_undef); |
1208 | int idx = AvFILLp (dec->shareable); |
1229 | int idx = AvFILLp (dec->shareable); |
1209 | sv = decode_sv (dec); |
1230 | sv = decode_sv (dec); |
1210 | av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv)); |
1231 | AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv); |
1211 | } |
1232 | } |
1212 | } |
1233 | } |
1213 | break; |
1234 | break; |
1214 | |
1235 | |
1215 | case CBOR_TAG_VALUE_SHAREDREF: |
1236 | case CBOR_TAG_VALUE_SHAREDREF: |
… | |
… | |
1220 | UV idx = decode_uint (dec); |
1241 | UV idx = decode_uint (dec); |
1221 | |
1242 | |
1222 | if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) |
1243 | if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) |
1223 | ERR ("corrupted CBOR data (sharedref index out of bounds)"); |
1244 | ERR ("corrupted CBOR data (sharedref index out of bounds)"); |
1224 | |
1245 | |
1225 | sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); |
1246 | sv = AvARRAY (dec->shareable)[idx]; |
1226 | |
1247 | |
1227 | if (sv == &PL_sv_undef) |
1248 | // reference to cycle, we create a new \undef and use that, and also |
|
|
1249 | // register it in the AV for later fixing |
|
|
1250 | if (ecb_expect_false (SvTYPE (sv) == SVt_PVAV)) |
|
|
1251 | { |
|
|
1252 | AV *av = (AV *)sv; |
|
|
1253 | sv = newRV_noinc (&PL_sv_undef); |
|
|
1254 | av_push (av, SvREFCNT_inc_NN (sv)); |
|
|
1255 | } |
|
|
1256 | else if (ecb_expect_false (sv == &PL_sv_undef)) // not yet decoded, but cycles not allowed |
1228 | ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled"); |
1257 | ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled"); |
|
|
1258 | else // we decoded the object earlier, no cycle |
|
|
1259 | sv = newSVsv (sv); |
1229 | } |
1260 | } |
1230 | break; |
1261 | break; |
1231 | |
1262 | |
1232 | case CBOR_TAG_PERL_OBJECT: |
1263 | case CBOR_TAG_PERL_OBJECT: |
1233 | { |
1264 | { |
… | |
… | |
1470 | SvREFCNT_dec_NN (sv); |
1501 | SvREFCNT_dec_NN (sv); |
1471 | |
1502 | |
1472 | if (dec.err_sv) |
1503 | if (dec.err_sv) |
1473 | sv_2mortal (dec.err_sv); |
1504 | sv_2mortal (dec.err_sv); |
1474 | |
1505 | |
1475 | croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); |
1506 | croak ("%s, at offset %ld (octet 0x%02x)", dec.err, (long)(dec.cur - (U8 *)data), (int)(uint8_t)*dec.cur); |
1476 | } |
1507 | } |
1477 | |
1508 | |
1478 | sv = sv_2mortal (sv); |
1509 | sv = sv_2mortal (sv); |
1479 | |
1510 | |
1480 | return sv; |
1511 | return sv; |
… | |
… | |
1651 | ))); |
1682 | ))); |
1652 | } |
1683 | } |
1653 | |
1684 | |
1654 | void shrink (CBOR *self, int enable = 1) |
1685 | void shrink (CBOR *self, int enable = 1) |
1655 | ALIAS: |
1686 | ALIAS: |
1656 | shrink = F_SHRINK |
1687 | shrink = F_SHRINK |
1657 | allow_unknown = F_ALLOW_UNKNOWN |
1688 | allow_unknown = F_ALLOW_UNKNOWN |
1658 | allow_sharing = F_ALLOW_SHARING |
1689 | allow_sharing = F_ALLOW_SHARING |
1659 | allow_cycles = F_ALLOW_CYCLES |
1690 | allow_cycles = F_ALLOW_CYCLES |
|
|
1691 | allow_weak_cycles = F_ALLOW_WEAK_CYCLES |
1660 | forbid_objects = F_FORBID_OBJECTS |
1692 | forbid_objects = F_FORBID_OBJECTS |
1661 | pack_strings = F_PACK_STRINGS |
1693 | pack_strings = F_PACK_STRINGS |
1662 | text_keys = F_TEXT_KEYS |
1694 | text_keys = F_TEXT_KEYS |
1663 | text_strings = F_TEXT_STRINGS |
1695 | text_strings = F_TEXT_STRINGS |
1664 | validate_utf8 = F_VALIDATE_UTF8 |
1696 | validate_utf8 = F_VALIDATE_UTF8 |
1665 | PPCODE: |
1697 | PPCODE: |
1666 | { |
1698 | { |
1667 | if (enable) |
1699 | if (enable) |
1668 | self->flags |= ix; |
1700 | self->flags |= ix; |
1669 | else |
1701 | else |
… | |
… | |
1672 | XPUSHs (ST (0)); |
1704 | XPUSHs (ST (0)); |
1673 | } |
1705 | } |
1674 | |
1706 | |
1675 | void get_shrink (CBOR *self) |
1707 | void get_shrink (CBOR *self) |
1676 | ALIAS: |
1708 | ALIAS: |
1677 | get_shrink = F_SHRINK |
1709 | get_shrink = F_SHRINK |
1678 | get_allow_unknown = F_ALLOW_UNKNOWN |
1710 | get_allow_unknown = F_ALLOW_UNKNOWN |
1679 | get_allow_sharing = F_ALLOW_SHARING |
1711 | get_allow_sharing = F_ALLOW_SHARING |
1680 | get_allow_cycles = F_ALLOW_CYCLES |
1712 | get_allow_cycles = F_ALLOW_CYCLES |
|
|
1713 | get_allow_weak_cycles = F_ALLOW_WEAK_CYCLES |
1681 | get_forbid_objects = F_FORBID_OBJECTS |
1714 | get_forbid_objects = F_FORBID_OBJECTS |
1682 | get_pack_strings = F_PACK_STRINGS |
1715 | get_pack_strings = F_PACK_STRINGS |
1683 | get_text_keys = F_TEXT_KEYS |
1716 | get_text_keys = F_TEXT_KEYS |
1684 | get_text_strings = F_TEXT_STRINGS |
1717 | get_text_strings = F_TEXT_STRINGS |
1685 | get_validate_utf8 = F_VALIDATE_UTF8 |
1718 | get_validate_utf8 = F_VALIDATE_UTF8 |
1686 | PPCODE: |
1719 | PPCODE: |
1687 | XPUSHs (boolSV (self->flags & ix)); |
1720 | XPUSHs (boolSV (self->flags & ix)); |
1688 | |
1721 | |
1689 | void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) |
1722 | void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) |
1690 | PPCODE: |
1723 | PPCODE: |