ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.69 by root, Mon Nov 30 20:38:25 2020 UTC vs.
Revision 1.80 by root, Fri Sep 8 20:03:06 2023 UTC

28#endif 28#endif
29#ifndef SvREFCNT_dec_NN 29#ifndef SvREFCNT_dec_NN
30# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) 30# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31#endif 31#endif
32 32
33// perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0
34#define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len)))
35
33// known major and minor types 36// known major and minor types
34enum cbor_type 37enum cbor_type
35{ 38{
36 MAJOR_SHIFT = 5, 39 MAJOR_SHIFT = 5,
37 MINOR_MASK = 0x1f, 40 MINOR_MASK = 0x1f,
109 AS_BYTES = 2, 112 AS_BYTES = 2,
110 AS_TEXT = 3, 113 AS_TEXT = 3,
111 AS_FLOAT16 = 4, 114 AS_FLOAT16 = 4,
112 AS_FLOAT32 = 5, 115 AS_FLOAT32 = 5,
113 AS_FLOAT64 = 6, 116 AS_FLOAT64 = 6,
117 AS_MAP = 7,
114 // possibly future enhancements: (generic) float, (generic) string 118 // possibly future enhancements: (generic) float, (generic) string
115}; 119};
116 120
117#define F_SHRINK 0x00000001UL 121#define F_SHRINK 0x00000001UL
118#define F_ALLOW_UNKNOWN 0x00000002UL 122#define F_ALLOW_UNKNOWN 0x00000002UL
119#define F_ALLOW_SHARING 0x00000004UL 123#define F_ALLOW_SHARING 0x00000004UL
120#define F_ALLOW_CYCLES 0x00000008UL 124#define F_ALLOW_CYCLES 0x00000008UL
125#define F_ALLOW_WEAK_CYCLES 0x00000010UL
121#define F_FORBID_OBJECTS 0x00000010UL 126#define F_FORBID_OBJECTS 0x00000020UL
122#define F_PACK_STRINGS 0x00000020UL 127#define F_PACK_STRINGS 0x00000040UL
123#define F_TEXT_KEYS 0x00000040UL 128#define F_TEXT_KEYS 0x00000080UL
124#define F_TEXT_STRINGS 0x00000080UL 129#define F_TEXT_STRINGS 0x00000100UL
125#define F_VALIDATE_UTF8 0x00000100UL 130#define F_VALIDATE_UTF8 0x00000200UL
126 131
127#define INIT_SIZE 32 // initial scalar size to be allocated 132#define INIT_SIZE 32 // initial scalar size to be allocated
128 133
129#define SB do { 134#define SB do {
130#define SE } while (0) 135#define SE } while (0)
200#endif 205#endif
201 } 206 }
202} 207}
203 208
204// minimum length of a string to be registered for stringref 209// minimum length of a string to be registered for stringref
205ecb_inline int 210ecb_inline STRLEN
206minimum_string_length (UV idx) 211minimum_string_length (UV idx)
207{ 212{
208 return idx <= 23 ? 3 213 return idx <= 23 ? 3
209 : idx <= 0xffU ? 4 214 : idx <= 0xffU ? 4
210 : idx <= 0xffffU ? 5 215 : idx <= 0xffffU ? 5
238 SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); 243 SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
239 enc->cur = SvPVX (enc->sv) + cur; 244 enc->cur = SvPVX (enc->sv) + cur;
240 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; 245 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
241 } 246 }
242} 247}
248
249static void encode_sv (enc_t *enc, SV *sv);
243 250
244ecb_inline void 251ecb_inline void
245encode_ch (enc_t *enc, char ch) 252encode_ch (enc_t *enc, char ch)
246{ 253{
247 need (enc, 1); 254 need (enc, 1);
423encode_bool (enc_t *enc, int istrue) 430encode_bool (enc_t *enc, int istrue)
424{ 431{
425 encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE); 432 encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
426} 433}
427 434
435// encodes an arrayref containing key-value pairs as CBOR map
436ecb_inline void
437encode_array_as_map (enc_t *enc, SV *sv)
438{
439 if (enc->depth >= enc->cbor.max_depth)
440 croak (ERR_NESTING_EXCEEDED);
441
442 ++enc->depth;
443
444 // as_map does error checking for us, but we re-check in case
445 // things have changed.
446
447 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
448 croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)");
449
450 AV *av = (AV *)SvRV (sv);
451 int i, len = av_len (av);
452
453 if (!(len & 1))
454 croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)");
455
456 encode_uint (enc, MAJOR_MAP, (len + 1) >> 1);
457
458 for (i = 0; i <= len; ++i)
459 {
460 SV **svp = av_fetch (av, i, 0);
461 encode_sv (enc, svp ? *svp : &PL_sv_undef);
462 }
463
464 --enc->depth;
465}
466
428ecb_inline void 467ecb_inline void
429encode_forced (enc_t *enc, UV type, SV *sv) 468encode_forced (enc_t *enc, UV type, SV *sv)
430{ 469{
431 switch (type) 470 switch (type)
432 { 471 {
461 500
462 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break; 501 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
463 case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break; 502 case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
464 case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break; 503 case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
465 504
505 case AS_MAP: encode_array_as_map (enc, sv); break;
506
466 default: 507 default:
467 croak ("encountered malformed CBOR::XS::Tagged object"); 508 croak ("encountered malformed CBOR::XS::Tagged object");
468 } 509 }
469} 510}
470
471static void encode_sv (enc_t *enc, SV *sv);
472 511
473static void 512static void
474encode_av (enc_t *enc, AV *av) 513encode_av (enc_t *enc, AV *av)
475{ 514{
476 int i, len = av_len (av); 515 int i, len = av_len (av);
952 991
953 WANT (len); 992 WANT (len);
954 dec->cur += len; 993 dec->cur += len;
955 994
956 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) 995 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
957 if (!is_utf8_string (key, len)) 996 if (!cbor_is_utf8_string ((U8 *)key, len))
958 ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); 997 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
959 998
960 hv_store (hv, key, -len, decode_sv (dec), 0); 999 hv_store (hv, key, -len, decode_sv (dec), 0);
961 1000
962 return; 1001 return;
1091 } 1130 }
1092 1131
1093 if (utf8) 1132 if (utf8)
1094 { 1133 {
1095 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) 1134 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
1096 if (!is_utf8_string (SvPVX (sv), SvCUR (sv))) 1135 if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv)))
1097 ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); 1136 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
1098 1137
1099 SvUTF8_on (sv); 1138 SvUTF8_on (sv);
1100 } 1139 }
1101 1140
1156 case CBOR_TAG_VALUE_SHAREABLE: 1195 case CBOR_TAG_VALUE_SHAREABLE:
1157 { 1196 {
1158 if (ecb_expect_false (!dec->shareable)) 1197 if (ecb_expect_false (!dec->shareable))
1159 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); 1198 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1160 1199
1161 if (dec->cbor.flags & F_ALLOW_CYCLES) 1200 if (ecb_expect_false (dec->cbor.flags & (F_ALLOW_CYCLES | F_ALLOW_WEAK_CYCLES)))
1162 { 1201 {
1202 // if cycles are allowed, then we store an AV as value
1203 // while it is being decoded, and gather unresolved
1204 // references in it, to be resolved after decoding.
1205 int idx, i;
1163 sv = newSV (0); 1206 AV *av = newAV ();
1164 av_push (dec->shareable, SvREFCNT_inc_NN (sv)); 1207 av_push (dec->shareable, (SV *)av);
1208 idx = AvFILLp (dec->shareable);
1165 1209
1166 SV *osv = decode_sv (dec); 1210 sv = decode_sv (dec);
1167 sv_setsv (sv, osv); 1211
1212 // the AV now contains \undef for all unresolved references,
1213 // so we fix them up here.
1214 for (i = 0; i <= AvFILLp (av); ++i)
1215 SvRV_set (AvARRAY (av)[i], SvREFCNT_inc_NN (SvRV (sv)));
1216
1217 // weaken all recursive references
1218 if (dec->cbor.flags & F_ALLOW_WEAK_CYCLES)
1219 for (i = 0; i <= AvFILLp (av); ++i)
1220 sv_rvweaken (AvARRAY (av)[i]);
1221
1222 // now replace the AV by a reference to the completed value
1168 SvREFCNT_dec_NN (osv); 1223 SvREFCNT_dec_NN ((SV *)av);
1224 AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv);
1169 } 1225 }
1170 else 1226 else
1171 { 1227 {
1172 av_push (dec->shareable, &PL_sv_undef); 1228 av_push (dec->shareable, &PL_sv_undef);
1173 int idx = AvFILLp (dec->shareable); 1229 int idx = AvFILLp (dec->shareable);
1174 sv = decode_sv (dec); 1230 sv = decode_sv (dec);
1175 av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv)); 1231 AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv);
1176 } 1232 }
1177 } 1233 }
1178 break; 1234 break;
1179 1235
1180 case CBOR_TAG_VALUE_SHAREDREF: 1236 case CBOR_TAG_VALUE_SHAREDREF:
1185 UV idx = decode_uint (dec); 1241 UV idx = decode_uint (dec);
1186 1242
1187 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) 1243 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1188 ERR ("corrupted CBOR data (sharedref index out of bounds)"); 1244 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1189 1245
1190 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); 1246 sv = AvARRAY (dec->shareable)[idx];
1191 1247
1192 if (sv == &PL_sv_undef) 1248 // reference to cycle, we create a new \undef and use that, and also
1249 // register it in the AV for later fixing
1250 if (ecb_expect_false (SvTYPE (sv) == SVt_PVAV))
1251 {
1252 AV *av = (AV *)sv;
1253 sv = newRV_noinc (&PL_sv_undef);
1254 av_push (av, SvREFCNT_inc_NN (sv));
1255 }
1256 else if (ecb_expect_false (sv == &PL_sv_undef)) // not yet decoded, but cycles not allowed
1193 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled"); 1257 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1258 else // we decoded the object earlier, no cycle
1259 sv = newSVsv (sv);
1194 } 1260 }
1195 break; 1261 break;
1196 1262
1197 case CBOR_TAG_PERL_OBJECT: 1263 case CBOR_TAG_PERL_OBJECT:
1198 { 1264 {
1435 SvREFCNT_dec_NN (sv); 1501 SvREFCNT_dec_NN (sv);
1436 1502
1437 if (dec.err_sv) 1503 if (dec.err_sv)
1438 sv_2mortal (dec.err_sv); 1504 sv_2mortal (dec.err_sv);
1439 1505
1440 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1506 croak ("%s, at offset %ld (octet 0x%02x)", dec.err, (long)(dec.cur - (U8 *)data), (int)(uint8_t)*dec.cur);
1441 } 1507 }
1442 1508
1443 sv = sv_2mortal (sv); 1509 sv = sv_2mortal (sv);
1444 1510
1445 return sv; 1511 return sv;
1534 1600
1535 break; 1601 break;
1536 1602
1537 case MAJOR_MAP >> MAJOR_SHIFT: 1603 case MAJOR_MAP >> MAJOR_SHIFT:
1538 len <<= 1; 1604 len <<= 1;
1605 /* FALLTHROUGH */
1539 case MAJOR_ARRAY >> MAJOR_SHIFT: 1606 case MAJOR_ARRAY >> MAJOR_SHIFT:
1540 if (len) 1607 if (len)
1541 { 1608 {
1542 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest 1609 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1543 count = len + 1; 1610 count = len + 1;
1615 ))); 1682 )));
1616} 1683}
1617 1684
1618void shrink (CBOR *self, int enable = 1) 1685void shrink (CBOR *self, int enable = 1)
1619 ALIAS: 1686 ALIAS:
1620 shrink = F_SHRINK 1687 shrink = F_SHRINK
1621 allow_unknown = F_ALLOW_UNKNOWN 1688 allow_unknown = F_ALLOW_UNKNOWN
1622 allow_sharing = F_ALLOW_SHARING 1689 allow_sharing = F_ALLOW_SHARING
1623 allow_cycles = F_ALLOW_CYCLES 1690 allow_cycles = F_ALLOW_CYCLES
1691 allow_weak_cycles = F_ALLOW_WEAK_CYCLES
1624 forbid_objects = F_FORBID_OBJECTS 1692 forbid_objects = F_FORBID_OBJECTS
1625 pack_strings = F_PACK_STRINGS 1693 pack_strings = F_PACK_STRINGS
1626 text_keys = F_TEXT_KEYS 1694 text_keys = F_TEXT_KEYS
1627 text_strings = F_TEXT_STRINGS 1695 text_strings = F_TEXT_STRINGS
1628 validate_utf8 = F_VALIDATE_UTF8 1696 validate_utf8 = F_VALIDATE_UTF8
1629 PPCODE: 1697 PPCODE:
1630{ 1698{
1631 if (enable) 1699 if (enable)
1632 self->flags |= ix; 1700 self->flags |= ix;
1633 else 1701 else
1636 XPUSHs (ST (0)); 1704 XPUSHs (ST (0));
1637} 1705}
1638 1706
1639void get_shrink (CBOR *self) 1707void get_shrink (CBOR *self)
1640 ALIAS: 1708 ALIAS:
1641 get_shrink = F_SHRINK 1709 get_shrink = F_SHRINK
1642 get_allow_unknown = F_ALLOW_UNKNOWN 1710 get_allow_unknown = F_ALLOW_UNKNOWN
1643 get_allow_sharing = F_ALLOW_SHARING 1711 get_allow_sharing = F_ALLOW_SHARING
1644 get_allow_cycles = F_ALLOW_CYCLES 1712 get_allow_cycles = F_ALLOW_CYCLES
1713 get_allow_weak_cycles = F_ALLOW_WEAK_CYCLES
1645 get_forbid_objects = F_FORBID_OBJECTS 1714 get_forbid_objects = F_FORBID_OBJECTS
1646 get_pack_strings = F_PACK_STRINGS 1715 get_pack_strings = F_PACK_STRINGS
1647 get_text_keys = F_TEXT_KEYS 1716 get_text_keys = F_TEXT_KEYS
1648 get_text_strings = F_TEXT_STRINGS 1717 get_text_strings = F_TEXT_STRINGS
1649 get_validate_utf8 = F_VALIDATE_UTF8 1718 get_validate_utf8 = F_VALIDATE_UTF8
1650 PPCODE: 1719 PPCODE:
1651 XPUSHs (boolSV (self->flags & ix)); 1720 XPUSHs (boolSV (self->flags & ix));
1652 1721
1653void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1722void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1654 PPCODE: 1723 PPCODE:

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines