ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.35 by root, Sat Nov 30 17:19:34 2013 UTC vs.
Revision 1.42 by root, Tue Dec 10 15:31:40 2013 UTC

97}; 97};
98 98
99#define F_SHRINK 0x00000001UL 99#define F_SHRINK 0x00000001UL
100#define F_ALLOW_UNKNOWN 0x00000002UL 100#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL 101#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL
102#define F_PACK_STRINGS 0x00000008UL 103#define F_PACK_STRINGS 0x00000010UL
104#define F_VALIDATE_UTF8 0x00000020UL
103 105
104#define INIT_SIZE 32 // initial scalar size to be allocated 106#define INIT_SIZE 32 // initial scalar size to be allocated
105 107
106#define SB do { 108#define SB do {
107#define SE } while (0) 109#define SE } while (0)
126typedef struct { 128typedef struct {
127 U32 flags; 129 U32 flags;
128 U32 max_depth; 130 U32 max_depth;
129 STRLEN max_size; 131 STRLEN max_size;
130 SV *filter; 132 SV *filter;
133
134 // for the incremental parser
135 STRLEN incr_pos; // the current offset into the text
136 STRLEN incr_need; // minimum bytes needed to decode
137 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
131} CBOR; 138} CBOR;
132 139
133ecb_inline void 140ecb_inline void
134cbor_init (CBOR *cbor) 141cbor_init (CBOR *cbor)
135{ 142{
139 146
140ecb_inline void 147ecb_inline void
141cbor_free (CBOR *cbor) 148cbor_free (CBOR *cbor)
142{ 149{
143 SvREFCNT_dec (cbor->filter); 150 SvREFCNT_dec (cbor->filter);
151 SvREFCNT_dec (cbor->incr_count);
144} 152}
145 153
146///////////////////////////////////////////////////////////////////////////// 154/////////////////////////////////////////////////////////////////////////////
147// utility functions 155// utility functions
148 156
228{ 236{
229 need (enc, 9); 237 need (enc, 9);
230 238
231 if (ecb_expect_true (len < LENGTH_EXT1)) 239 if (ecb_expect_true (len < LENGTH_EXT1))
232 *enc->cur++ = major | len; 240 *enc->cur++ = major | len;
233 else if (ecb_expect_true (len <= 0xff)) 241 else if (ecb_expect_true (len <= 0xffU))
234 { 242 {
235 *enc->cur++ = major | LENGTH_EXT1; 243 *enc->cur++ = major | LENGTH_EXT1;
236 *enc->cur++ = len; 244 *enc->cur++ = len;
237 } 245 }
238 else if (len <= 0xffff) 246 else if (len <= 0xffffU)
239 { 247 {
240 *enc->cur++ = major | LENGTH_EXT2; 248 *enc->cur++ = major | LENGTH_EXT2;
241 *enc->cur++ = len >> 8; 249 *enc->cur++ = len >> 8;
242 *enc->cur++ = len; 250 *enc->cur++ = len;
243 } 251 }
244 else if (len <= 0xffffffff) 252 else if (len <= 0xffffffffU)
245 { 253 {
246 *enc->cur++ = major | LENGTH_EXT4; 254 *enc->cur++ = major | LENGTH_EXT4;
247 *enc->cur++ = len >> 24; 255 *enc->cur++ = len >> 24;
248 *enc->cur++ = len >> 16; 256 *enc->cur++ = len >> 16;
249 *enc->cur++ = len >> 8; 257 *enc->cur++ = len >> 8;
627 U8 m = *dec->cur & MINOR_MASK; 635 U8 m = *dec->cur & MINOR_MASK;
628 ++dec->cur; 636 ++dec->cur;
629 637
630 if (ecb_expect_true (m < LENGTH_EXT1)) 638 if (ecb_expect_true (m < LENGTH_EXT1))
631 return m; 639 return m;
632 640 else if (ecb_expect_true (m == LENGTH_EXT1))
633 switch (m)
634 { 641 {
635 case LENGTH_EXT1:
636 WANT (1); 642 WANT (1);
637 dec->cur += 1; 643 dec->cur += 1;
638 return dec->cur[-1]; 644 return dec->cur[-1];
639 645 }
640 case LENGTH_EXT2: 646 else if (ecb_expect_true (m == LENGTH_EXT2))
647 {
641 WANT (2); 648 WANT (2);
642 dec->cur += 2; 649 dec->cur += 2;
643 return (((UV)dec->cur[-2]) << 8) 650 return (((UV)dec->cur[-2]) << 8)
644 | ((UV)dec->cur[-1]); 651 | ((UV)dec->cur[-1]);
645 652 }
646 case LENGTH_EXT4: 653 else if (ecb_expect_true (m == LENGTH_EXT4))
654 {
647 WANT (4); 655 WANT (4);
648 dec->cur += 4; 656 dec->cur += 4;
649 return (((UV)dec->cur[-4]) << 24) 657 return (((UV)dec->cur[-4]) << 24)
650 | (((UV)dec->cur[-3]) << 16) 658 | (((UV)dec->cur[-3]) << 16)
651 | (((UV)dec->cur[-2]) << 8) 659 | (((UV)dec->cur[-2]) << 8)
652 | ((UV)dec->cur[-1]); 660 | ((UV)dec->cur[-1]);
653 661 }
654 case LENGTH_EXT8: 662 else if (ecb_expect_true (m == LENGTH_EXT8))
663 {
655 WANT (8); 664 WANT (8);
656 dec->cur += 8; 665 dec->cur += 8;
657 666
658 return 667 return
659#if UVSIZE < 8 668#if UVSIZE < 8
660 0 669 0
661#else 670#else
662 (((UV)dec->cur[-8]) << 56) 671 (((UV)dec->cur[-8]) << 56)
663 | (((UV)dec->cur[-7]) << 48) 672 | (((UV)dec->cur[-7]) << 48)
664 | (((UV)dec->cur[-6]) << 40) 673 | (((UV)dec->cur[-6]) << 40)
665 | (((UV)dec->cur[-5]) << 32) 674 | (((UV)dec->cur[-5]) << 32)
666#endif 675#endif
667 | (((UV)dec->cur[-4]) << 24) 676 | (((UV)dec->cur[-4]) << 24)
668 | (((UV)dec->cur[-3]) << 16) 677 | (((UV)dec->cur[-3]) << 16)
669 | (((UV)dec->cur[-2]) << 8) 678 | (((UV)dec->cur[-2]) << 8)
670 | ((UV)dec->cur[-1]); 679 | ((UV)dec->cur[-1]);
671 680 }
672 default: 681 else
673 ERR ("corrupted CBOR data (unsupported integer minor encoding)"); 682 ERR ("corrupted CBOR data (unsupported integer minor encoding)");
674 }
675 683
676fail: 684fail:
677 return 0; 685 return 0;
678} 686}
679 687
705 } 713 }
706 else 714 else
707 { 715 {
708 int i, len = decode_uint (dec); 716 int i, len = decode_uint (dec);
709 717
718 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
710 av_fill (av, len - 1); 719 av_fill (av, len - 1);
711 720
712 for (i = 0; i < len; ++i) 721 for (i = 0; i < len; ++i)
713 AvARRAY (av)[i] = decode_sv (dec); 722 AvARRAY (av)[i] = decode_sv (dec);
714 } 723 }
727{ 736{
728 // for speed reasons, we specialcase single-string 737 // for speed reasons, we specialcase single-string
729 // byte or utf-8 strings as keys, but only when !stringref 738 // byte or utf-8 strings as keys, but only when !stringref
730 739
731 if (ecb_expect_true (!dec->stringref)) 740 if (ecb_expect_true (!dec->stringref))
732 if ((*dec->cur - MAJOR_BYTES) <= 27) 741 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
733 { 742 {
734 I32 len = decode_uint (dec); 743 I32 len = decode_uint (dec);
735 char *key = (char *)dec->cur; 744 char *key = (char *)dec->cur;
736 745
737 dec->cur += len; 746 dec->cur += len;
738 747
739 if (ecb_expect_false (dec->stringref))
740 av_push (dec->stringref, newSVpvn (key, len));
741
742 hv_store (hv, key, len, decode_sv (dec), 0); 748 hv_store (hv, key, len, decode_sv (dec), 0);
743 749
744 return; 750 return;
745 } 751 }
746 else if ((*dec->cur - MAJOR_TEXT) <= 27) 752 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
747 { 753 {
748 I32 len = decode_uint (dec); 754 I32 len = decode_uint (dec);
749 char *key = (char *)dec->cur; 755 char *key = (char *)dec->cur;
750 756
751 dec->cur += len; 757 dec->cur += len;
752 758
753 if (ecb_expect_false (dec->stringref)) 759 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
754 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1)); 760 if (!is_utf8_string (key, len))
761 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
755 762
756 hv_store (hv, key, -len, decode_sv (dec), 0); 763 hv_store (hv, key, -len, decode_sv (dec), 0);
757 764
758 return; 765 return;
759 } 766 }
761 SV *k = decode_sv (dec); 768 SV *k = decode_sv (dec);
762 SV *v = decode_sv (dec); 769 SV *v = decode_sv (dec);
763 770
764 hv_store_ent (hv, k, v, 0); 771 hv_store_ent (hv, k, v, 0);
765 SvREFCNT_dec (k); 772 SvREFCNT_dec (k);
773
774fail:
775 ;
766} 776}
767 777
768static SV * 778static SV *
769decode_hv (dec_t *dec) 779decode_hv (dec_t *dec)
770{ 780{
852 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1)) 862 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
853 av_push (dec->stringref, SvREFCNT_inc_NN (sv)); 863 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
854 } 864 }
855 865
856 if (utf8) 866 if (utf8)
867 {
868 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
869 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
870 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
871
857 SvUTF8_on (sv); 872 SvUTF8_on (sv);
873 }
858 874
859 return sv; 875 return sv;
860 876
861fail: 877fail:
862 SvREFCNT_dec (sv); 878 SvREFCNT_dec (sv);
911 case CBOR_TAG_VALUE_SHAREABLE: 927 case CBOR_TAG_VALUE_SHAREABLE:
912 { 928 {
913 if (ecb_expect_false (!dec->shareable)) 929 if (ecb_expect_false (!dec->shareable))
914 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); 930 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
915 931
932 if (dec->cbor.flags & F_ALLOW_CYCLES)
933 {
916 sv = newSV (0); 934 sv = newSV (0);
917 av_push (dec->shareable, SvREFCNT_inc_NN (sv)); 935 av_push (dec->shareable, SvREFCNT_inc_NN (sv));
918 936
919 SV *osv = decode_sv (dec); 937 SV *osv = decode_sv (dec);
920 sv_setsv (sv, osv); 938 sv_setsv (sv, osv);
921 SvREFCNT_dec_NN (osv); 939 SvREFCNT_dec_NN (osv);
940 }
941 else
942 {
943 av_push (dec->shareable, &PL_sv_undef);
944 int idx = AvFILLp (dec->shareable);
945 sv = decode_sv (dec);
946 av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
947 }
922 } 948 }
923 break; 949 break;
924 950
925 case CBOR_TAG_VALUE_SHAREDREF: 951 case CBOR_TAG_VALUE_SHAREDREF:
926 { 952 {
931 957
932 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable)) 958 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
933 ERR ("corrupted CBOR data (sharedref index out of bounds)"); 959 ERR ("corrupted CBOR data (sharedref index out of bounds)");
934 960
935 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); 961 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
962
963 if (sv == &PL_sv_undef)
964 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
936 } 965 }
937 break; 966 break;
938 967
939 case CBOR_TAG_PERL_OBJECT: 968 case CBOR_TAG_PERL_OBJECT:
940 { 969 {
1109 1138
1110 return newSVnv (ecb_binary64_to_double (fp)); 1139 return newSVnv (ecb_binary64_to_double (fp));
1111 } 1140 }
1112 1141
1113 // 0..19 unassigned simple 1142 // 0..19 unassigned simple
1114 // 24 reserved + unassigned (reserved values are not encodable) 1143 // 24 reserved + unassigned simple (reserved values are not encodable)
1144 // 28-30 unassigned misc
1145 // 31 break code
1115 default: 1146 default:
1116 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1147 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1117 } 1148 }
1118 1149
1119 break; 1150 break;
1120 } 1151 }
1121 1152
1148 if (dec.cur != dec.end && !dec.err) 1179 if (dec.cur != dec.end && !dec.err)
1149 dec.err = "garbage after CBOR object"; 1180 dec.err = "garbage after CBOR object";
1150 1181
1151 if (dec.err) 1182 if (dec.err)
1152 { 1183 {
1184 if (dec.shareable)
1185 {
1186 // need to break cyclic links, which would all be in shareable
1187 int i;
1188 SV **svp;
1189
1190 for (i = av_len (dec.shareable) + 1; i--; )
1191 if ((svp = av_fetch (dec.shareable, i, 0)))
1192 sv_setsv (*svp, &PL_sv_undef);
1193 }
1194
1153 SvREFCNT_dec (sv); 1195 SvREFCNT_dec (sv);
1154 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1196 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1155 } 1197 }
1156 1198
1157 sv = sv_2mortal (sv); 1199 sv = sv_2mortal (sv);
1158 1200
1159 return sv; 1201 return sv;
1160} 1202}
1161 1203
1204/////////////////////////////////////////////////////////////////////////////
1205// incremental parser
1206
1207#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1208
1209 // returns 0 for not yet (more data needed), 1 for success or error
1210static int
1211incr_parse (CBOR *self, SV *cborstr)
1212{
1213 STRLEN cur;
1214 SvPV (cborstr, cur);
1215
1216 while (ecb_expect_true (self->incr_need <= cur))
1217 {
1218 // table of integer count bytes
1219 static I8 incr_len[MINOR_MASK + 1] = {
1220 0, 0, 0, 0, 0, 0, 0, 0,
1221 0, 0, 0, 0, 0, 0, 0, 0,
1222 0, 0, 0, 0, 0, 0, 0, 0,
1223 1, 2, 4, 8,-1,-1,-1,-2
1224 };
1225
1226 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1227 U8 m = *p & MINOR_MASK;
1228 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1229 I8 ilen = incr_len[m];
1230
1231 self->incr_need = self->incr_pos + 1;
1232
1233 if (ecb_expect_false (ilen < 0))
1234 {
1235 if (m != MINOR_INDEF)
1236 return 1; // error
1237
1238 if (*p == (MAJOR_MISC | MINOR_INDEF))
1239 {
1240 if (count >= 0)
1241 return 1; // error
1242
1243 count = 1;
1244 }
1245 else
1246 {
1247 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1248 count = -1;
1249 }
1250 }
1251 else
1252 {
1253 self->incr_need += ilen;
1254 if (ecb_expect_false (self->incr_need > cur))
1255 return 0;
1256
1257 int major = *p >> MAJOR_SHIFT;
1258
1259 switch (major)
1260 {
1261 case MAJOR_BYTES >> MAJOR_SHIFT:
1262 case MAJOR_TEXT >> MAJOR_SHIFT:
1263 case MAJOR_ARRAY >> MAJOR_SHIFT:
1264 case MAJOR_MAP >> MAJOR_SHIFT:
1265 {
1266 UV len;
1267
1268 if (ecb_expect_false (ilen))
1269 {
1270 len = 0;
1271
1272 do {
1273 len = (len << 8) | *++p;
1274 } while (--ilen);
1275 }
1276 else
1277 len = m;
1278
1279 switch (major)
1280 {
1281 case MAJOR_BYTES >> MAJOR_SHIFT:
1282 case MAJOR_TEXT >> MAJOR_SHIFT:
1283 self->incr_need += len;
1284 if (ecb_expect_false (self->incr_need > cur))
1285 return 0;
1286
1287 break;
1288
1289 case MAJOR_MAP >> MAJOR_SHIFT:
1290 len <<= 1;
1291 case MAJOR_ARRAY >> MAJOR_SHIFT:
1292 if (len)
1293 {
1294 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1295 count = len + 1;
1296 }
1297 break;
1298 }
1299 }
1300 }
1301 }
1302
1303 self->incr_pos = self->incr_need;
1304
1305 if (count > 0)
1306 {
1307 while (!--count)
1308 {
1309 if (!AvFILLp (self->incr_count))
1310 return 1; // done
1311
1312 SvREFCNT_dec_NN (av_pop (self->incr_count));
1313 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1314 }
1315
1316 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1317 }
1318 }
1319
1320 return 0;
1321}
1322
1323
1162///////////////////////////////////////////////////////////////////////////// 1324/////////////////////////////////////////////////////////////////////////////
1163// XS interface functions 1325// XS interface functions
1164 1326
1165MODULE = CBOR::XS PACKAGE = CBOR::XS 1327MODULE = CBOR::XS PACKAGE = CBOR::XS
1166 1328
1206void shrink (CBOR *self, int enable = 1) 1368void shrink (CBOR *self, int enable = 1)
1207 ALIAS: 1369 ALIAS:
1208 shrink = F_SHRINK 1370 shrink = F_SHRINK
1209 allow_unknown = F_ALLOW_UNKNOWN 1371 allow_unknown = F_ALLOW_UNKNOWN
1210 allow_sharing = F_ALLOW_SHARING 1372 allow_sharing = F_ALLOW_SHARING
1373 allow_cycles = F_ALLOW_CYCLES
1211 pack_strings = F_PACK_STRINGS 1374 pack_strings = F_PACK_STRINGS
1375 validate_utf8 = F_VALIDATE_UTF8
1212 PPCODE: 1376 PPCODE:
1213{ 1377{
1214 if (enable) 1378 if (enable)
1215 self->flags |= ix; 1379 self->flags |= ix;
1216 else 1380 else
1222void get_shrink (CBOR *self) 1386void get_shrink (CBOR *self)
1223 ALIAS: 1387 ALIAS:
1224 get_shrink = F_SHRINK 1388 get_shrink = F_SHRINK
1225 get_allow_unknown = F_ALLOW_UNKNOWN 1389 get_allow_unknown = F_ALLOW_UNKNOWN
1226 get_allow_sharing = F_ALLOW_SHARING 1390 get_allow_sharing = F_ALLOW_SHARING
1391 get_allow_cycles = F_ALLOW_CYCLES
1227 get_pack_strings = F_PACK_STRINGS 1392 get_pack_strings = F_PACK_STRINGS
1393 get_validate_utf8 = F_VALIDATE_UTF8
1228 PPCODE: 1394 PPCODE:
1229 XPUSHs (boolSV (self->flags & ix)); 1395 XPUSHs (boolSV (self->flags & ix));
1230 1396
1231void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1397void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1232 PPCODE: 1398 PPCODE:
1281 EXTEND (SP, 2); 1447 EXTEND (SP, 2);
1282 PUSHs (sv); 1448 PUSHs (sv);
1283 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1449 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1284} 1450}
1285 1451
1452void incr_parse (CBOR *self, SV *cborstr)
1453 ALIAS:
1454 incr_parse_multiple = 1
1455 PPCODE:
1456{
1457 if (SvUTF8 (cborstr))
1458 sv_utf8_downgrade (cborstr, 0);
1459
1460 if (!self->incr_count)
1461 {
1462 self->incr_count = newAV ();
1463 self->incr_pos = 0;
1464 self->incr_need = 1;
1465
1466 av_push (self->incr_count, newSViv (1));
1467 }
1468
1469 do
1470 {
1471 if (!incr_parse (self, cborstr))
1472 {
1473 if (self->incr_need > self->max_size && self->max_size)
1474 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1475 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1476
1477 break;
1478 }
1479
1480 SV *sv;
1481 char *offset;
1482
1483 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1484 XPUSHs (sv);
1485
1486 sv_chop (cborstr, offset);
1487
1488 av_clear (self->incr_count);
1489 av_push (self->incr_count, newSViv (1));
1490
1491 self->incr_pos = 0;
1492 self->incr_need = self->incr_pos + 1;
1493 }
1494 while (ix);
1495}
1496
1497void incr_reset (CBOR *self)
1498 CODE:
1499{
1500 SvREFCNT_dec (self->incr_count);
1501 self->incr_count = 0;
1502}
1503
1286void DESTROY (CBOR *self) 1504void DESTROY (CBOR *self)
1287 PPCODE: 1505 PPCODE:
1288 cbor_free (self); 1506 cbor_free (self);
1289 1507
1290PROTOTYPES: ENABLE 1508PROTOTYPES: ENABLE
1291 1509
1292void encode_cbor (SV *scalar) 1510void encode_cbor (SV *scalar)
1511 ALIAS:
1512 encode_cbor = 0
1513 encode_cbor_sharing = F_ALLOW_SHARING
1293 PPCODE: 1514 PPCODE:
1294{ 1515{
1295 CBOR cbor; 1516 CBOR cbor;
1296 cbor_init (&cbor); 1517 cbor_init (&cbor);
1518 cbor.flags |= ix;
1297 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN; 1519 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1298 XPUSHs (scalar); 1520 XPUSHs (scalar);
1299} 1521}
1300 1522
1301void decode_cbor (SV *cborstr) 1523void decode_cbor (SV *cborstr)

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines