ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.36 by root, Sat Nov 30 17:37:45 2013 UTC vs.
Revision 1.40 by root, Tue Dec 10 13:54:40 2013 UTC

97}; 97};
98 98
99#define F_SHRINK 0x00000001UL 99#define F_SHRINK 0x00000001UL
100#define F_ALLOW_UNKNOWN 0x00000002UL 100#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL 101#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL
102#define F_PACK_STRINGS 0x00000008UL 103#define F_PACK_STRINGS 0x00000010UL
104#define F_VALIDATE_UTF8 0x00000020UL
103 105
104#define INIT_SIZE 32 // initial scalar size to be allocated 106#define INIT_SIZE 32 // initial scalar size to be allocated
105 107
106#define SB do { 108#define SB do {
107#define SE } while (0) 109#define SE } while (0)
126typedef struct { 128typedef struct {
127 U32 flags; 129 U32 flags;
128 U32 max_depth; 130 U32 max_depth;
129 STRLEN max_size; 131 STRLEN max_size;
130 SV *filter; 132 SV *filter;
133
134 // for the incremental parser
135 STRLEN incr_pos; // the current offset into the text
136 STRLEN incr_need; // minimum bytes needed to decode
137 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
131} CBOR; 138} CBOR;
132 139
133ecb_inline void 140ecb_inline void
134cbor_init (CBOR *cbor) 141cbor_init (CBOR *cbor)
135{ 142{
139 146
140ecb_inline void 147ecb_inline void
141cbor_free (CBOR *cbor) 148cbor_free (CBOR *cbor)
142{ 149{
143 SvREFCNT_dec (cbor->filter); 150 SvREFCNT_dec (cbor->filter);
151 SvREFCNT_dec (cbor->incr_count);
144} 152}
145 153
146///////////////////////////////////////////////////////////////////////////// 154/////////////////////////////////////////////////////////////////////////////
147// utility functions 155// utility functions
148 156
735 I32 len = decode_uint (dec); 743 I32 len = decode_uint (dec);
736 char *key = (char *)dec->cur; 744 char *key = (char *)dec->cur;
737 745
738 dec->cur += len; 746 dec->cur += len;
739 747
740 if (ecb_expect_false (dec->stringref))
741 av_push (dec->stringref, newSVpvn (key, len));
742
743 hv_store (hv, key, len, decode_sv (dec), 0); 748 hv_store (hv, key, len, decode_sv (dec), 0);
744 749
745 return; 750 return;
746 } 751 }
747 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) 752 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
749 I32 len = decode_uint (dec); 754 I32 len = decode_uint (dec);
750 char *key = (char *)dec->cur; 755 char *key = (char *)dec->cur;
751 756
752 dec->cur += len; 757 dec->cur += len;
753 758
754 if (ecb_expect_false (dec->stringref)) 759 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
755 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1)); 760 if (!is_utf8_string (key, len))
761 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
756 762
757 hv_store (hv, key, -len, decode_sv (dec), 0); 763 hv_store (hv, key, -len, decode_sv (dec), 0);
758 764
759 return; 765 return;
760 } 766 }
762 SV *k = decode_sv (dec); 768 SV *k = decode_sv (dec);
763 SV *v = decode_sv (dec); 769 SV *v = decode_sv (dec);
764 770
765 hv_store_ent (hv, k, v, 0); 771 hv_store_ent (hv, k, v, 0);
766 SvREFCNT_dec (k); 772 SvREFCNT_dec (k);
773
774fail:
775 ;
767} 776}
768 777
769static SV * 778static SV *
770decode_hv (dec_t *dec) 779decode_hv (dec_t *dec)
771{ 780{
853 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1)) 862 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
854 av_push (dec->stringref, SvREFCNT_inc_NN (sv)); 863 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
855 } 864 }
856 865
857 if (utf8) 866 if (utf8)
867 {
868 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
869 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
870 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
871
858 SvUTF8_on (sv); 872 SvUTF8_on (sv);
873 }
859 874
860 return sv; 875 return sv;
861 876
862fail: 877fail:
863 SvREFCNT_dec (sv); 878 SvREFCNT_dec (sv);
912 case CBOR_TAG_VALUE_SHAREABLE: 927 case CBOR_TAG_VALUE_SHAREABLE:
913 { 928 {
914 if (ecb_expect_false (!dec->shareable)) 929 if (ecb_expect_false (!dec->shareable))
915 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); 930 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
916 931
932 if (dec->cbor.flags & F_ALLOW_CYCLES)
933 {
917 sv = newSV (0); 934 sv = newSV (0);
918 av_push (dec->shareable, SvREFCNT_inc_NN (sv)); 935 av_push (dec->shareable, SvREFCNT_inc_NN (sv));
919 936
920 SV *osv = decode_sv (dec); 937 SV *osv = decode_sv (dec);
921 sv_setsv (sv, osv); 938 sv_setsv (sv, osv);
922 SvREFCNT_dec_NN (osv); 939 SvREFCNT_dec_NN (osv);
940 }
941 else
942 {
943 av_push (dec->shareable, &PL_sv_undef);
944 int idx = AvFILLp (dec->shareable);
945 sv = decode_sv (dec);
946 av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
947 }
923 } 948 }
924 break; 949 break;
925 950
926 case CBOR_TAG_VALUE_SHAREDREF: 951 case CBOR_TAG_VALUE_SHAREDREF:
927 { 952 {
932 957
933 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable)) 958 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
934 ERR ("corrupted CBOR data (sharedref index out of bounds)"); 959 ERR ("corrupted CBOR data (sharedref index out of bounds)");
935 960
936 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); 961 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
962
963 if (sv == &PL_sv_undef)
964 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
937 } 965 }
938 break; 966 break;
939 967
940 case CBOR_TAG_PERL_OBJECT: 968 case CBOR_TAG_PERL_OBJECT:
941 { 969 {
1110 1138
1111 return newSVnv (ecb_binary64_to_double (fp)); 1139 return newSVnv (ecb_binary64_to_double (fp));
1112 } 1140 }
1113 1141
1114 // 0..19 unassigned simple 1142 // 0..19 unassigned simple
1115 // 24 reserved + unassigned (reserved values are not encodable) 1143 // 24 reserved + unassigned simple (reserved values are not encodable)
1144 // 28-30 unassigned misc
1145 // 31 break code
1116 default: 1146 default:
1117 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1147 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1118 } 1148 }
1119 1149
1120 break; 1150 break;
1121 } 1151 }
1122 1152
1149 if (dec.cur != dec.end && !dec.err) 1179 if (dec.cur != dec.end && !dec.err)
1150 dec.err = "garbage after CBOR object"; 1180 dec.err = "garbage after CBOR object";
1151 1181
1152 if (dec.err) 1182 if (dec.err)
1153 { 1183 {
1184 if (dec.shareable)
1185 {
1186 // need to break cyclic links, which would all be in shareable
1187 int i;
1188 SV **svp;
1189
1190 for (i = av_len (dec.shareable) + 1; i--; )
1191 if ((svp = av_fetch (dec.shareable, i, 0)))
1192 sv_setsv (*svp, &PL_sv_undef);
1193 }
1194
1154 SvREFCNT_dec (sv); 1195 SvREFCNT_dec (sv);
1155 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1196 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1156 } 1197 }
1157 1198
1158 sv = sv_2mortal (sv); 1199 sv = sv_2mortal (sv);
1159 1200
1160 return sv; 1201 return sv;
1161} 1202}
1162 1203
1204/////////////////////////////////////////////////////////////////////////////
1205// incremental parser
1206
1207#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1208
1209// returns 0 for notyet, 1 for success or error
1210static int
1211incr_parse (CBOR *self, SV *cborstr)
1212{
1213 STRLEN cur;
1214 SvPV (cborstr, cur);
1215
1216 while (ecb_expect_true (self->incr_need <= cur))
1217 {
1218 // table of integer count bytes
1219 static I8 incr_len[MINOR_MASK + 1] = {
1220 0, 0, 0, 0, 0, 0, 0, 0,
1221 0, 0, 0, 0, 0, 0, 0, 0,
1222 0, 0, 0, 0, 0, 0, 0, 0,
1223 1, 2, 4, 8,-1,-1,-1,-2
1224 };
1225
1226 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1227 U8 m = *p & MINOR_MASK;
1228 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1229 I8 ilen = incr_len[m];
1230
1231 self->incr_need = self->incr_pos + 1;
1232
1233 if (ecb_expect_false (ilen < 0))
1234 {
1235 if (m != MINOR_INDEF)
1236 return 1; // error
1237
1238 if (*p == (MAJOR_MISC | MINOR_INDEF))
1239 {
1240 if (count >= 0)
1241 return 1; // error
1242
1243 count = 1;
1244 }
1245 else
1246 {
1247 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1248 count = -1;
1249 }
1250 }
1251 else
1252 {
1253 self->incr_need += ilen;
1254 if (ecb_expect_false (self->incr_need > cur))
1255 return 0;
1256
1257 int major = *p >> MAJOR_SHIFT;
1258
1259 switch (major)
1260 {
1261 case MAJOR_BYTES >> MAJOR_SHIFT:
1262 case MAJOR_TEXT >> MAJOR_SHIFT:
1263 case MAJOR_ARRAY >> MAJOR_SHIFT:
1264 case MAJOR_MAP >> MAJOR_SHIFT:
1265 {
1266 UV len;
1267
1268 if (ecb_expect_false (ilen))
1269 {
1270 len = 0;
1271
1272 do {
1273 len = (len << 8) | *++p;
1274 } while (--ilen);
1275 }
1276 else
1277 len = m;
1278
1279 switch (major)
1280 {
1281 case MAJOR_BYTES >> MAJOR_SHIFT:
1282 case MAJOR_TEXT >> MAJOR_SHIFT:
1283 self->incr_need += len;
1284 if (ecb_expect_false (self->incr_need > cur))
1285 return 0;
1286
1287 break;
1288
1289 case MAJOR_MAP >> MAJOR_SHIFT:
1290 len <<= 1;
1291 case MAJOR_ARRAY >> MAJOR_SHIFT:
1292 if (len)
1293 {
1294 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1295 count = len + 1;
1296 }
1297 break;
1298 }
1299 }
1300 }
1301 }
1302
1303 self->incr_pos = self->incr_need;
1304
1305 if (count > 0)
1306 {
1307 while (!--count)
1308 {
1309 if (!AvFILLp (self->incr_count))
1310 return 1; // done
1311
1312 SvREFCNT_dec_NN (av_pop (self->incr_count));
1313 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1314 }
1315
1316 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1317 }
1318 }
1319
1320 return 0;
1321}
1322
1323
1163///////////////////////////////////////////////////////////////////////////// 1324/////////////////////////////////////////////////////////////////////////////
1164// XS interface functions 1325// XS interface functions
1165 1326
1166MODULE = CBOR::XS PACKAGE = CBOR::XS 1327MODULE = CBOR::XS PACKAGE = CBOR::XS
1167 1328
1207void shrink (CBOR *self, int enable = 1) 1368void shrink (CBOR *self, int enable = 1)
1208 ALIAS: 1369 ALIAS:
1209 shrink = F_SHRINK 1370 shrink = F_SHRINK
1210 allow_unknown = F_ALLOW_UNKNOWN 1371 allow_unknown = F_ALLOW_UNKNOWN
1211 allow_sharing = F_ALLOW_SHARING 1372 allow_sharing = F_ALLOW_SHARING
1373 allow_cycles = F_ALLOW_CYCLES
1212 pack_strings = F_PACK_STRINGS 1374 pack_strings = F_PACK_STRINGS
1375 validate_utf8 = F_VALIDATE_UTF8
1213 PPCODE: 1376 PPCODE:
1214{ 1377{
1215 if (enable) 1378 if (enable)
1216 self->flags |= ix; 1379 self->flags |= ix;
1217 else 1380 else
1223void get_shrink (CBOR *self) 1386void get_shrink (CBOR *self)
1224 ALIAS: 1387 ALIAS:
1225 get_shrink = F_SHRINK 1388 get_shrink = F_SHRINK
1226 get_allow_unknown = F_ALLOW_UNKNOWN 1389 get_allow_unknown = F_ALLOW_UNKNOWN
1227 get_allow_sharing = F_ALLOW_SHARING 1390 get_allow_sharing = F_ALLOW_SHARING
1391 get_allow_cycles = F_ALLOW_CYCLES
1228 get_pack_strings = F_PACK_STRINGS 1392 get_pack_strings = F_PACK_STRINGS
1393 get_validate_utf8 = F_VALIDATE_UTF8
1229 PPCODE: 1394 PPCODE:
1230 XPUSHs (boolSV (self->flags & ix)); 1395 XPUSHs (boolSV (self->flags & ix));
1231 1396
1232void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1397void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1233 PPCODE: 1398 PPCODE:
1282 EXTEND (SP, 2); 1447 EXTEND (SP, 2);
1283 PUSHs (sv); 1448 PUSHs (sv);
1284 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1449 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1285} 1450}
1286 1451
1452void incr_parse (CBOR *self, SV *cborstr, int chop = 0)
1453 PPCODE:
1454{
1455 if (SvUTF8 (cborstr))
1456 sv_utf8_downgrade (cborstr, 0);
1457
1458 if (!self->incr_count)
1459 {
1460 self->incr_count = newAV ();
1461 self->incr_pos = 0;
1462 self->incr_need = 1;
1463
1464 av_push (self->incr_count, newSViv (1));
1465 }
1466
1467 for (;;)
1468 {
1469 if (!incr_parse (self, cborstr))
1470 {
1471 if (self->incr_need > self->max_size && self->max_size)
1472 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1473 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1474
1475 break;
1476 }
1477
1478 SV *sv;
1479 char *offset;
1480
1481 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1482 XPUSHs (sv);
1483
1484 av_clear (self->incr_count);
1485 av_push (self->incr_count, newSViv (1));
1486
1487 if (chop)
1488 {
1489 self->incr_pos = 0;
1490 sv_chop (cborstr, offset);
1491 }
1492 else
1493 self->incr_pos = offset - SvPVX (cborstr);
1494
1495 self->incr_need = self->incr_pos + 1;
1496 }
1497}
1498
1499void incr_reset (CBOR *self)
1500 CODE:
1501{
1502 SvREFCNT_dec (self->incr_count);
1503 self->incr_count = 0;
1504}
1505
1287void DESTROY (CBOR *self) 1506void DESTROY (CBOR *self)
1288 PPCODE: 1507 PPCODE:
1289 cbor_free (self); 1508 cbor_free (self);
1290 1509
1291PROTOTYPES: ENABLE 1510PROTOTYPES: ENABLE

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines