ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.37 by root, Sat Nov 30 18:13:53 2013 UTC vs.
Revision 1.52 by root, Sun Apr 24 19:31:55 2016 UTC

7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#define ECB_NO_THREADS 1
12#include "ecb.h" 13#include "ecb.h"
13 14
14// compatibility with perl <5.18 15// compatibility with perl <5.18
15#ifndef HvNAMELEN_get 16#ifndef HvNAMELEN_get
16# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) 17# define HvNAMELEN_get(hv) strlen (HvNAME (hv))
99#define F_SHRINK 0x00000001UL 100#define F_SHRINK 0x00000001UL
100#define F_ALLOW_UNKNOWN 0x00000002UL 101#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL 102#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL 103#define F_ALLOW_CYCLES 0x00000008UL
103#define F_PACK_STRINGS 0x00000010UL 104#define F_PACK_STRINGS 0x00000010UL
105#define F_UTF8_STRINGS 0x00000020UL
106#define F_VALIDATE_UTF8 0x00000040UL
104 107
105#define INIT_SIZE 32 // initial scalar size to be allocated 108#define INIT_SIZE 32 // initial scalar size to be allocated
106 109
107#define SB do { 110#define SB do {
108#define SE } while (0) 111#define SE } while (0)
127typedef struct { 130typedef struct {
128 U32 flags; 131 U32 flags;
129 U32 max_depth; 132 U32 max_depth;
130 STRLEN max_size; 133 STRLEN max_size;
131 SV *filter; 134 SV *filter;
135
136 // for the incremental parser
137 STRLEN incr_pos; // the current offset into the text
138 STRLEN incr_need; // minimum bytes needed to decode
139 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
132} CBOR; 140} CBOR;
133 141
134ecb_inline void 142ecb_inline void
135cbor_init (CBOR *cbor) 143cbor_init (CBOR *cbor)
136{ 144{
140 148
141ecb_inline void 149ecb_inline void
142cbor_free (CBOR *cbor) 150cbor_free (CBOR *cbor)
143{ 151{
144 SvREFCNT_dec (cbor->filter); 152 SvREFCNT_dec (cbor->filter);
153 SvREFCNT_dec (cbor->incr_count);
145} 154}
146 155
147///////////////////////////////////////////////////////////////////////////// 156/////////////////////////////////////////////////////////////////////////////
148// utility functions 157// utility functions
149 158
271} 280}
272 281
273ecb_inline void 282ecb_inline void
274encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 283encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
275{ 284{
285 if (ecb_expect_false (enc->cbor.flags & F_UTF8_STRINGS))
286 if (!utf8)
287 {
288 // exceptional path for byte strings that need to be utf8-encoded
289 STRLEN ulen = len;
290 U8 *p, *pend = (U8 *)str + len;
291
292 for (p = (U8 *)str; p < pend; ++p)
293 ulen += *p >> 7; // count set high bits
294
295 encode_uint (enc, MAJOR_TEXT, ulen);
296
297 need (enc, ulen);
298 for (p = (U8 *)str; p < pend; ++p)
299 if (*p < 0x80)
300 *enc->cur++ = *p;
301 else
302 {
303 *enc->cur++ = 0xc0 + (*p >> 6);
304 *enc->cur++ = 0x80 + (*p & 63);
305 }
306
307 return;
308 }
309
276 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); 310 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
277 need (enc, len); 311 need (enc, len);
278 memcpy (enc->cur, str, len); 312 memcpy (enc->cur, str, len);
279 enc->cur += len; 313 enc->cur += len;
280} 314}
316 350
317 ++enc->depth; 351 ++enc->depth;
318 352
319 encode_uint (enc, MAJOR_ARRAY, len + 1); 353 encode_uint (enc, MAJOR_ARRAY, len + 1);
320 354
355 if (SvMAGICAL (av))
321 for (i = 0; i <= len; ++i) 356 for (i = 0; i <= len; ++i)
322 { 357 {
323 SV **svp = av_fetch (av, i, 0); 358 SV **svp = av_fetch (av, i, 0);
324 encode_sv (enc, svp ? *svp : &PL_sv_undef); 359 encode_sv (enc, svp ? *svp : &PL_sv_undef);
325 } 360 }
361 else
362 for (i = 0; i <= len; ++i)
363 {
364 SV *sv = AvARRAY (av)[i];
365 encode_sv (enc, sv ? sv : &PL_sv_undef);
366 }
326 367
327 --enc->depth; 368 --enc->depth;
328} 369}
329 370
330static void 371static void
434 475
435 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) 476 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
436 { 477 {
437 dSP; 478 dSP;
438 479
439 ENTER; SAVETMPS; PUSHMARK (SP); 480 ENTER; SAVETMPS;
481 PUSHMARK (SP);
440 // we re-bless the reference to get overload and other niceties right 482 // we re-bless the reference to get overload and other niceties right
441 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 483 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
442 484
443 PUTBACK; 485 PUTBACK;
444 // G_SCALAR ensures that return value is 1 486 // G_SCALAR ensures that return value is 1
457 } 499 }
458 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) 500 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
459 { 501 {
460 dSP; 502 dSP;
461 503
462 ENTER; SAVETMPS; PUSHMARK (SP); 504 ENTER; SAVETMPS;
505 SAVESTACK_POS ();
506 PUSHMARK (SP);
463 EXTEND (SP, 2); 507 EXTEND (SP, 2);
464 // we re-bless the reference to get overload and other niceties right 508 // we re-bless the reference to get overload and other niceties right
465 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 509 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
466 PUSHs (sv_cbor); 510 PUSHs (sv_cbor);
467 511
569} 613}
570 614
571static SV * 615static SV *
572encode_cbor (SV *scalar, CBOR *cbor) 616encode_cbor (SV *scalar, CBOR *cbor)
573{ 617{
574 enc_t enc = { }; 618 enc_t enc = { 0 };
575 619
576 enc.cbor = *cbor; 620 enc.cbor = *cbor;
577 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 621 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
578 enc.cur = SvPVX (enc.sv); 622 enc.cur = SvPVX (enc.sv);
579 enc.end = SvEND (enc.sv); 623 enc.end = SvEND (enc.sv);
729{ 773{
730 // for speed reasons, we specialcase single-string 774 // for speed reasons, we specialcase single-string
731 // byte or utf-8 strings as keys, but only when !stringref 775 // byte or utf-8 strings as keys, but only when !stringref
732 776
733 if (ecb_expect_true (!dec->stringref)) 777 if (ecb_expect_true (!dec->stringref))
734 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) 778 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
735 { 779 {
736 I32 len = decode_uint (dec); 780 I32 len = decode_uint (dec);
737 char *key = (char *)dec->cur; 781 char *key = (char *)dec->cur;
738 782
783 WANT (len);
739 dec->cur += len; 784 dec->cur += len;
740 785
741 if (ecb_expect_false (dec->stringref))
742 av_push (dec->stringref, newSVpvn (key, len));
743
744 hv_store (hv, key, len, decode_sv (dec), 0); 786 hv_store (hv, key, len, decode_sv (dec), 0);
745 787
746 return; 788 return;
747 } 789 }
748 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) 790 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
749 { 791 {
750 I32 len = decode_uint (dec); 792 I32 len = decode_uint (dec);
751 char *key = (char *)dec->cur; 793 char *key = (char *)dec->cur;
752 794
795 WANT (len);
753 dec->cur += len; 796 dec->cur += len;
754 797
755 if (ecb_expect_false (dec->stringref)) 798 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
756 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1)); 799 if (!is_utf8_string (key, len))
800 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
757 801
758 hv_store (hv, key, -len, decode_sv (dec), 0); 802 hv_store (hv, key, -len, decode_sv (dec), 0);
759 803
760 return; 804 return;
761 } 805 }
763 SV *k = decode_sv (dec); 807 SV *k = decode_sv (dec);
764 SV *v = decode_sv (dec); 808 SV *v = decode_sv (dec);
765 809
766 hv_store_ent (hv, k, v, 0); 810 hv_store_ent (hv, k, v, 0);
767 SvREFCNT_dec (k); 811 SvREFCNT_dec (k);
812
813fail:
814 ;
768} 815}
769 816
770static SV * 817static SV *
771decode_hv (dec_t *dec) 818decode_hv (dec_t *dec)
772{ 819{
854 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1)) 901 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
855 av_push (dec->stringref, SvREFCNT_inc_NN (sv)); 902 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
856 } 903 }
857 904
858 if (utf8) 905 if (utf8)
906 {
907 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
908 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
909 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
910
859 SvUTF8_on (sv); 911 SvUTF8_on (sv);
912 }
860 913
861 return sv; 914 return sv;
862 915
863fail: 916fail:
864 SvREFCNT_dec (sv); 917 SvREFCNT_dec (sv);
970 if (!method) 1023 if (!method)
971 ERR ("cannot decode perl-object (package does not have a THAW method)"); 1024 ERR ("cannot decode perl-object (package does not have a THAW method)");
972 1025
973 dSP; 1026 dSP;
974 1027
975 ENTER; SAVETMPS; PUSHMARK (SP); 1028 ENTER; SAVETMPS;
1029 PUSHMARK (SP);
976 EXTEND (SP, len + 1); 1030 EXTEND (SP, len + 1);
977 // we re-bless the reference to get overload and other niceties right 1031 // we re-bless the reference to get overload and other niceties right
978 PUSHs (*av_fetch (av, 0, 1)); 1032 PUSHs (*av_fetch (av, 0, 1));
979 PUSHs (sv_cbor); 1033 PUSHs (sv_cbor);
980 1034
1005 default: 1059 default:
1006 { 1060 {
1007 sv = decode_sv (dec); 1061 sv = decode_sv (dec);
1008 1062
1009 dSP; 1063 dSP;
1010 ENTER; SAVETMPS; PUSHMARK (SP); 1064 ENTER; SAVETMPS;
1065 SAVESTACK_POS ();
1066 PUSHMARK (SP);
1011 EXTEND (SP, 2); 1067 EXTEND (SP, 2);
1012 PUSHs (newSVuv (tag)); 1068 PUSHs (newSVuv (tag));
1013 PUSHs (sv); 1069 PUSHs (sv);
1014 1070
1015 PUTBACK; 1071 PUTBACK;
1124 1180
1125 return newSVnv (ecb_binary64_to_double (fp)); 1181 return newSVnv (ecb_binary64_to_double (fp));
1126 } 1182 }
1127 1183
1128 // 0..19 unassigned simple 1184 // 0..19 unassigned simple
1129 // 24 reserved + unassigned (reserved values are not encodable) 1185 // 24 reserved + unassigned simple (reserved values are not encodable)
1186 // 28-30 unassigned misc
1187 // 31 break code
1130 default: 1188 default:
1131 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1189 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1132 } 1190 }
1133 1191
1134 break; 1192 break;
1135 } 1193 }
1136 1194
1139} 1197}
1140 1198
1141static SV * 1199static SV *
1142decode_cbor (SV *string, CBOR *cbor, char **offset_return) 1200decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1143{ 1201{
1144 dec_t dec = { }; 1202 dec_t dec = { 0 };
1145 SV *sv; 1203 SV *sv;
1146 STRLEN len; 1204 STRLEN len;
1147 char *data = SvPVbyte (string, len); 1205 char *data = SvPVbyte (string, len);
1148 1206
1149 if (len > cbor->max_size && cbor->max_size) 1207 if (len > cbor->max_size && cbor->max_size)
1163 if (dec.cur != dec.end && !dec.err) 1221 if (dec.cur != dec.end && !dec.err)
1164 dec.err = "garbage after CBOR object"; 1222 dec.err = "garbage after CBOR object";
1165 1223
1166 if (dec.err) 1224 if (dec.err)
1167 { 1225 {
1226 if (dec.shareable)
1227 {
1228 // need to break cyclic links, which would all be in shareable
1229 int i;
1230 SV **svp;
1231
1232 for (i = av_len (dec.shareable) + 1; i--; )
1233 if ((svp = av_fetch (dec.shareable, i, 0)))
1234 sv_setsv (*svp, &PL_sv_undef);
1235 }
1236
1168 SvREFCNT_dec (sv); 1237 SvREFCNT_dec (sv);
1169 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1238 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1170 } 1239 }
1171 1240
1172 sv = sv_2mortal (sv); 1241 sv = sv_2mortal (sv);
1173 1242
1174 return sv; 1243 return sv;
1175} 1244}
1176 1245
1246/////////////////////////////////////////////////////////////////////////////
1247// incremental parser
1248
1249#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1250
1251// returns 0 for not yet, 1 for success or error
1252static int
1253incr_parse (CBOR *self, SV *cborstr)
1254{
1255 STRLEN cur;
1256 SvPV (cborstr, cur);
1257
1258 while (ecb_expect_true (self->incr_need <= cur))
1259 {
1260 // table of integer count bytes
1261 static I8 incr_len[MINOR_MASK + 1] = {
1262 0, 0, 0, 0, 0, 0, 0, 0,
1263 0, 0, 0, 0, 0, 0, 0, 0,
1264 0, 0, 0, 0, 0, 0, 0, 0,
1265 1, 2, 4, 8,-1,-1,-1,-2
1266 };
1267
1268 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1269 U8 m = *p & MINOR_MASK;
1270 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1271 I8 ilen = incr_len[m];
1272
1273 self->incr_need = self->incr_pos + 1;
1274
1275 if (ecb_expect_false (ilen < 0))
1276 {
1277 if (m != MINOR_INDEF)
1278 return 1; // error
1279
1280 if (*p == (MAJOR_MISC | MINOR_INDEF))
1281 {
1282 if (count >= 0)
1283 return 1; // error
1284
1285 count = 1;
1286 }
1287 else
1288 {
1289 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1290 count = -1;
1291 }
1292 }
1293 else
1294 {
1295 self->incr_need += ilen;
1296 if (ecb_expect_false (self->incr_need > cur))
1297 return 0;
1298
1299 int major = *p >> MAJOR_SHIFT;
1300
1301 switch (major)
1302 {
1303 case MAJOR_TAG >> MAJOR_SHIFT:
1304 ++count; // tags merely prefix another value
1305 break;
1306
1307 case MAJOR_BYTES >> MAJOR_SHIFT:
1308 case MAJOR_TEXT >> MAJOR_SHIFT:
1309 case MAJOR_ARRAY >> MAJOR_SHIFT:
1310 case MAJOR_MAP >> MAJOR_SHIFT:
1311 {
1312 UV len;
1313
1314 if (ecb_expect_false (ilen))
1315 {
1316 len = 0;
1317
1318 do {
1319 len = (len << 8) | *++p;
1320 } while (--ilen);
1321 }
1322 else
1323 len = m;
1324
1325 switch (major)
1326 {
1327 case MAJOR_BYTES >> MAJOR_SHIFT:
1328 case MAJOR_TEXT >> MAJOR_SHIFT:
1329 self->incr_need += len;
1330 if (ecb_expect_false (self->incr_need > cur))
1331 return 0;
1332
1333 break;
1334
1335 case MAJOR_MAP >> MAJOR_SHIFT:
1336 len <<= 1;
1337 case MAJOR_ARRAY >> MAJOR_SHIFT:
1338 if (len)
1339 {
1340 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1341 count = len + 1;
1342 }
1343 break;
1344 }
1345 }
1346 }
1347 }
1348
1349 self->incr_pos = self->incr_need;
1350
1351 if (count > 0)
1352 {
1353 while (!--count)
1354 {
1355 if (!AvFILLp (self->incr_count))
1356 return 1; // done
1357
1358 SvREFCNT_dec_NN (av_pop (self->incr_count));
1359 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1360 }
1361
1362 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1363 }
1364 }
1365
1366 return 0;
1367}
1368
1369
1177///////////////////////////////////////////////////////////////////////////// 1370/////////////////////////////////////////////////////////////////////////////
1178// XS interface functions 1371// XS interface functions
1179 1372
1180MODULE = CBOR::XS PACKAGE = CBOR::XS 1373MODULE = CBOR::XS PACKAGE = CBOR::XS
1181 1374
1223 shrink = F_SHRINK 1416 shrink = F_SHRINK
1224 allow_unknown = F_ALLOW_UNKNOWN 1417 allow_unknown = F_ALLOW_UNKNOWN
1225 allow_sharing = F_ALLOW_SHARING 1418 allow_sharing = F_ALLOW_SHARING
1226 allow_cycles = F_ALLOW_CYCLES 1419 allow_cycles = F_ALLOW_CYCLES
1227 pack_strings = F_PACK_STRINGS 1420 pack_strings = F_PACK_STRINGS
1421 utf8_strings = F_UTF8_STRINGS
1422 validate_utf8 = F_VALIDATE_UTF8
1228 PPCODE: 1423 PPCODE:
1229{ 1424{
1230 if (enable) 1425 if (enable)
1231 self->flags |= ix; 1426 self->flags |= ix;
1232 else 1427 else
1240 get_shrink = F_SHRINK 1435 get_shrink = F_SHRINK
1241 get_allow_unknown = F_ALLOW_UNKNOWN 1436 get_allow_unknown = F_ALLOW_UNKNOWN
1242 get_allow_sharing = F_ALLOW_SHARING 1437 get_allow_sharing = F_ALLOW_SHARING
1243 get_allow_cycles = F_ALLOW_CYCLES 1438 get_allow_cycles = F_ALLOW_CYCLES
1244 get_pack_strings = F_PACK_STRINGS 1439 get_pack_strings = F_PACK_STRINGS
1440 get_validate_utf8 = F_VALIDATE_UTF8
1245 PPCODE: 1441 PPCODE:
1246 XPUSHs (boolSV (self->flags & ix)); 1442 XPUSHs (boolSV (self->flags & ix));
1247 1443
1248void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1444void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1249 PPCODE: 1445 PPCODE:
1298 EXTEND (SP, 2); 1494 EXTEND (SP, 2);
1299 PUSHs (sv); 1495 PUSHs (sv);
1300 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1496 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1301} 1497}
1302 1498
1499void incr_parse (CBOR *self, SV *cborstr)
1500 ALIAS:
1501 incr_parse_multiple = 1
1502 PPCODE:
1503{
1504 if (SvUTF8 (cborstr))
1505 sv_utf8_downgrade (cborstr, 0);
1506
1507 if (!self->incr_count)
1508 {
1509 self->incr_count = newAV ();
1510 self->incr_pos = 0;
1511 self->incr_need = 1;
1512
1513 av_push (self->incr_count, newSViv (1));
1514 }
1515
1516 do
1517 {
1518 if (!incr_parse (self, cborstr))
1519 {
1520 if (self->incr_need > self->max_size && self->max_size)
1521 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1522 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1523
1524 break;
1525 }
1526
1527 SV *sv;
1528 char *offset;
1529
1530 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1531 XPUSHs (sv);
1532
1533 sv_chop (cborstr, offset);
1534
1535 av_clear (self->incr_count);
1536 av_push (self->incr_count, newSViv (1));
1537
1538 self->incr_pos = 0;
1539 self->incr_need = self->incr_pos + 1;
1540 }
1541 while (ix);
1542}
1543
1544void incr_reset (CBOR *self)
1545 CODE:
1546{
1547 SvREFCNT_dec (self->incr_count);
1548 self->incr_count = 0;
1549}
1550
1303void DESTROY (CBOR *self) 1551void DESTROY (CBOR *self)
1304 PPCODE: 1552 PPCODE:
1305 cbor_free (self); 1553 cbor_free (self);
1306 1554
1307PROTOTYPES: ENABLE 1555PROTOTYPES: ENABLE

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines