ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.38 by root, Sun Dec 1 14:30:52 2013 UTC vs.
Revision 1.52 by root, Sun Apr 24 19:31:55 2016 UTC

7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#define ECB_NO_THREADS 1
12#include "ecb.h" 13#include "ecb.h"
13 14
14// compatibility with perl <5.18 15// compatibility with perl <5.18
15#ifndef HvNAMELEN_get 16#ifndef HvNAMELEN_get
16# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) 17# define HvNAMELEN_get(hv) strlen (HvNAME (hv))
99#define F_SHRINK 0x00000001UL 100#define F_SHRINK 0x00000001UL
100#define F_ALLOW_UNKNOWN 0x00000002UL 101#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL 102#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL 103#define F_ALLOW_CYCLES 0x00000008UL
103#define F_PACK_STRINGS 0x00000010UL 104#define F_PACK_STRINGS 0x00000010UL
105#define F_UTF8_STRINGS 0x00000020UL
104#define F_VALIDATE_UTF8 0x00000020UL 106#define F_VALIDATE_UTF8 0x00000040UL
105 107
106#define INIT_SIZE 32 // initial scalar size to be allocated 108#define INIT_SIZE 32 // initial scalar size to be allocated
107 109
108#define SB do { 110#define SB do {
109#define SE } while (0) 111#define SE } while (0)
128typedef struct { 130typedef struct {
129 U32 flags; 131 U32 flags;
130 U32 max_depth; 132 U32 max_depth;
131 STRLEN max_size; 133 STRLEN max_size;
132 SV *filter; 134 SV *filter;
135
136 // for the incremental parser
137 STRLEN incr_pos; // the current offset into the text
138 STRLEN incr_need; // minimum bytes needed to decode
139 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
133} CBOR; 140} CBOR;
134 141
135ecb_inline void 142ecb_inline void
136cbor_init (CBOR *cbor) 143cbor_init (CBOR *cbor)
137{ 144{
141 148
142ecb_inline void 149ecb_inline void
143cbor_free (CBOR *cbor) 150cbor_free (CBOR *cbor)
144{ 151{
145 SvREFCNT_dec (cbor->filter); 152 SvREFCNT_dec (cbor->filter);
153 SvREFCNT_dec (cbor->incr_count);
146} 154}
147 155
148///////////////////////////////////////////////////////////////////////////// 156/////////////////////////////////////////////////////////////////////////////
149// utility functions 157// utility functions
150 158
272} 280}
273 281
274ecb_inline void 282ecb_inline void
275encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 283encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
276{ 284{
285 if (ecb_expect_false (enc->cbor.flags & F_UTF8_STRINGS))
286 if (!utf8)
287 {
288 // exceptional path for byte strings that need to be utf8-encoded
289 STRLEN ulen = len;
290 U8 *p, *pend = (U8 *)str + len;
291
292 for (p = (U8 *)str; p < pend; ++p)
293 ulen += *p >> 7; // count set high bits
294
295 encode_uint (enc, MAJOR_TEXT, ulen);
296
297 need (enc, ulen);
298 for (p = (U8 *)str; p < pend; ++p)
299 if (*p < 0x80)
300 *enc->cur++ = *p;
301 else
302 {
303 *enc->cur++ = 0xc0 + (*p >> 6);
304 *enc->cur++ = 0x80 + (*p & 63);
305 }
306
307 return;
308 }
309
277 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); 310 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
278 need (enc, len); 311 need (enc, len);
279 memcpy (enc->cur, str, len); 312 memcpy (enc->cur, str, len);
280 enc->cur += len; 313 enc->cur += len;
281} 314}
317 350
318 ++enc->depth; 351 ++enc->depth;
319 352
320 encode_uint (enc, MAJOR_ARRAY, len + 1); 353 encode_uint (enc, MAJOR_ARRAY, len + 1);
321 354
355 if (SvMAGICAL (av))
322 for (i = 0; i <= len; ++i) 356 for (i = 0; i <= len; ++i)
323 { 357 {
324 SV **svp = av_fetch (av, i, 0); 358 SV **svp = av_fetch (av, i, 0);
325 encode_sv (enc, svp ? *svp : &PL_sv_undef); 359 encode_sv (enc, svp ? *svp : &PL_sv_undef);
326 } 360 }
361 else
362 for (i = 0; i <= len; ++i)
363 {
364 SV *sv = AvARRAY (av)[i];
365 encode_sv (enc, sv ? sv : &PL_sv_undef);
366 }
327 367
328 --enc->depth; 368 --enc->depth;
329} 369}
330 370
331static void 371static void
435 475
436 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) 476 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
437 { 477 {
438 dSP; 478 dSP;
439 479
440 ENTER; SAVETMPS; PUSHMARK (SP); 480 ENTER; SAVETMPS;
481 PUSHMARK (SP);
441 // we re-bless the reference to get overload and other niceties right 482 // we re-bless the reference to get overload and other niceties right
442 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 483 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
443 484
444 PUTBACK; 485 PUTBACK;
445 // G_SCALAR ensures that return value is 1 486 // G_SCALAR ensures that return value is 1
458 } 499 }
459 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) 500 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
460 { 501 {
461 dSP; 502 dSP;
462 503
463 ENTER; SAVETMPS; PUSHMARK (SP); 504 ENTER; SAVETMPS;
505 SAVESTACK_POS ();
506 PUSHMARK (SP);
464 EXTEND (SP, 2); 507 EXTEND (SP, 2);
465 // we re-bless the reference to get overload and other niceties right 508 // we re-bless the reference to get overload and other niceties right
466 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 509 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
467 PUSHs (sv_cbor); 510 PUSHs (sv_cbor);
468 511
570} 613}
571 614
572static SV * 615static SV *
573encode_cbor (SV *scalar, CBOR *cbor) 616encode_cbor (SV *scalar, CBOR *cbor)
574{ 617{
575 enc_t enc = { }; 618 enc_t enc = { 0 };
576 619
577 enc.cbor = *cbor; 620 enc.cbor = *cbor;
578 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 621 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
579 enc.cur = SvPVX (enc.sv); 622 enc.cur = SvPVX (enc.sv);
580 enc.end = SvEND (enc.sv); 623 enc.end = SvEND (enc.sv);
730{ 773{
731 // for speed reasons, we specialcase single-string 774 // for speed reasons, we specialcase single-string
732 // byte or utf-8 strings as keys, but only when !stringref 775 // byte or utf-8 strings as keys, but only when !stringref
733 776
734 if (ecb_expect_true (!dec->stringref)) 777 if (ecb_expect_true (!dec->stringref))
735 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) 778 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
736 { 779 {
737 I32 len = decode_uint (dec); 780 I32 len = decode_uint (dec);
738 char *key = (char *)dec->cur; 781 char *key = (char *)dec->cur;
739 782
783 WANT (len);
740 dec->cur += len; 784 dec->cur += len;
741 785
742 hv_store (hv, key, len, decode_sv (dec), 0); 786 hv_store (hv, key, len, decode_sv (dec), 0);
743 787
744 return; 788 return;
745 } 789 }
746 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) 790 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
747 { 791 {
748 I32 len = decode_uint (dec); 792 I32 len = decode_uint (dec);
749 char *key = (char *)dec->cur; 793 char *key = (char *)dec->cur;
750 794
795 WANT (len);
751 dec->cur += len; 796 dec->cur += len;
752 797
753 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) 798 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
754 if (!is_utf8_string (key, len)) 799 if (!is_utf8_string (key, len))
755 ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); 800 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
978 if (!method) 1023 if (!method)
979 ERR ("cannot decode perl-object (package does not have a THAW method)"); 1024 ERR ("cannot decode perl-object (package does not have a THAW method)");
980 1025
981 dSP; 1026 dSP;
982 1027
983 ENTER; SAVETMPS; PUSHMARK (SP); 1028 ENTER; SAVETMPS;
1029 PUSHMARK (SP);
984 EXTEND (SP, len + 1); 1030 EXTEND (SP, len + 1);
985 // we re-bless the reference to get overload and other niceties right 1031 // we re-bless the reference to get overload and other niceties right
986 PUSHs (*av_fetch (av, 0, 1)); 1032 PUSHs (*av_fetch (av, 0, 1));
987 PUSHs (sv_cbor); 1033 PUSHs (sv_cbor);
988 1034
1013 default: 1059 default:
1014 { 1060 {
1015 sv = decode_sv (dec); 1061 sv = decode_sv (dec);
1016 1062
1017 dSP; 1063 dSP;
1018 ENTER; SAVETMPS; PUSHMARK (SP); 1064 ENTER; SAVETMPS;
1065 SAVESTACK_POS ();
1066 PUSHMARK (SP);
1019 EXTEND (SP, 2); 1067 EXTEND (SP, 2);
1020 PUSHs (newSVuv (tag)); 1068 PUSHs (newSVuv (tag));
1021 PUSHs (sv); 1069 PUSHs (sv);
1022 1070
1023 PUTBACK; 1071 PUTBACK;
1132 1180
1133 return newSVnv (ecb_binary64_to_double (fp)); 1181 return newSVnv (ecb_binary64_to_double (fp));
1134 } 1182 }
1135 1183
1136 // 0..19 unassigned simple 1184 // 0..19 unassigned simple
1137 // 24 reserved + unassigned (reserved values are not encodable) 1185 // 24 reserved + unassigned simple (reserved values are not encodable)
1186 // 28-30 unassigned misc
1187 // 31 break code
1138 default: 1188 default:
1139 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1189 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1140 } 1190 }
1141 1191
1142 break; 1192 break;
1143 } 1193 }
1144 1194
1147} 1197}
1148 1198
1149static SV * 1199static SV *
1150decode_cbor (SV *string, CBOR *cbor, char **offset_return) 1200decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1151{ 1201{
1152 dec_t dec = { }; 1202 dec_t dec = { 0 };
1153 SV *sv; 1203 SV *sv;
1154 STRLEN len; 1204 STRLEN len;
1155 char *data = SvPVbyte (string, len); 1205 char *data = SvPVbyte (string, len);
1156 1206
1157 if (len > cbor->max_size && cbor->max_size) 1207 if (len > cbor->max_size && cbor->max_size)
1171 if (dec.cur != dec.end && !dec.err) 1221 if (dec.cur != dec.end && !dec.err)
1172 dec.err = "garbage after CBOR object"; 1222 dec.err = "garbage after CBOR object";
1173 1223
1174 if (dec.err) 1224 if (dec.err)
1175 { 1225 {
1226 if (dec.shareable)
1227 {
1228 // need to break cyclic links, which would all be in shareable
1229 int i;
1230 SV **svp;
1231
1232 for (i = av_len (dec.shareable) + 1; i--; )
1233 if ((svp = av_fetch (dec.shareable, i, 0)))
1234 sv_setsv (*svp, &PL_sv_undef);
1235 }
1236
1176 SvREFCNT_dec (sv); 1237 SvREFCNT_dec (sv);
1177 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1238 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1178 } 1239 }
1179 1240
1180 sv = sv_2mortal (sv); 1241 sv = sv_2mortal (sv);
1181 1242
1182 return sv; 1243 return sv;
1183} 1244}
1184 1245
1246/////////////////////////////////////////////////////////////////////////////
1247// incremental parser
1248
1249#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1250
1251// returns 0 if more data is needed (not yet complete), 1 for success or error
1252static int
1253incr_parse (CBOR *self, SV *cborstr)
1254{
1255 STRLEN cur;
1256 SvPV (cborstr, cur);
1257
1258 while (ecb_expect_true (self->incr_need <= cur))
1259 {
1260 // table of integer count bytes
1261 static I8 incr_len[MINOR_MASK + 1] = {
1262 0, 0, 0, 0, 0, 0, 0, 0,
1263 0, 0, 0, 0, 0, 0, 0, 0,
1264 0, 0, 0, 0, 0, 0, 0, 0,
1265 1, 2, 4, 8,-1,-1,-1,-2
1266 };
1267
1268 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1269 U8 m = *p & MINOR_MASK;
1270 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1271 I8 ilen = incr_len[m];
1272
1273 self->incr_need = self->incr_pos + 1;
1274
1275 if (ecb_expect_false (ilen < 0))
1276 {
1277 if (m != MINOR_INDEF)
1278 return 1; // error
1279
1280 if (*p == (MAJOR_MISC | MINOR_INDEF))
1281 {
1282 if (count >= 0)
1283 return 1; // error
1284
1285 count = 1;
1286 }
1287 else
1288 {
1289 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1290 count = -1;
1291 }
1292 }
1293 else
1294 {
1295 self->incr_need += ilen;
1296 if (ecb_expect_false (self->incr_need > cur))
1297 return 0;
1298
1299 int major = *p >> MAJOR_SHIFT;
1300
1301 switch (major)
1302 {
1303 case MAJOR_TAG >> MAJOR_SHIFT:
1304 ++count; // tags merely prefix another value
1305 break;
1306
1307 case MAJOR_BYTES >> MAJOR_SHIFT:
1308 case MAJOR_TEXT >> MAJOR_SHIFT:
1309 case MAJOR_ARRAY >> MAJOR_SHIFT:
1310 case MAJOR_MAP >> MAJOR_SHIFT:
1311 {
1312 UV len;
1313
1314 if (ecb_expect_false (ilen))
1315 {
1316 len = 0;
1317
1318 do {
1319 len = (len << 8) | *++p;
1320 } while (--ilen);
1321 }
1322 else
1323 len = m;
1324
1325 switch (major)
1326 {
1327 case MAJOR_BYTES >> MAJOR_SHIFT:
1328 case MAJOR_TEXT >> MAJOR_SHIFT:
1329 self->incr_need += len;
1330 if (ecb_expect_false (self->incr_need > cur))
1331 return 0;
1332
1333 break;
1334
1335 case MAJOR_MAP >> MAJOR_SHIFT:
1336 len <<= 1;
1337 case MAJOR_ARRAY >> MAJOR_SHIFT:
1338 if (len)
1339 {
1340 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1341 count = len + 1;
1342 }
1343 break;
1344 }
1345 }
1346 }
1347 }
1348
1349 self->incr_pos = self->incr_need;
1350
1351 if (count > 0)
1352 {
1353 while (!--count)
1354 {
1355 if (!AvFILLp (self->incr_count))
1356 return 1; // done
1357
1358 SvREFCNT_dec_NN (av_pop (self->incr_count));
1359 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1360 }
1361
1362 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1363 }
1364 }
1365
1366 return 0;
1367}
1368
1369
1185///////////////////////////////////////////////////////////////////////////// 1370/////////////////////////////////////////////////////////////////////////////
1186// XS interface functions 1371// XS interface functions
1187 1372
1188MODULE = CBOR::XS PACKAGE = CBOR::XS 1373MODULE = CBOR::XS PACKAGE = CBOR::XS
1189 1374
1231 shrink = F_SHRINK 1416 shrink = F_SHRINK
1232 allow_unknown = F_ALLOW_UNKNOWN 1417 allow_unknown = F_ALLOW_UNKNOWN
1233 allow_sharing = F_ALLOW_SHARING 1418 allow_sharing = F_ALLOW_SHARING
1234 allow_cycles = F_ALLOW_CYCLES 1419 allow_cycles = F_ALLOW_CYCLES
1235 pack_strings = F_PACK_STRINGS 1420 pack_strings = F_PACK_STRINGS
1421 utf8_strings = F_UTF8_STRINGS
1236 validate_utf8 = F_VALIDATE_UTF8 1422 validate_utf8 = F_VALIDATE_UTF8
1237 PPCODE: 1423 PPCODE:
1238{ 1424{
1239 if (enable) 1425 if (enable)
1240 self->flags |= ix; 1426 self->flags |= ix;
1308 EXTEND (SP, 2); 1494 EXTEND (SP, 2);
1309 PUSHs (sv); 1495 PUSHs (sv);
1310 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1496 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1311} 1497}
1312 1498
1499void incr_parse (CBOR *self, SV *cborstr)
1500 ALIAS:
1501 incr_parse_multiple = 1
1502 PPCODE:
1503{
1504 if (SvUTF8 (cborstr))
1505 sv_utf8_downgrade (cborstr, 0);
1506
1507 if (!self->incr_count)
1508 {
1509 self->incr_count = newAV ();
1510 self->incr_pos = 0;
1511 self->incr_need = 1;
1512
1513 av_push (self->incr_count, newSViv (1));
1514 }
1515
1516 do
1517 {
1518 if (!incr_parse (self, cborstr))
1519 {
1520 if (self->incr_need > self->max_size && self->max_size)
1521 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1522 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1523
1524 break;
1525 }
1526
1527 SV *sv;
1528 char *offset;
1529
1530 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1531 XPUSHs (sv);
1532
1533 sv_chop (cborstr, offset);
1534
1535 av_clear (self->incr_count);
1536 av_push (self->incr_count, newSViv (1));
1537
1538 self->incr_pos = 0;
1539 self->incr_need = self->incr_pos + 1;
1540 }
1541 while (ix);
1542}
1543
1544void incr_reset (CBOR *self)
1545 CODE:
1546{
1547 SvREFCNT_dec (self->incr_count);
1548 self->incr_count = 0;
1549}
1550
1313void DESTROY (CBOR *self) 1551void DESTROY (CBOR *self)
1314 PPCODE: 1552 PPCODE:
1315 cbor_free (self); 1553 cbor_free (self);
1316 1554
1317PROTOTYPES: ENABLE 1555PROTOTYPES: ENABLE

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines