ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.39 by root, Sun Dec 1 14:45:03 2013 UTC vs.
Revision 1.53 by root, Sun Apr 24 19:51:41 2016 UTC

7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#define ECB_NO_THREADS 1
12#include "ecb.h" 13#include "ecb.h"
13 14
14// compatibility with perl <5.18 15// compatibility with perl <5.18
15#ifndef HvNAMELEN_get 16#ifndef HvNAMELEN_get
16# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) 17# define HvNAMELEN_get(hv) strlen (HvNAME (hv))
99#define F_SHRINK 0x00000001UL 100#define F_SHRINK 0x00000001UL
100#define F_ALLOW_UNKNOWN 0x00000002UL 101#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL 102#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL 103#define F_ALLOW_CYCLES 0x00000008UL
103#define F_PACK_STRINGS 0x00000010UL 104#define F_PACK_STRINGS 0x00000010UL
105#define F_UTF8_STRINGS 0x00000020UL
106#define F_UTF8_KEYS 0x00000040UL
104#define F_VALIDATE_UTF8 0x00000020UL 107#define F_VALIDATE_UTF8 0x00000080UL
105 108
106#define INIT_SIZE 32 // initial scalar size to be allocated 109#define INIT_SIZE 32 // initial scalar size to be allocated
107 110
108#define SB do { 111#define SB do {
109#define SE } while (0) 112#define SE } while (0)
128typedef struct { 131typedef struct {
129 U32 flags; 132 U32 flags;
130 U32 max_depth; 133 U32 max_depth;
131 STRLEN max_size; 134 STRLEN max_size;
132 SV *filter; 135 SV *filter;
136
137 // for the incremental parser
138 STRLEN incr_pos; // the current offset into the text
139 STRLEN incr_need; // minimum bytes needed to decode
140 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
133} CBOR; 141} CBOR;
134 142
135ecb_inline void 143ecb_inline void
136cbor_init (CBOR *cbor) 144cbor_init (CBOR *cbor)
137{ 145{
141 149
142ecb_inline void 150ecb_inline void
143cbor_free (CBOR *cbor) 151cbor_free (CBOR *cbor)
144{ 152{
145 SvREFCNT_dec (cbor->filter); 153 SvREFCNT_dec (cbor->filter);
154 SvREFCNT_dec (cbor->incr_count);
146} 155}
147 156
148///////////////////////////////////////////////////////////////////////////// 157/////////////////////////////////////////////////////////////////////////////
149// utility functions 158// utility functions
150 159
269encode_tag (enc_t *enc, UV tag) 278encode_tag (enc_t *enc, UV tag)
270{ 279{
271 encode_uint (enc, MAJOR_TAG, tag); 280 encode_uint (enc, MAJOR_TAG, tag);
272} 281}
273 282
// encode_str_utf8: writes the len bytes at str as a CBOR text string, turning
// every byte >= 0x80 into a two-byte sequence (0xc0 | high two bits, then
// 0x80 | low six bits). The utf8 parameter is not read in this body.
283// exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
284ecb_noinline static void
285encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
286{
287 STRLEN ulen = len;
288 U8 *p, *pend = (U8 *)str + len;
289
// pass 1: compute the output length - one extra byte per input byte with bit 7 set
290 for (p = (U8 *)str; p < pend; ++p)
291 ulen += *p >> 7; // count set high bits
292
// the header is emitted with the expanded length and the text major type
293 encode_uint (enc, MAJOR_TEXT, ulen);
294
// presumably reserves ulen bytes in the output buffer - helper not shown here, confirm
295 need (enc, ulen);
// pass 2: copy bytes below 0x80 verbatim, expand the rest to two bytes each
296 for (p = (U8 *)str; p < pend; ++p)
297 if (*p < 0x80)
298 *enc->cur++ = *p;
299 else
300 {
301 *enc->cur++ = 0xc0 + (*p >> 6);
302 *enc->cur++ = 0x80 + (*p & 63);
303 }
304}
305
274ecb_inline void 306ecb_inline void
275encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 307encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
276{ 308{
309 if (ecb_expect_false (upgrade_utf8))
310 if (!utf8)
311 {
312 encode_str_utf8 (enc, utf8, str, len);
313 return;
314 }
315
277 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); 316 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
278 need (enc, len); 317 need (enc, len);
279 memcpy (enc->cur, str, len); 318 memcpy (enc->cur, str, len);
280 enc->cur += len; 319 enc->cur += len;
281} 320}
282 321
283static void 322ecb_inline
284encode_strref (enc_t *enc, int utf8, char *str, STRLEN len) 323encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
285{ 324{
286 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) 325 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
287 { 326 {
288 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1); 327 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
289 328
300 sv_setuv (*svp, enc->stringref_idx); 339 sv_setuv (*svp, enc->stringref_idx);
301 ++enc->stringref_idx; 340 ++enc->stringref_idx;
302 } 341 }
303 } 342 }
304 343
305 encode_str (enc, utf8, str, len); 344 encode_str (enc, upgrade_utf8, utf8, str, len);
306} 345}
307 346
308static void encode_sv (enc_t *enc, SV *sv); 347static void encode_sv (enc_t *enc, SV *sv);
309 348
310static void 349static void
317 356
318 ++enc->depth; 357 ++enc->depth;
319 358
320 encode_uint (enc, MAJOR_ARRAY, len + 1); 359 encode_uint (enc, MAJOR_ARRAY, len + 1);
321 360
361 if (SvMAGICAL (av))
322 for (i = 0; i <= len; ++i) 362 for (i = 0; i <= len; ++i)
323 { 363 {
324 SV **svp = av_fetch (av, i, 0); 364 SV **svp = av_fetch (av, i, 0);
325 encode_sv (enc, svp ? *svp : &PL_sv_undef); 365 encode_sv (enc, svp ? *svp : &PL_sv_undef);
326 } 366 }
367 else
368 for (i = 0; i <= len; ++i)
369 {
370 SV *sv = AvARRAY (av)[i];
371 encode_sv (enc, sv ? sv : &PL_sv_undef);
372 }
327 373
328 --enc->depth; 374 --enc->depth;
329} 375}
330 376
331static void 377static void
349 while ((he = hv_iternext (hv))) 395 while ((he = hv_iternext (hv)))
350 { 396 {
351 if (HeKLEN (he) == HEf_SVKEY) 397 if (HeKLEN (he) == HEf_SVKEY)
352 encode_sv (enc, HeSVKEY (he)); 398 encode_sv (enc, HeSVKEY (he));
353 else 399 else
354 encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); 400 encode_strref (enc, enc->cbor.flags & (F_UTF8_KEYS | F_UTF8_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
355 401
356 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); 402 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
357 } 403 }
358 404
359 if (mg) 405 if (mg)
435 481
436 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) 482 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
437 { 483 {
438 dSP; 484 dSP;
439 485
440 ENTER; SAVETMPS; PUSHMARK (SP); 486 ENTER; SAVETMPS;
487 PUSHMARK (SP);
441 // we re-bless the reference to get overload and other niceties right 488 // we re-bless the reference to get overload and other niceties right
442 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 489 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
443 490
444 PUTBACK; 491 PUTBACK;
445 // G_SCALAR ensures that return value is 1 492 // G_SCALAR ensures that return value is 1
458 } 505 }
459 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) 506 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
460 { 507 {
461 dSP; 508 dSP;
462 509
463 ENTER; SAVETMPS; PUSHMARK (SP); 510 ENTER; SAVETMPS;
511 SAVESTACK_POS ();
512 PUSHMARK (SP);
464 EXTEND (SP, 2); 513 EXTEND (SP, 2);
465 // we re-bless the reference to get overload and other niceties right 514 // we re-bless the reference to get overload and other niceties right
466 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 515 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
467 PUSHs (sv_cbor); 516 PUSHs (sv_cbor);
468 517
474 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) 523 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
475 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); 524 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
476 525
477 encode_tag (enc, CBOR_TAG_PERL_OBJECT); 526 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
478 encode_uint (enc, MAJOR_ARRAY, count + 1); 527 encode_uint (enc, MAJOR_ARRAY, count + 1);
479 encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); 528 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
480 529
481 while (count) 530 while (count)
482 encode_sv (enc, SP[1 - count--]); 531 encode_sv (enc, SP[1 - count--]);
483 532
484 PUTBACK; 533 PUTBACK;
543 592
544 if (SvPOKp (sv)) 593 if (SvPOKp (sv))
545 { 594 {
546 STRLEN len; 595 STRLEN len;
547 char *str = SvPV (sv, len); 596 char *str = SvPV (sv, len);
548 encode_strref (enc, SvUTF8 (sv), str, len); 597 encode_strref (enc, enc->cbor.flags & F_UTF8_STRINGS, SvUTF8 (sv), str, len);
549 } 598 }
550 else if (SvNOKp (sv)) 599 else if (SvNOKp (sv))
551 encode_nv (enc, sv); 600 encode_nv (enc, sv);
552 else if (SvIOKp (sv)) 601 else if (SvIOKp (sv))
553 { 602 {
570} 619}
571 620
572static SV * 621static SV *
573encode_cbor (SV *scalar, CBOR *cbor) 622encode_cbor (SV *scalar, CBOR *cbor)
574{ 623{
575 enc_t enc = { }; 624 enc_t enc = { 0 };
576 625
577 enc.cbor = *cbor; 626 enc.cbor = *cbor;
578 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 627 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
579 enc.cur = SvPVX (enc.sv); 628 enc.cur = SvPVX (enc.sv);
580 enc.end = SvEND (enc.sv); 629 enc.end = SvEND (enc.sv);
730{ 779{
731 // for speed reasons, we specialcase single-string 780 // for speed reasons, we specialcase single-string
732 // byte or utf-8 strings as keys, but only when !stringref 781 // byte or utf-8 strings as keys, but only when !stringref
733 782
734 if (ecb_expect_true (!dec->stringref)) 783 if (ecb_expect_true (!dec->stringref))
735 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) 784 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
736 { 785 {
737 I32 len = decode_uint (dec); 786 I32 len = decode_uint (dec);
738 char *key = (char *)dec->cur; 787 char *key = (char *)dec->cur;
739 788
789 WANT (len);
740 dec->cur += len; 790 dec->cur += len;
741 791
742 hv_store (hv, key, len, decode_sv (dec), 0); 792 hv_store (hv, key, len, decode_sv (dec), 0);
743 793
744 return; 794 return;
745 } 795 }
746 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) 796 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
747 { 797 {
748 I32 len = decode_uint (dec); 798 I32 len = decode_uint (dec);
749 char *key = (char *)dec->cur; 799 char *key = (char *)dec->cur;
750 800
801 WANT (len);
751 dec->cur += len; 802 dec->cur += len;
752 803
753 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) 804 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
754 if (!is_utf8_string (key, len)) 805 if (!is_utf8_string (key, len))
755 ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); 806 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
978 if (!method) 1029 if (!method)
979 ERR ("cannot decode perl-object (package does not have a THAW method)"); 1030 ERR ("cannot decode perl-object (package does not have a THAW method)");
980 1031
981 dSP; 1032 dSP;
982 1033
983 ENTER; SAVETMPS; PUSHMARK (SP); 1034 ENTER; SAVETMPS;
1035 PUSHMARK (SP);
984 EXTEND (SP, len + 1); 1036 EXTEND (SP, len + 1);
985 // we re-bless the reference to get overload and other niceties right 1037 // we re-bless the reference to get overload and other niceties right
986 PUSHs (*av_fetch (av, 0, 1)); 1038 PUSHs (*av_fetch (av, 0, 1));
987 PUSHs (sv_cbor); 1039 PUSHs (sv_cbor);
988 1040
1013 default: 1065 default:
1014 { 1066 {
1015 sv = decode_sv (dec); 1067 sv = decode_sv (dec);
1016 1068
1017 dSP; 1069 dSP;
1018 ENTER; SAVETMPS; PUSHMARK (SP); 1070 ENTER; SAVETMPS;
1071 SAVESTACK_POS ();
1072 PUSHMARK (SP);
1019 EXTEND (SP, 2); 1073 EXTEND (SP, 2);
1020 PUSHs (newSVuv (tag)); 1074 PUSHs (newSVuv (tag));
1021 PUSHs (sv); 1075 PUSHs (sv);
1022 1076
1023 PUTBACK; 1077 PUTBACK;
1132 1186
1133 return newSVnv (ecb_binary64_to_double (fp)); 1187 return newSVnv (ecb_binary64_to_double (fp));
1134 } 1188 }
1135 1189
1136 // 0..19 unassigned simple 1190 // 0..19 unassigned simple
1137 // 24 reserved + unassigned (reserved values are not encodable) 1191 // 24 reserved + unassigned simple (reserved values are not encodable)
1192 // 28-30 unassigned misc
1193 // 31 break code
1138 default: 1194 default:
1139 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1195 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1140 } 1196 }
1141 1197
1142 break; 1198 break;
1143 } 1199 }
1144 1200
1147} 1203}
1148 1204
1149static SV * 1205static SV *
1150decode_cbor (SV *string, CBOR *cbor, char **offset_return) 1206decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1151{ 1207{
1152 dec_t dec = { }; 1208 dec_t dec = { 0 };
1153 SV *sv; 1209 SV *sv;
1154 STRLEN len; 1210 STRLEN len;
1155 char *data = SvPVbyte (string, len); 1211 char *data = SvPVbyte (string, len);
1156 1212
1157 if (len > cbor->max_size && cbor->max_size) 1213 if (len > cbor->max_size && cbor->max_size)
1191 sv = sv_2mortal (sv); 1247 sv = sv_2mortal (sv);
1192 1248
1193 return sv; 1249 return sv;
1194} 1250}
1195 1251
1252/////////////////////////////////////////////////////////////////////////////
1253// incremental parser
1254
1255#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1256
// incr_parse: walks cborstr from self->incr_pos one item header at a time
// without building any values. self->incr_count holds one counter per open
// nesting level (items still expected, or -1 for an indefinite level);
// self->incr_need is the byte count that must be available before the scan
// can continue.
1257// returns 0 for notyet, 1 for success or error
1258static int
1259incr_parse (CBOR *self, SV *cborstr)
1260{
1261 STRLEN cur;
// only the length is wanted here; the returned pointer is discarded
1262 SvPV (cborstr, cur);
1263
1264 while (ecb_expect_true (self->incr_need <= cur))
1265 {
// indexed by the minor bits of the initial byte: number of length bytes
// that follow it; negative entries take the special-case branch below
1266 // table of integer count bytes
1267 static I8 incr_len[MINOR_MASK + 1] = {
1268 0, 0, 0, 0, 0, 0, 0, 0,
1269 0, 0, 0, 0, 0, 0, 0, 0,
1270 0, 0, 0, 0, 0, 0, 0, 0,
1271 1, 2, 4, 8,-1,-1,-1,-2
1272 };
1273
1274 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1275 U8 m = *p & MINOR_MASK;
// counter of the innermost open level (top of the incr_count stack)
1276 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1277 I8 ilen = incr_len[m];
1278
// at minimum the initial byte itself is consumed
1279 self->incr_need = self->incr_pos + 1;
1280
1281 if (ecb_expect_false (ilen < 0))
1282 {
// negative table entry that is not MINOR_INDEF: stop and report 1
1283 if (m != MINOR_INDEF)
1284 return 1; // error
1285
1286 if (*p == (MAJOR_MISC | MINOR_INDEF))
1287 {
// this byte is only accepted while the current level is indefinite (count < 0)
1288 if (count >= 0)
1289 return 1; // error
1290
// count = 1 makes the decrement loop further down pop this level
1291 count = 1;
1292 }
1293 else
1294 {
// any other major with MINOR_INDEF opens a new indefinite level
1295 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1296 count = -1;
1297 }
1298 }
1299 else
1300 {
// the length bytes must be present before they are read
1301 self->incr_need += ilen;
1302 if (ecb_expect_false (self->incr_need > cur))
1303 return 0;
1304
1305 int major = *p >> MAJOR_SHIFT;
1306
1307 switch (major)
1308 {
1309 case MAJOR_TAG >> MAJOR_SHIFT:
// cancels the decrement below, so the tagged value is what gets counted
1310 ++count; // tags merely prefix another value
1311 break;
1312
1313 case MAJOR_BYTES >> MAJOR_SHIFT:
1314 case MAJOR_TEXT >> MAJOR_SHIFT:
1315 case MAJOR_ARRAY >> MAJOR_SHIFT:
1316 case MAJOR_MAP >> MAJOR_SHIFT:
1317 {
1318 UV len;
1319
1320 if (ecb_expect_false (ilen))
1321 {
1322 len = 0;
1323
// assemble the ilen following bytes, most significant byte first
1324 do {
1325 len = (len << 8) | *++p;
1326 } while (--ilen);
1327 }
1328 else
// no length bytes follow: the minor bits are the length
1329 len = m;
1330
1331 switch (major)
1332 {
1333 case MAJOR_BYTES >> MAJOR_SHIFT:
1334 case MAJOR_TEXT >> MAJOR_SHIFT:
// strings: the payload bytes must be available as well
// NOTE(review): len is a UV taken from the input; confirm this addition
// cannot wrap for very large lengths
1335 self->incr_need += len;
1336 if (ecb_expect_false (self->incr_need > cur))
1337 return 0;
1338
1339 break;
1340
1341 case MAJOR_MAP >> MAJOR_SHIFT:
// a map entry is two items; deliberate fall-through into the array case
1342 len <<= 1;
1343 case MAJOR_ARRAY >> MAJOR_SHIFT:
// a non-empty container opens a new level; the +1 is consumed by the
// decrement below for the container header itself
1344 if (len)
1345 {
1346 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1347 count = len + 1;
1348 }
1349 break;
1350 }
1351 }
1352 }
1353 }
1354
// everything up to incr_need has now been accounted for
1355 self->incr_pos = self->incr_need;
1356
// indefinite levels (count < 0) are not decremented and nothing is stored back
1357 if (count > 0)
1358 {
// count one item off the current level; each level that reaches zero is
// popped and then counts as one item of its parent
1359 while (!--count)
1360 {
// the bottom level reaching zero means the scan is finished
1361 if (!AvFILLp (self->incr_count))
1362 return 1; // done
1363
1364 SvREFCNT_dec_NN (av_pop (self->incr_count));
1365 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1366 }
1367
// write the updated counter back to the level that is now innermost
1368 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1369 }
1370 }
1371
// ran out of input before the scan finished
1372 return 0;
1373}
1374
1375
1196///////////////////////////////////////////////////////////////////////////// 1376/////////////////////////////////////////////////////////////////////////////
1197// XS interface functions 1377// XS interface functions
1198 1378
1199MODULE = CBOR::XS PACKAGE = CBOR::XS 1379MODULE = CBOR::XS PACKAGE = CBOR::XS
1200 1380
1242 shrink = F_SHRINK 1422 shrink = F_SHRINK
1243 allow_unknown = F_ALLOW_UNKNOWN 1423 allow_unknown = F_ALLOW_UNKNOWN
1244 allow_sharing = F_ALLOW_SHARING 1424 allow_sharing = F_ALLOW_SHARING
1245 allow_cycles = F_ALLOW_CYCLES 1425 allow_cycles = F_ALLOW_CYCLES
1246 pack_strings = F_PACK_STRINGS 1426 pack_strings = F_PACK_STRINGS
1427 utf8_strings = F_UTF8_STRINGS
1247 validate_utf8 = F_VALIDATE_UTF8 1428 validate_utf8 = F_VALIDATE_UTF8
1248 PPCODE: 1429 PPCODE:
1249{ 1430{
1250 if (enable) 1431 if (enable)
1251 self->flags |= ix; 1432 self->flags |= ix;
1319 EXTEND (SP, 2); 1500 EXTEND (SP, 2);
1320 PUSHs (sv); 1501 PUSHs (sv);
1321 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1502 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1322} 1503}
1323 1504
1505void incr_parse (CBOR *self, SV *cborstr)
1506 ALIAS:
1507 incr_parse_multiple = 1
1508 PPCODE:
1509{
 // XS entry point shared by incr_parse (ix == 0) and incr_parse_multiple
 // (ix == 1): pre-scans cborstr with the C-level incr_parse and, each time it
 // returns nonzero, decodes from the front of cborstr, pushes the result and
 // chops the consumed bytes off cborstr.
1510 if (SvUTF8 (cborstr))
1511 sv_utf8_downgrade (cborstr, 0);
1512
 // lazily create the scanner state: one level expecting a single value
1513 if (!self->incr_count)
1514 {
1515 self->incr_count = newAV ();
1516 self->incr_pos = 0;
1517 self->incr_need = 1;
1518
1519 av_push (self->incr_count, newSViv (1));
1520 }
1521
1522 do
1523 {
1524 if (!incr_parse (self, cborstr))
1525 {
 // not enough data yet: croak if the bytes needed already exceed a
 // nonzero max_size, otherwise stop and keep the state for the next call
1526 if (self->incr_need > self->max_size && self->max_size)
1527 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1528 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1529
1530 break;
1531 }
1532
1533 SV *sv;
1534 char *offset;
1535
 // the scanner returns 1 for both "done" and "error"; presumably decode_cbor
 // reports malformed input itself - confirm against decode_cbor
1536 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1537 XPUSHs (sv);
1538
 // remove everything before offset from cborstr
1539 sv_chop (cborstr, offset);
1540
 // reset the scanner to its initial state for the next value
1541 av_clear (self->incr_count);
1542 av_push (self->incr_count, newSViv (1));
1543
1544 self->incr_pos = 0;
1545 self->incr_need = self->incr_pos + 1;
1546 }
 // only the incr_parse_multiple alias (ix != 0) loops for further values
1547 while (ix);
1548}
1549
1550void incr_reset (CBOR *self)
1551 CODE:
1552{
 // drops the per-level counter array and nulls the pointer; incr_parse
 // re-creates the state whenever incr_count is null
1553 SvREFCNT_dec (self->incr_count);
1554 self->incr_count = 0;
1555}
1556
1324void DESTROY (CBOR *self) 1557void DESTROY (CBOR *self)
1325 PPCODE: 1558 PPCODE:
1326 cbor_free (self); 1559 cbor_free (self);
1327 1560
1328PROTOTYPES: ENABLE 1561PROTOTYPES: ENABLE

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines