ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.37 by root, Sat Nov 30 18:13:53 2013 UTC vs.
Revision 1.50 by root, Thu Feb 25 02:29:22 2016 UTC

7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#define ECB_NO_THREADS 1
12#include "ecb.h" 13#include "ecb.h"
13 14
14// compatibility with perl <5.18 15// compatibility with perl <5.18
15#ifndef HvNAMELEN_get 16#ifndef HvNAMELEN_get
16# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) 17# define HvNAMELEN_get(hv) strlen (HvNAME (hv))
99#define F_SHRINK 0x00000001UL 100#define F_SHRINK 0x00000001UL
100#define F_ALLOW_UNKNOWN 0x00000002UL 101#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL 102#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL 103#define F_ALLOW_CYCLES 0x00000008UL
103#define F_PACK_STRINGS 0x00000010UL 104#define F_PACK_STRINGS 0x00000010UL
105#define F_VALIDATE_UTF8 0x00000020UL
104 106
105#define INIT_SIZE 32 // initial scalar size to be allocated 107#define INIT_SIZE 32 // initial scalar size to be allocated
106 108
107#define SB do { 109#define SB do {
108#define SE } while (0) 110#define SE } while (0)
127typedef struct { 129typedef struct {
128 U32 flags; 130 U32 flags;
129 U32 max_depth; 131 U32 max_depth;
130 STRLEN max_size; 132 STRLEN max_size;
131 SV *filter; 133 SV *filter;
134
135 // for the incremental parser
136 STRLEN incr_pos; // the current offset into the text
137 STRLEN incr_need; // minimum bytes needed to decode
138 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
132} CBOR; 139} CBOR;
133 140
134ecb_inline void 141ecb_inline void
135cbor_init (CBOR *cbor) 142cbor_init (CBOR *cbor)
136{ 143{
140 147
141ecb_inline void 148ecb_inline void
142cbor_free (CBOR *cbor) 149cbor_free (CBOR *cbor)
143{ 150{
144 SvREFCNT_dec (cbor->filter); 151 SvREFCNT_dec (cbor->filter);
152 SvREFCNT_dec (cbor->incr_count);
145} 153}
146 154
147///////////////////////////////////////////////////////////////////////////// 155/////////////////////////////////////////////////////////////////////////////
148// utility functions 156// utility functions
149 157
316 324
317 ++enc->depth; 325 ++enc->depth;
318 326
319 encode_uint (enc, MAJOR_ARRAY, len + 1); 327 encode_uint (enc, MAJOR_ARRAY, len + 1);
320 328
329 if (SvMAGICAL (av))
321 for (i = 0; i <= len; ++i) 330 for (i = 0; i <= len; ++i)
322 { 331 {
323 SV **svp = av_fetch (av, i, 0); 332 SV **svp = av_fetch (av, i, 0);
324 encode_sv (enc, svp ? *svp : &PL_sv_undef); 333 encode_sv (enc, svp ? *svp : &PL_sv_undef);
325 } 334 }
335 else
336 for (i = 0; i <= len; ++i)
337 {
338 SV *sv = AvARRAY (av)[i];
339 encode_sv (enc, sv ? sv : &PL_sv_undef);
340 }
326 341
327 --enc->depth; 342 --enc->depth;
328} 343}
329 344
330static void 345static void
434 449
435 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) 450 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
436 { 451 {
437 dSP; 452 dSP;
438 453
439 ENTER; SAVETMPS; PUSHMARK (SP); 454 ENTER; SAVETMPS;
455 PUSHMARK (SP);
440 // we re-bless the reference to get overload and other niceties right 456 // we re-bless the reference to get overload and other niceties right
441 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 457 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
442 458
443 PUTBACK; 459 PUTBACK;
444 // G_SCALAR ensures that return value is 1 460 // G_SCALAR ensures that return value is 1
457 } 473 }
458 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) 474 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
459 { 475 {
460 dSP; 476 dSP;
461 477
462 ENTER; SAVETMPS; PUSHMARK (SP); 478 ENTER; SAVETMPS;
479 SAVESTACK_POS ();
480 PUSHMARK (SP);
463 EXTEND (SP, 2); 481 EXTEND (SP, 2);
464 // we re-bless the reference to get overload and other niceties right 482 // we re-bless the reference to get overload and other niceties right
465 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 483 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
466 PUSHs (sv_cbor); 484 PUSHs (sv_cbor);
467 485
569} 587}
570 588
571static SV * 589static SV *
572encode_cbor (SV *scalar, CBOR *cbor) 590encode_cbor (SV *scalar, CBOR *cbor)
573{ 591{
574 enc_t enc = { }; 592 enc_t enc = { 0 };
575 593
576 enc.cbor = *cbor; 594 enc.cbor = *cbor;
577 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 595 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
578 enc.cur = SvPVX (enc.sv); 596 enc.cur = SvPVX (enc.sv);
579 enc.end = SvEND (enc.sv); 597 enc.end = SvEND (enc.sv);
729{ 747{
730 // for speed reasons, we specialcase single-string 748 // for speed reasons, we specialcase single-string
731 // byte or utf-8 strings as keys, but only when !stringref 749 // byte or utf-8 strings as keys, but only when !stringref
732 750
733 if (ecb_expect_true (!dec->stringref)) 751 if (ecb_expect_true (!dec->stringref))
734 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) 752 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
735 { 753 {
736 I32 len = decode_uint (dec); 754 I32 len = decode_uint (dec);
737 char *key = (char *)dec->cur; 755 char *key = (char *)dec->cur;
738 756
757 WANT (len);
739 dec->cur += len; 758 dec->cur += len;
740 759
741 if (ecb_expect_false (dec->stringref))
742 av_push (dec->stringref, newSVpvn (key, len));
743
744 hv_store (hv, key, len, decode_sv (dec), 0); 760 hv_store (hv, key, len, decode_sv (dec), 0);
745 761
746 return; 762 return;
747 } 763 }
748 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) 764 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
749 { 765 {
750 I32 len = decode_uint (dec); 766 I32 len = decode_uint (dec);
751 char *key = (char *)dec->cur; 767 char *key = (char *)dec->cur;
752 768
769 WANT (len);
753 dec->cur += len; 770 dec->cur += len;
754 771
755 if (ecb_expect_false (dec->stringref)) 772 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
756 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1)); 773 if (!is_utf8_string (key, len))
774 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
757 775
758 hv_store (hv, key, -len, decode_sv (dec), 0); 776 hv_store (hv, key, -len, decode_sv (dec), 0);
759 777
760 return; 778 return;
761 } 779 }
763 SV *k = decode_sv (dec); 781 SV *k = decode_sv (dec);
764 SV *v = decode_sv (dec); 782 SV *v = decode_sv (dec);
765 783
766 hv_store_ent (hv, k, v, 0); 784 hv_store_ent (hv, k, v, 0);
767 SvREFCNT_dec (k); 785 SvREFCNT_dec (k);
786
787fail:
788 ;
768} 789}
769 790
770static SV * 791static SV *
771decode_hv (dec_t *dec) 792decode_hv (dec_t *dec)
772{ 793{
854 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1)) 875 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
855 av_push (dec->stringref, SvREFCNT_inc_NN (sv)); 876 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
856 } 877 }
857 878
858 if (utf8) 879 if (utf8)
880 {
881 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
882 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
883 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
884
859 SvUTF8_on (sv); 885 SvUTF8_on (sv);
886 }
860 887
861 return sv; 888 return sv;
862 889
863fail: 890fail:
864 SvREFCNT_dec (sv); 891 SvREFCNT_dec (sv);
970 if (!method) 997 if (!method)
971 ERR ("cannot decode perl-object (package does not have a THAW method)"); 998 ERR ("cannot decode perl-object (package does not have a THAW method)");
972 999
973 dSP; 1000 dSP;
974 1001
975 ENTER; SAVETMPS; PUSHMARK (SP); 1002 ENTER; SAVETMPS;
1003 PUSHMARK (SP);
976 EXTEND (SP, len + 1); 1004 EXTEND (SP, len + 1);
977 // we re-bless the reference to get overload and other niceties right 1005 // we re-bless the reference to get overload and other niceties right
978 PUSHs (*av_fetch (av, 0, 1)); 1006 PUSHs (*av_fetch (av, 0, 1));
979 PUSHs (sv_cbor); 1007 PUSHs (sv_cbor);
980 1008
1005 default: 1033 default:
1006 { 1034 {
1007 sv = decode_sv (dec); 1035 sv = decode_sv (dec);
1008 1036
1009 dSP; 1037 dSP;
1010 ENTER; SAVETMPS; PUSHMARK (SP); 1038 ENTER; SAVETMPS;
1039 SAVESTACK_POS ();
1040 PUSHMARK (SP);
1011 EXTEND (SP, 2); 1041 EXTEND (SP, 2);
1012 PUSHs (newSVuv (tag)); 1042 PUSHs (newSVuv (tag));
1013 PUSHs (sv); 1043 PUSHs (sv);
1014 1044
1015 PUTBACK; 1045 PUTBACK;
1124 1154
1125 return newSVnv (ecb_binary64_to_double (fp)); 1155 return newSVnv (ecb_binary64_to_double (fp));
1126 } 1156 }
1127 1157
1128 // 0..19 unassigned simple 1158 // 0..19 unassigned simple
1129 // 24 reserved + unassigned (reserved values are not encodable) 1159 // 24 reserved + unassigned simple (reserved values are not encodable)
1160 // 28-30 unassigned misc
1161 // 31 break code
1130 default: 1162 default:
1131 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1163 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1132 } 1164 }
1133 1165
1134 break; 1166 break;
1135 } 1167 }
1136 1168
1139} 1171}
1140 1172
1141static SV * 1173static SV *
1142decode_cbor (SV *string, CBOR *cbor, char **offset_return) 1174decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1143{ 1175{
1144 dec_t dec = { }; 1176 dec_t dec = { 0 };
1145 SV *sv; 1177 SV *sv;
1146 STRLEN len; 1178 STRLEN len;
1147 char *data = SvPVbyte (string, len); 1179 char *data = SvPVbyte (string, len);
1148 1180
1149 if (len > cbor->max_size && cbor->max_size) 1181 if (len > cbor->max_size && cbor->max_size)
1163 if (dec.cur != dec.end && !dec.err) 1195 if (dec.cur != dec.end && !dec.err)
1164 dec.err = "garbage after CBOR object"; 1196 dec.err = "garbage after CBOR object";
1165 1197
1166 if (dec.err) 1198 if (dec.err)
1167 { 1199 {
1200 if (dec.shareable)
1201 {
1202 // need to break cyclic links, which would all be in shareable
1203 int i;
1204 SV **svp;
1205
1206 for (i = av_len (dec.shareable) + 1; i--; )
1207 if ((svp = av_fetch (dec.shareable, i, 0)))
1208 sv_setsv (*svp, &PL_sv_undef);
1209 }
1210
1168 SvREFCNT_dec (sv); 1211 SvREFCNT_dec (sv);
1169 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); 1212 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1170 } 1213 }
1171 1214
1172 sv = sv_2mortal (sv); 1215 sv = sv_2mortal (sv);
1173 1216
1174 return sv; 1217 return sv;
1175} 1218}
1176 1219
1220/////////////////////////////////////////////////////////////////////////////
1221// incremental parser
1222
1223#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1224
1225// returns 0 for not yet, 1 for success or error
1226static int
1227incr_parse (CBOR *self, SV *cborstr)
1228{
1229 STRLEN cur;
1230 SvPV (cborstr, cur);
1231
1232 while (ecb_expect_true (self->incr_need <= cur))
1233 {
1234 // table of integer count bytes
1235 static I8 incr_len[MINOR_MASK + 1] = {
1236 0, 0, 0, 0, 0, 0, 0, 0,
1237 0, 0, 0, 0, 0, 0, 0, 0,
1238 0, 0, 0, 0, 0, 0, 0, 0,
1239 1, 2, 4, 8,-1,-1,-1,-2
1240 };
1241
1242 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1243 U8 m = *p & MINOR_MASK;
1244 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1245 I8 ilen = incr_len[m];
1246
1247 self->incr_need = self->incr_pos + 1;
1248
1249 if (ecb_expect_false (ilen < 0))
1250 {
1251 if (m != MINOR_INDEF)
1252 return 1; // error
1253
1254 if (*p == (MAJOR_MISC | MINOR_INDEF))
1255 {
1256 if (count >= 0)
1257 return 1; // error
1258
1259 count = 1;
1260 }
1261 else
1262 {
1263 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1264 count = -1;
1265 }
1266 }
1267 else
1268 {
1269 self->incr_need += ilen;
1270 if (ecb_expect_false (self->incr_need > cur))
1271 return 0;
1272
1273 int major = *p >> MAJOR_SHIFT;
1274
1275 switch (major)
1276 {
1277 case MAJOR_TAG >> MAJOR_SHIFT:
1278 ++count; // tags merely prefix another value
1279 break;
1280
1281 case MAJOR_BYTES >> MAJOR_SHIFT:
1282 case MAJOR_TEXT >> MAJOR_SHIFT:
1283 case MAJOR_ARRAY >> MAJOR_SHIFT:
1284 case MAJOR_MAP >> MAJOR_SHIFT:
1285 {
1286 UV len;
1287
1288 if (ecb_expect_false (ilen))
1289 {
1290 len = 0;
1291
1292 do {
1293 len = (len << 8) | *++p;
1294 } while (--ilen);
1295 }
1296 else
1297 len = m;
1298
1299 switch (major)
1300 {
1301 case MAJOR_BYTES >> MAJOR_SHIFT:
1302 case MAJOR_TEXT >> MAJOR_SHIFT:
1303 self->incr_need += len;
1304 if (ecb_expect_false (self->incr_need > cur))
1305 return 0;
1306
1307 break;
1308
1309 case MAJOR_MAP >> MAJOR_SHIFT:
1310 len <<= 1;
1311 case MAJOR_ARRAY >> MAJOR_SHIFT:
1312 if (len)
1313 {
1314 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1315 count = len + 1;
1316 }
1317 break;
1318 }
1319 }
1320 }
1321 }
1322
1323 self->incr_pos = self->incr_need;
1324
1325 if (count > 0)
1326 {
1327 while (!--count)
1328 {
1329 if (!AvFILLp (self->incr_count))
1330 return 1; // done
1331
1332 SvREFCNT_dec_NN (av_pop (self->incr_count));
1333 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1334 }
1335
1336 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1337 }
1338 }
1339
1340 return 0;
1341}
1342
1343
1177///////////////////////////////////////////////////////////////////////////// 1344/////////////////////////////////////////////////////////////////////////////
1178// XS interface functions 1345// XS interface functions
1179 1346
1180MODULE = CBOR::XS PACKAGE = CBOR::XS 1347MODULE = CBOR::XS PACKAGE = CBOR::XS
1181 1348
1223 shrink = F_SHRINK 1390 shrink = F_SHRINK
1224 allow_unknown = F_ALLOW_UNKNOWN 1391 allow_unknown = F_ALLOW_UNKNOWN
1225 allow_sharing = F_ALLOW_SHARING 1392 allow_sharing = F_ALLOW_SHARING
1226 allow_cycles = F_ALLOW_CYCLES 1393 allow_cycles = F_ALLOW_CYCLES
1227 pack_strings = F_PACK_STRINGS 1394 pack_strings = F_PACK_STRINGS
1395 validate_utf8 = F_VALIDATE_UTF8
1228 PPCODE: 1396 PPCODE:
1229{ 1397{
1230 if (enable) 1398 if (enable)
1231 self->flags |= ix; 1399 self->flags |= ix;
1232 else 1400 else
1240 get_shrink = F_SHRINK 1408 get_shrink = F_SHRINK
1241 get_allow_unknown = F_ALLOW_UNKNOWN 1409 get_allow_unknown = F_ALLOW_UNKNOWN
1242 get_allow_sharing = F_ALLOW_SHARING 1410 get_allow_sharing = F_ALLOW_SHARING
1243 get_allow_cycles = F_ALLOW_CYCLES 1411 get_allow_cycles = F_ALLOW_CYCLES
1244 get_pack_strings = F_PACK_STRINGS 1412 get_pack_strings = F_PACK_STRINGS
1413 get_validate_utf8 = F_VALIDATE_UTF8
1245 PPCODE: 1414 PPCODE:
1246 XPUSHs (boolSV (self->flags & ix)); 1415 XPUSHs (boolSV (self->flags & ix));
1247 1416
1248void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1417void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1249 PPCODE: 1418 PPCODE:
1298 EXTEND (SP, 2); 1467 EXTEND (SP, 2);
1299 PUSHs (sv); 1468 PUSHs (sv);
1300 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1469 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1301} 1470}
1302 1471
1472void incr_parse (CBOR *self, SV *cborstr)
1473 ALIAS:
1474 incr_parse_multiple = 1
1475 PPCODE:
1476{
1477 if (SvUTF8 (cborstr))
1478 sv_utf8_downgrade (cborstr, 0);
1479
1480 if (!self->incr_count)
1481 {
1482 self->incr_count = newAV ();
1483 self->incr_pos = 0;
1484 self->incr_need = 1;
1485
1486 av_push (self->incr_count, newSViv (1));
1487 }
1488
1489 do
1490 {
1491 if (!incr_parse (self, cborstr))
1492 {
1493 if (self->incr_need > self->max_size && self->max_size)
1494 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1495 (unsigned long)self->incr_need, (unsigned long)self->max_size);
1496
1497 break;
1498 }
1499
1500 SV *sv;
1501 char *offset;
1502
1503 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1504 XPUSHs (sv);
1505
1506 sv_chop (cborstr, offset);
1507
1508 av_clear (self->incr_count);
1509 av_push (self->incr_count, newSViv (1));
1510
1511 self->incr_pos = 0;
1512 self->incr_need = self->incr_pos + 1;
1513 }
1514 while (ix);
1515}
1516
1517void incr_reset (CBOR *self)
1518 CODE:
1519{
1520 SvREFCNT_dec (self->incr_count);
1521 self->incr_count = 0;
1522}
1523
1303void DESTROY (CBOR *self) 1524void DESTROY (CBOR *self)
1304 PPCODE: 1525 PPCODE:
1305 cbor_free (self); 1526 cbor_free (self);
1306 1527
1307PROTOTYPES: ENABLE 1528PROTOTYPES: ENABLE

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines