ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.9 by root, Sun Oct 27 10:17:12 2013 UTC vs.
Revision 1.41 by root, Tue Dec 10 14:01:52 2013 UTC

9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#include "ecb.h" 12#include "ecb.h"
13 13
14// compatibility with perl <5.18
15#ifndef HvNAMELEN_get
16# define HvNAMELEN_get(hv) strlen (HvNAME (hv))
17#endif
18#ifndef HvNAMELEN
19# define HvNAMELEN(hv) HvNAMELEN_get (hv)
20#endif
21#ifndef HvNAMEUTF8
22# define HvNAMEUTF8(hv) 0
23#endif
24#ifndef SvREFCNT_dec_NN
25# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
26#endif
27
28// known major and minor types
29enum cbor_type
30{
31 MAJOR_SHIFT = 5,
32 MINOR_MASK = 0x1f,
33
34 MAJOR_POS_INT = 0 << MAJOR_SHIFT,
35 MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
36 MAJOR_BYTES = 2 << MAJOR_SHIFT,
37 MAJOR_TEXT = 3 << MAJOR_SHIFT,
38 MAJOR_ARRAY = 4 << MAJOR_SHIFT,
39 MAJOR_MAP = 5 << MAJOR_SHIFT,
40 MAJOR_TAG = 6 << MAJOR_SHIFT,
41 MAJOR_MISC = 7 << MAJOR_SHIFT,
42
43 // INT/STRING/ARRAY/MAP subtypes
44 LENGTH_EXT1 = 24,
45 LENGTH_EXT2 = 25,
46 LENGTH_EXT4 = 26,
47 LENGTH_EXT8 = 27,
48
49 // SIMPLE types (effectively MISC subtypes)
50 SIMPLE_FALSE = 20,
51 SIMPLE_TRUE = 21,
52 SIMPLE_NULL = 22,
53 SIMPLE_UNDEF = 23,
54
55 // MISC subtype (unused)
56 MISC_EXT1 = 24,
57 MISC_FLOAT16 = 25,
58 MISC_FLOAT32 = 26,
59 MISC_FLOAT64 = 27,
60
61 // BYTES/TEXT/ARRAY/MAP
62 MINOR_INDEF = 31,
63};
64
14// known tags 65// known tags
15enum cbor_tag 66enum cbor_tag
16{ 67{
17 // unofficial extensions (pending IANA registration) 68 // extensions
18 CBOR_TAG_PERL_OBJECT = 256, 69 CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
19 CBOR_TAG_GENERIC_OBJECT = 257, 70 CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
71 CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
72 CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
73 CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
74 CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
75 CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
20 76
21 // rfc7049 77 // rfc7049
22 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8 78 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
23 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any 79 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
24 CBOR_TAG_POS_BIGNUM = 2, // byte string 80 CBOR_TAG_POS_BIGNUM = 2, // byte string
25 CBOR_TAG_NEG_BIGNUM = 3, // byte string 81 CBOR_TAG_NEG_BIGNUM = 3, // byte string
26 CBOR_TAG_DECIMAL = 4, // decimal fraction, array 82 CBOR_TAG_DECIMAL = 4, // decimal fraction, array
27 CBOR_TAG_BIGFLOAT = 5, // array 83 CBOR_TAG_BIGFLOAT = 5, // array
28 84
29 CBOR_TAG_CONV_B64U = 21, // base64url, any 85 CBOR_TAG_CONV_B64U = 21, // base64url, any
30 CBOR_TAG_CONV_B64 = 22, // base64, any 86 CBOR_TAG_CONV_B64 = 22, // base64, any
31 CBOR_TAG_CONV_HEX = 23, // base16, any 87 CBOR_TAG_CONV_HEX = 23, // base16, any
32 CBOR_TAG_CBOR = 24, // embedded cbor, byte string 88 CBOR_TAG_CBOR = 24, // embedded cbor, byte string
33 89
34 CBOR_TAG_URI = 32, // URI rfc3986, utf-8 90 CBOR_TAG_URI = 32, // URI rfc3986, utf-8
35 CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8 91 CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
36 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8 92 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8
37 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8 93 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
38 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 94 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
39 95
40 CBOR_TAG_MAGIC = 55799 // self-describe cbor 96 CBOR_TAG_MAGIC = 55799, // self-describe cbor
41}; 97};
42 98
43#define F_SHRINK 0x00000200UL 99#define F_SHRINK 0x00000001UL
44#define F_ALLOW_UNKNOWN 0x00002000UL 100#define F_ALLOW_UNKNOWN 0x00000002UL
101#define F_ALLOW_SHARING 0x00000004UL
102#define F_ALLOW_CYCLES 0x00000008UL
103#define F_PACK_STRINGS 0x00000010UL
104#define F_VALIDATE_UTF8 0x00000020UL
45 105
46#define INIT_SIZE 32 // initial scalar size to be allocated 106#define INIT_SIZE 32 // initial scalar size to be allocated
47 107
48#define SB do { 108#define SB do {
49#define SE } while (0) 109#define SE } while (0)
60#else 120#else
61# define CBOR_SLOW 0 121# define CBOR_SLOW 0
62# define CBOR_STASH cbor_stash 122# define CBOR_STASH cbor_stash
63#endif 123#endif
64 124
65static HV *cbor_stash, *cbor_boolean_stash, *cbor_tagged_stash; // CBOR::XS:: 125static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
66static SV *cbor_true, *cbor_false; 126static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
67 127
68typedef struct { 128typedef struct {
69 U32 flags; 129 U32 flags;
70 U32 max_depth; 130 U32 max_depth;
71 STRLEN max_size; 131 STRLEN max_size;
132 SV *filter;
133
134 // for the incremental parser
135 STRLEN incr_pos; // the current offset into the text
136 STRLEN incr_need; // minimum bytes needed to decode
137 AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
72} CBOR; 138} CBOR;
73 139
74ecb_inline void 140ecb_inline void
75cbor_init (CBOR *cbor) 141cbor_init (CBOR *cbor)
76{ 142{
77 Zero (cbor, 1, CBOR); 143 Zero (cbor, 1, CBOR);
78 cbor->max_depth = 512; 144 cbor->max_depth = 512;
145}
146
147ecb_inline void
148cbor_free (CBOR *cbor)
149{
150 SvREFCNT_dec (cbor->filter);
151 SvREFCNT_dec (cbor->incr_count);
79} 152}
80 153
81///////////////////////////////////////////////////////////////////////////// 154/////////////////////////////////////////////////////////////////////////////
82// utility functions 155// utility functions
83 156
105 SvPV_renew (sv, SvCUR (sv) + 1); 178 SvPV_renew (sv, SvCUR (sv) + 1);
106#endif 179#endif
107 } 180 }
108} 181}
109 182
110///////////////////////////////////////////////////////////////////////////// 183// minimum length of a string to be registered for stringref
111// fp hell 184ecb_inline int
112 185minimum_string_length (UV idx)
113//TODO 186{
187 return idx > 23
188 ? idx > 0xffU
189 ? idx > 0xffffU
190 ? idx > 0xffffffffU
191 ? 11
192 : 7
193 : 5
194 : 4
195 : 3;
196}
114 197
115///////////////////////////////////////////////////////////////////////////// 198/////////////////////////////////////////////////////////////////////////////
116// encoder 199// encoder
117 200
118// structure used for encoding CBOR 201// structure used for encoding CBOR
121 char *cur; // SvPVX (sv) + current output position 204 char *cur; // SvPVX (sv) + current output position
122 char *end; // SvEND (sv) 205 char *end; // SvEND (sv)
123 SV *sv; // result scalar 206 SV *sv; // result scalar
124 CBOR cbor; 207 CBOR cbor;
125 U32 depth; // recursion level 208 U32 depth; // recursion level
209 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
210 UV stringref_idx;
211 HV *shareable; // ptr => index, or 0
212 UV shareable_idx;
126} enc_t; 213} enc_t;
127 214
128ecb_inline void 215ecb_inline void
129need (enc_t *enc, STRLEN len) 216need (enc_t *enc, STRLEN len)
130{ 217{
147static void 234static void
148encode_uint (enc_t *enc, int major, UV len) 235encode_uint (enc_t *enc, int major, UV len)
149{ 236{
150 need (enc, 9); 237 need (enc, 9);
151 238
152 if (len < 24) 239 if (ecb_expect_true (len < LENGTH_EXT1))
153 *enc->cur++ = major | len; 240 *enc->cur++ = major | len;
154 else if (len <= 0xff) 241 else if (ecb_expect_true (len <= 0xffU))
155 { 242 {
156 *enc->cur++ = major | 24; 243 *enc->cur++ = major | LENGTH_EXT1;
157 *enc->cur++ = len; 244 *enc->cur++ = len;
158 } 245 }
159 else if (len <= 0xffff) 246 else if (len <= 0xffffU)
160 { 247 {
161 *enc->cur++ = major | 25; 248 *enc->cur++ = major | LENGTH_EXT2;
162 *enc->cur++ = len >> 8; 249 *enc->cur++ = len >> 8;
163 *enc->cur++ = len; 250 *enc->cur++ = len;
164 } 251 }
165 else if (len <= 0xffffffff) 252 else if (len <= 0xffffffffU)
166 { 253 {
167 *enc->cur++ = major | 26; 254 *enc->cur++ = major | LENGTH_EXT4;
168 *enc->cur++ = len >> 24; 255 *enc->cur++ = len >> 24;
169 *enc->cur++ = len >> 16; 256 *enc->cur++ = len >> 16;
170 *enc->cur++ = len >> 8; 257 *enc->cur++ = len >> 8;
171 *enc->cur++ = len; 258 *enc->cur++ = len;
172 } 259 }
173 else 260 else
174 { 261 {
175 *enc->cur++ = major | 27; 262 *enc->cur++ = major | LENGTH_EXT8;
176 *enc->cur++ = len >> 56; 263 *enc->cur++ = len >> 56;
177 *enc->cur++ = len >> 48; 264 *enc->cur++ = len >> 48;
178 *enc->cur++ = len >> 40; 265 *enc->cur++ = len >> 40;
179 *enc->cur++ = len >> 32; 266 *enc->cur++ = len >> 32;
180 *enc->cur++ = len >> 24; 267 *enc->cur++ = len >> 24;
182 *enc->cur++ = len >> 8; 269 *enc->cur++ = len >> 8;
183 *enc->cur++ = len; 270 *enc->cur++ = len;
184 } 271 }
185} 272}
186 273
187static void 274ecb_inline void
275encode_tag (enc_t *enc, UV tag)
276{
277 encode_uint (enc, MAJOR_TAG, tag);
278}
279
280ecb_inline void
188encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 281encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
189{ 282{
190 encode_uint (enc, utf8 ? 0x60 : 0x40, len); 283 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
191 need (enc, len); 284 need (enc, len);
192 memcpy (enc->cur, str, len); 285 memcpy (enc->cur, str, len);
193 enc->cur += len; 286 enc->cur += len;
194} 287}
195 288
289static void
290encode_strref (enc_t *enc, int utf8, char *str, STRLEN len)
291{
292 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
293 {
294 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
295
296 if (SvOK (*svp))
297 {
298 // already registered, use stringref
299 encode_tag (enc, CBOR_TAG_STRINGREF);
300 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
301 return;
302 }
303 else if (len >= minimum_string_length (enc->stringref_idx))
304 {
305 // register only
306 sv_setuv (*svp, enc->stringref_idx);
307 ++enc->stringref_idx;
308 }
309 }
310
311 encode_str (enc, utf8, str, len);
312}
313
196static void encode_sv (enc_t *enc, SV *sv); 314static void encode_sv (enc_t *enc, SV *sv);
197 315
198static void 316static void
199encode_av (enc_t *enc, AV *av) 317encode_av (enc_t *enc, AV *av)
200{ 318{
203 if (enc->depth >= enc->cbor.max_depth) 321 if (enc->depth >= enc->cbor.max_depth)
204 croak (ERR_NESTING_EXCEEDED); 322 croak (ERR_NESTING_EXCEEDED);
205 323
206 ++enc->depth; 324 ++enc->depth;
207 325
208 encode_uint (enc, 0x80, len + 1); 326 encode_uint (enc, MAJOR_ARRAY, len + 1);
209 327
210 for (i = 0; i <= len; ++i) 328 for (i = 0; i <= len; ++i)
211 { 329 {
212 SV **svp = av_fetch (av, i, 0); 330 SV **svp = av_fetch (av, i, 0);
213 encode_sv (enc, svp ? *svp : &PL_sv_undef); 331 encode_sv (enc, svp ? *svp : &PL_sv_undef);
228 346
229 int pairs = hv_iterinit (hv); 347 int pairs = hv_iterinit (hv);
230 int mg = SvMAGICAL (hv); 348 int mg = SvMAGICAL (hv);
231 349
232 if (mg) 350 if (mg)
233 encode_ch (enc, 0xa0 | 31); 351 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
234 else 352 else
235 encode_uint (enc, 0xa0, pairs); 353 encode_uint (enc, MAJOR_MAP, pairs);
236 354
237 while ((he = hv_iternext (hv))) 355 while ((he = hv_iternext (hv)))
238 { 356 {
239 if (HeKLEN (he) == HEf_SVKEY) 357 if (HeKLEN (he) == HEf_SVKEY)
240 encode_sv (enc, HeSVKEY (he)); 358 encode_sv (enc, HeSVKEY (he));
241 else 359 else
242 encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); 360 encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
243 361
244 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); 362 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
245 } 363 }
246 364
247 if (mg) 365 if (mg)
248 encode_ch (enc, 0xe0 | 31); 366 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
249 367
250 --enc->depth; 368 --enc->depth;
251} 369}
252 370
253// encode objects, arrays and special \0=false and \1=true values. 371// encode objects, arrays and special \0=false and \1=true values.
254static void 372static void
255encode_rv (enc_t *enc, SV *sv) 373encode_rv (enc_t *enc, SV *sv)
256{ 374{
257 svtype svt;
258
259 SvGETMAGIC (sv); 375 SvGETMAGIC (sv);
376
260 svt = SvTYPE (sv); 377 svtype svt = SvTYPE (sv);
261 378
262 if (ecb_expect_false (SvOBJECT (sv))) 379 if (ecb_expect_false (SvOBJECT (sv)))
263 { 380 {
264 HV *boolean_stash = !CBOR_SLOW || cbor_boolean_stash 381 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
265 ? cbor_boolean_stash 382 ? types_boolean_stash
266 : gv_stashpv ("CBOR::XS::Boolean", 1); 383 : gv_stashpv ("Types::Serialiser::Boolean", 1);
384 HV *error_stash = !CBOR_SLOW || types_error_stash
385 ? types_error_stash
386 : gv_stashpv ("Types::Serialiser::Error", 1);
267 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 387 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
268 ? cbor_tagged_stash 388 ? cbor_tagged_stash
269 : gv_stashpv ("CBOR::XS::Tagged" , 1); 389 : gv_stashpv ("CBOR::XS::Tagged" , 1);
270 390
391 HV *stash = SvSTASH (sv);
392
271 if (SvSTASH (sv) == boolean_stash) 393 if (stash == boolean_stash)
272 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20); 394 {
395 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
396 return;
397 }
398 else if (stash == error_stash)
399 {
400 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
401 return;
402 }
273 else if (SvSTASH (sv) == tagged_stash) 403 else if (stash == tagged_stash)
274 { 404 {
275 if (svt != SVt_PVAV) 405 if (svt != SVt_PVAV)
276 croak ("encountered CBOR::XS::Tagged object that isn't an array"); 406 croak ("encountered CBOR::XS::Tagged object that isn't an array");
277 407
278 encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1))); 408 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
279 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); 409 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
410
411 return;
412 }
413 }
414
415 if (ecb_expect_false (SvREFCNT (sv) > 1)
416 && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
417 {
418 if (!enc->shareable)
419 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
420
421 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
422
423 if (SvOK (*svp))
424 {
425 encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
426 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
427 return;
280 } 428 }
281 else 429 else
282 { 430 {
431 sv_setuv (*svp, enc->shareable_idx);
432 ++enc->shareable_idx;
433 encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
434 }
435 }
436
437 if (ecb_expect_false (SvOBJECT (sv)))
438 {
439 HV *stash = SvSTASH (sv);
440 GV *method;
441
442 if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
443 {
444 dSP;
445
446 ENTER; SAVETMPS; PUSHMARK (SP);
283 // we re-bless the reference to get overload and other niceties right 447 // we re-bless the reference to get overload and other niceties right
284 GV *to_cbor = gv_fetchmethod_autoload (SvSTASH (sv), "TO_CBOR", 0);
285
286 if (to_cbor)
287 {
288 dSP;
289
290 ENTER; SAVETMPS; PUSHMARK (SP);
291 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv))); 448 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
292 449
293 // calling with G_SCALAR ensures that we always get a 1 return value
294 PUTBACK; 450 PUTBACK;
451 // G_SCALAR ensures that exactly one value is returned
295 call_sv ((SV *)GvCV (to_cbor), G_SCALAR); 452 call_sv ((SV *)GvCV (method), G_SCALAR);
296 SPAGAIN; 453 SPAGAIN;
297 454
298 // catch this surprisingly common error 455 // catch this surprisingly common error
299 if (SvROK (TOPs) && SvRV (TOPs) == sv) 456 if (SvROK (TOPs) && SvRV (TOPs) == sv)
300 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); 457 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
301 458
302 sv = POPs; 459 encode_sv (enc, POPs);
460
303 PUTBACK; 461 PUTBACK;
304 462
305 encode_sv (enc, sv);
306
307 FREETMPS; LEAVE; 463 FREETMPS; LEAVE;
308 } 464 }
465 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
466 {
467 dSP;
468
469 ENTER; SAVETMPS; PUSHMARK (SP);
470 EXTEND (SP, 2);
471 // we re-bless the reference to get overload and other niceties right
472 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
473 PUSHs (sv_cbor);
474
475 PUTBACK;
476 int count = call_sv ((SV *)GvCV (method), G_ARRAY);
477 SPAGAIN;
478
479 // catch this surprisingly common error
480 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
481 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
482
483 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
484 encode_uint (enc, MAJOR_ARRAY, count + 1);
485 encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
486
487 while (count)
488 encode_sv (enc, SP[1 - count--]);
489
490 PUTBACK;
491
492 FREETMPS; LEAVE;
493 }
309 else 494 else
310 croak ("encountered object '%s', but no TO_CBOR method available on it", 495 croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
311 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 496 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
312 }
313 } 497 }
314 else if (svt == SVt_PVHV) 498 else if (svt == SVt_PVHV)
315 encode_hv (enc, (HV *)sv); 499 encode_hv (enc, (HV *)sv);
316 else if (svt == SVt_PVAV) 500 else if (svt == SVt_PVAV)
317 encode_av (enc, (AV *)sv); 501 encode_av (enc, (AV *)sv);
318 else if (svt < SVt_PVAV)
319 {
320 STRLEN len = 0;
321 char *pv = svt ? SvPV (sv, len) : 0;
322
323 if (len == 1 && *pv == '1')
324 encode_ch (enc, 0xe0 | 21);
325 else if (len == 1 && *pv == '0')
326 encode_ch (enc, 0xe0 | 20);
327 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
328 encode_ch (enc, 0xe0 | 23);
329 else
330 croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",
331 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
332 }
333 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
334 encode_ch (enc, 0xe0 | 23);
335 else 502 else
336 croak ("encountered %s, but CBOR can only represent references to arrays or hashes", 503 {
337 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 504 encode_tag (enc, CBOR_TAG_INDIRECTION);
505 encode_sv (enc, sv);
506 }
338} 507}
339 508
340static void 509static void
341encode_nv (enc_t *enc, SV *sv) 510encode_nv (enc_t *enc, SV *sv)
342{ 511{
343 double nv = SvNVX (sv); 512 double nv = SvNVX (sv);
344 513
345 need (enc, 9); 514 need (enc, 9);
346 515
347 if (ecb_expect_false (nv == (U32)nv)) 516 if (ecb_expect_false (nv == (NV)(U32)nv))
348 encode_uint (enc, 0x00, (U32)nv); 517 encode_uint (enc, MAJOR_POS_INT, (U32)nv);
349 //TODO: maybe I32? 518 //TODO: maybe I32?
350 else if (ecb_expect_false (nv == (float)nv)) 519 else if (ecb_expect_false (nv == (float)nv))
351 { 520 {
352 uint32_t fp = ecb_float_to_binary32 (nv); 521 uint32_t fp = ecb_float_to_binary32 (nv);
353 522
354 *enc->cur++ = 0xe0 | 26; 523 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
355 524
356 if (!ecb_big_endian ()) 525 if (!ecb_big_endian ())
357 fp = ecb_bswap32 (fp); 526 fp = ecb_bswap32 (fp);
358 527
359 memcpy (enc->cur, &fp, 4); 528 memcpy (enc->cur, &fp, 4);
361 } 530 }
362 else 531 else
363 { 532 {
364 uint64_t fp = ecb_double_to_binary64 (nv); 533 uint64_t fp = ecb_double_to_binary64 (nv);
365 534
366 *enc->cur++ = 0xe0 | 27; 535 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
367 536
368 if (!ecb_big_endian ()) 537 if (!ecb_big_endian ())
369 fp = ecb_bswap64 (fp); 538 fp = ecb_bswap64 (fp);
370 539
371 memcpy (enc->cur, &fp, 8); 540 memcpy (enc->cur, &fp, 8);
380 549
381 if (SvPOKp (sv)) 550 if (SvPOKp (sv))
382 { 551 {
383 STRLEN len; 552 STRLEN len;
384 char *str = SvPV (sv, len); 553 char *str = SvPV (sv, len);
385 encode_str (enc, SvUTF8 (sv), str, len); 554 encode_strref (enc, SvUTF8 (sv), str, len);
386 } 555 }
387 else if (SvNOKp (sv)) 556 else if (SvNOKp (sv))
388 encode_nv (enc, sv); 557 encode_nv (enc, sv);
389 else if (SvIOKp (sv)) 558 else if (SvIOKp (sv))
390 { 559 {
391 if (SvIsUV (sv)) 560 if (SvIsUV (sv))
392 encode_uint (enc, 0x00, SvUVX (sv)); 561 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
393 else if (SvIVX (sv) >= 0) 562 else if (SvIVX (sv) >= 0)
394 encode_uint (enc, 0x00, SvIVX (sv)); 563 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
395 else 564 else
396 encode_uint (enc, 0x20, -(SvIVX (sv) + 1)); 565 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
397 } 566 }
398 else if (SvROK (sv)) 567 else if (SvROK (sv))
399 encode_rv (enc, SvRV (sv)); 568 encode_rv (enc, SvRV (sv));
400 else if (!SvOK (sv)) 569 else if (!SvOK (sv))
401 encode_ch (enc, 0xe0 | 22); 570 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
402 else if (enc->cbor.flags & F_ALLOW_UNKNOWN) 571 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
403 encode_ch (enc, 0xe0 | 23); 572 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
404 else 573 else
405 croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data", 574 croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
406 SvPV_nolen (sv), (unsigned int)SvFLAGS (sv)); 575 SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
407} 576}
408 577
409static SV * 578static SV *
410encode_cbor (SV *scalar, CBOR *cbor) 579encode_cbor (SV *scalar, CBOR *cbor)
411{ 580{
412 enc_t enc; 581 enc_t enc = { };
413 582
414 enc.cbor = *cbor; 583 enc.cbor = *cbor;
415 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 584 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
416 enc.cur = SvPVX (enc.sv); 585 enc.cur = SvPVX (enc.sv);
417 enc.end = SvEND (enc.sv); 586 enc.end = SvEND (enc.sv);
418 enc.depth = 0;
419 587
420 SvPOK_only (enc.sv); 588 SvPOK_only (enc.sv);
589
590 if (cbor->flags & F_PACK_STRINGS)
591 {
592 encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
593 enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
594 enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
595 }
596
421 encode_sv (&enc, scalar); 597 encode_sv (&enc, scalar);
422 598
423 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 599 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
424 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings 600 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
425 601
439 U8 *end; // end of input string 615 U8 *end; // end of input string
440 const char *err; // parse error, if != 0 616 const char *err; // parse error, if != 0
441 CBOR cbor; 617 CBOR cbor;
442 U32 depth; // recursion depth 618 U32 depth; // recursion depth
443 U32 maxdepth; // recursion depth limit 619 U32 maxdepth; // recursion depth limit
620 AV *shareable;
621 AV *stringref;
622 SV *decode_tagged;
444} dec_t; 623} dec_t;
445 624
446#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE 625#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
447 626
448#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") 627#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
451#define DEC_DEC_DEPTH --dec->depth 630#define DEC_DEC_DEPTH --dec->depth
452 631
453static UV 632static UV
454decode_uint (dec_t *dec) 633decode_uint (dec_t *dec)
455{ 634{
456 switch (*dec->cur & 31) 635 U8 m = *dec->cur & MINOR_MASK;
457 { 636 ++dec->cur;
458 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
459 case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
460 case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
461 return *dec->cur++ & 31;
462 637
463 case 24: 638 if (ecb_expect_true (m < LENGTH_EXT1))
639 return m;
640 else if (ecb_expect_true (m == LENGTH_EXT1))
641 {
464 WANT (2); 642 WANT (1);
465 dec->cur += 2; 643 dec->cur += 1;
466 return dec->cur[-1]; 644 return dec->cur[-1];
467 645 }
468 case 25: 646 else if (ecb_expect_true (m == LENGTH_EXT2))
647 {
469 WANT (3); 648 WANT (2);
470 dec->cur += 3; 649 dec->cur += 2;
471 return (((UV)dec->cur[-2]) << 8) 650 return (((UV)dec->cur[-2]) << 8)
472 | ((UV)dec->cur[-1]); 651 | ((UV)dec->cur[-1]);
473 652 }
474 case 26: 653 else if (ecb_expect_true (m == LENGTH_EXT4))
654 {
475 WANT (5); 655 WANT (4);
476 dec->cur += 5; 656 dec->cur += 4;
477 return (((UV)dec->cur[-4]) << 24) 657 return (((UV)dec->cur[-4]) << 24)
478 | (((UV)dec->cur[-3]) << 16) 658 | (((UV)dec->cur[-3]) << 16)
479 | (((UV)dec->cur[-2]) << 8) 659 | (((UV)dec->cur[-2]) << 8)
480 | ((UV)dec->cur[-1]); 660 | ((UV)dec->cur[-1]);
481 661 }
482 case 27: 662 else if (ecb_expect_true (m == LENGTH_EXT8))
663 {
483 WANT (9); 664 WANT (8);
484 dec->cur += 9; 665 dec->cur += 8;
666
667 return
668#if UVSIZE < 8
669 0
670#else
485 return (((UV)dec->cur[-8]) << 56) 671 (((UV)dec->cur[-8]) << 56)
486 | (((UV)dec->cur[-7]) << 48) 672 | (((UV)dec->cur[-7]) << 48)
487 | (((UV)dec->cur[-6]) << 40) 673 | (((UV)dec->cur[-6]) << 40)
488 | (((UV)dec->cur[-5]) << 32) 674 | (((UV)dec->cur[-5]) << 32)
675#endif
489 | (((UV)dec->cur[-4]) << 24) 676 | (((UV)dec->cur[-4]) << 24)
490 | (((UV)dec->cur[-3]) << 16) 677 | (((UV)dec->cur[-3]) << 16)
491 | (((UV)dec->cur[-2]) << 8) 678 | (((UV)dec->cur[-2]) << 8)
492 | ((UV)dec->cur[-1]); 679 | ((UV)dec->cur[-1]);
493 680 }
494 default: 681 else
495 ERR ("corrupted CBOR data (unsupported integer minor encoding)"); 682 ERR ("corrupted CBOR data (unsupported integer minor encoding)");
496 }
497 683
498fail: 684fail:
499 return 0; 685 return 0;
500} 686}
501 687
506{ 692{
507 AV *av = newAV (); 693 AV *av = newAV ();
508 694
509 DEC_INC_DEPTH; 695 DEC_INC_DEPTH;
510 696
511 if ((*dec->cur & 31) == 31) 697 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
512 { 698 {
513 ++dec->cur; 699 ++dec->cur;
514 700
515 for (;;) 701 for (;;)
516 { 702 {
517 WANT (1); 703 WANT (1);
518 704
519 if (*dec->cur == (0xe0 | 31)) 705 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
520 { 706 {
521 ++dec->cur; 707 ++dec->cur;
522 break; 708 break;
523 } 709 }
524 710
527 } 713 }
528 else 714 else
529 { 715 {
530 int i, len = decode_uint (dec); 716 int i, len = decode_uint (dec);
531 717
718 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
532 av_fill (av, len - 1); 719 av_fill (av, len - 1);
533 720
534 for (i = 0; i < len; ++i) 721 for (i = 0; i < len; ++i)
535 AvARRAY (av)[i] = decode_sv (dec); 722 AvARRAY (av)[i] = decode_sv (dec);
536 } 723 }
542 SvREFCNT_dec (av); 729 SvREFCNT_dec (av);
543 DEC_DEC_DEPTH; 730 DEC_DEC_DEPTH;
544 return &PL_sv_undef; 731 return &PL_sv_undef;
545} 732}
546 733
734static void
735decode_he (dec_t *dec, HV *hv)
736{
737 // for speed reasons, we special-case single-string
738 // byte or utf-8 strings as keys, but only when !stringref
739
740 if (ecb_expect_true (!dec->stringref))
741 if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
742 {
743 I32 len = decode_uint (dec);
744 char *key = (char *)dec->cur;
745
746 dec->cur += len;
747
748 hv_store (hv, key, len, decode_sv (dec), 0);
749
750 return;
751 }
752 else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
753 {
754 I32 len = decode_uint (dec);
755 char *key = (char *)dec->cur;
756
757 dec->cur += len;
758
759 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
760 if (!is_utf8_string (key, len))
761 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
762
763 hv_store (hv, key, -len, decode_sv (dec), 0);
764
765 return;
766 }
767
768 SV *k = decode_sv (dec);
769 SV *v = decode_sv (dec);
770
771 hv_store_ent (hv, k, v, 0);
772 SvREFCNT_dec (k);
773
774fail:
775 ;
776}
777
547static SV * 778static SV *
548decode_hv (dec_t *dec) 779decode_hv (dec_t *dec)
549{ 780{
550 HV *hv = newHV (); 781 HV *hv = newHV ();
551 782
552 DEC_INC_DEPTH; 783 DEC_INC_DEPTH;
553 784
554 if ((*dec->cur & 31) == 31) 785 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
555 { 786 {
556 ++dec->cur; 787 ++dec->cur;
557 788
558 for (;;) 789 for (;;)
559 { 790 {
560 WANT (1); 791 WANT (1);
561 792
562 if (*dec->cur == (0xe0 | 31)) 793 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
563 { 794 {
564 ++dec->cur; 795 ++dec->cur;
565 break; 796 break;
566 } 797 }
567 798
568 SV *k = decode_sv (dec); 799 decode_he (dec, hv);
569 SV *v = decode_sv (dec);
570
571 hv_store_ent (hv, k, v, 0);
572 } 800 }
573 } 801 }
574 else 802 else
575 { 803 {
576 int len = decode_uint (dec); 804 int pairs = decode_uint (dec);
577 805
578 while (len--) 806 while (pairs--)
579 { 807 decode_he (dec, hv);
580 SV *k = decode_sv (dec);
581 SV *v = decode_sv (dec);
582
583 hv_store_ent (hv, k, v, 0);
584 }
585 } 808 }
586 809
587 DEC_DEC_DEPTH; 810 DEC_DEC_DEPTH;
588 return newRV_noinc ((SV *)hv); 811 return newRV_noinc ((SV *)hv);
589 812
596static SV * 819static SV *
597decode_str (dec_t *dec, int utf8) 820decode_str (dec_t *dec, int utf8)
598{ 821{
599 SV *sv = 0; 822 SV *sv = 0;
600 823
601 if ((*dec->cur & 31) == 31) 824 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF)
602 { 825 {
826 // indefinite length strings
603 ++dec->cur; 827 ++dec->cur;
604 828
829 U8 major = *dec->cur & MAJOR_MISC;
830
605 sv = newSVpvn ("", 0); 831 sv = newSVpvn ("", 0);
606 832
607 // not very fast, and certainly not robust against illegal input
608 for (;;) 833 for (;;)
609 { 834 {
610 WANT (1); 835 WANT (1);
611 836
612 if (*dec->cur == (0xe0 | 31)) 837 if ((*dec->cur - major) > LENGTH_EXT8)
838 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
613 { 839 {
614 ++dec->cur; 840 ++dec->cur;
615 break; 841 break;
616 } 842 }
843 else
844 ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
617 845
618 sv_catsv (sv, decode_sv (dec)); 846 STRLEN len = decode_uint (dec);
847
848 WANT (len);
849 sv_catpvn (sv, dec->cur, len);
850 dec->cur += len;
619 } 851 }
620 } 852 }
621 else 853 else
622 { 854 {
623 STRLEN len = decode_uint (dec); 855 STRLEN len = decode_uint (dec);
624 856
625 WANT (len); 857 WANT (len);
626 sv = newSVpvn (dec->cur, len); 858 sv = newSVpvn (dec->cur, len);
627 dec->cur += len; 859 dec->cur += len;
860
861 if (ecb_expect_false (dec->stringref)
862 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
863 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
628 } 864 }
629 865
630 if (utf8) 866 if (utf8)
867 {
868 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
869 if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
870 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
871
631 SvUTF8_on (sv); 872 SvUTF8_on (sv);
873 }
632 874
633 return sv; 875 return sv;
634 876
635fail: 877fail:
636 SvREFCNT_dec (sv); 878 SvREFCNT_dec (sv);
638} 880}
639 881
640static SV * 882static SV *
641decode_tagged (dec_t *dec) 883decode_tagged (dec_t *dec)
642{ 884{
885 SV *sv = 0;
643 UV tag = decode_uint (dec); 886 UV tag = decode_uint (dec);
887
888 WANT (1);
889
890 switch (tag)
891 {
892 case CBOR_TAG_MAGIC:
644 SV *sv = decode_sv (dec); 893 sv = decode_sv (dec);
894 break;
645 895
646 if (tag == CBOR_TAG_MAGIC) 896 case CBOR_TAG_INDIRECTION:
647 return sv; 897 sv = newRV_noinc (decode_sv (dec));
898 break;
648 899
649 if (tag == CBOR_TAG_PERL_OBJECT) 900 case CBOR_TAG_STRINGREF_NAMESPACE:
650 { 901 {
902 ENTER; SAVETMPS;
903
904 SAVESPTR (dec->stringref);
905 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
906
907 sv = decode_sv (dec);
908
909 FREETMPS; LEAVE;
910 }
911 break;
912
913 case CBOR_TAG_STRINGREF:
914 {
915 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
916 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
917
918 UV idx = decode_uint (dec);
919
920 if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
921 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
922
923 sv = newSVsv (AvARRAY (dec->stringref)[idx]);
924 }
925 break;
926
927 case CBOR_TAG_VALUE_SHAREABLE:
928 {
929 if (ecb_expect_false (!dec->shareable))
930 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
931
932 if (dec->cbor.flags & F_ALLOW_CYCLES)
933 {
934 sv = newSV (0);
935 av_push (dec->shareable, SvREFCNT_inc_NN (sv));
936
937 SV *osv = decode_sv (dec);
938 sv_setsv (sv, osv);
939 SvREFCNT_dec_NN (osv);
940 }
941 else
942 {
943 av_push (dec->shareable, &PL_sv_undef);
944 int idx = AvFILLp (dec->shareable);
945 sv = decode_sv (dec);
946 av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
947 }
948 }
949 break;
950
951 case CBOR_TAG_VALUE_SHAREDREF:
952 {
953 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
954 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
955
956 UV idx = decode_uint (dec);
957
958 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
959 ERR ("corrupted CBOR data (sharedref index out of bounds)");
960
961 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
962
963 if (sv == &PL_sv_undef)
964 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
965 }
966 break;
967
968 case CBOR_TAG_PERL_OBJECT:
969 {
970 sv = decode_sv (dec);
971
651 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) 972 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
652 ERR ("corrupted CBOR data (non-array perl object)"); 973 ERR ("corrupted CBOR data (non-array perl object)");
974
975 AV *av = (AV *)SvRV (sv);
976 int len = av_len (av) + 1;
977 HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
978
979 if (!stash)
980 ERR ("cannot decode perl-object (package does not exist)");
981
982 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
983
984 if (!method)
985 ERR ("cannot decode perl-object (package does not have a THAW method)");
986
987 dSP;
988
989 ENTER; SAVETMPS; PUSHMARK (SP);
990 EXTEND (SP, len + 1);
991 // we re-bless the reference to get overload and other niceties right
992 PUSHs (*av_fetch (av, 0, 1));
993 PUSHs (sv_cbor);
994
995 int i;
996
997 for (i = 1; i < len; ++i)
998 PUSHs (*av_fetch (av, i, 1));
999
1000 PUTBACK;
1001 call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1002 SPAGAIN;
1003
1004 if (SvTRUE (ERRSV))
1005 {
1006 FREETMPS; LEAVE;
1007 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1008 }
1009
1010 SvREFCNT_dec (sv);
1011 sv = SvREFCNT_inc (POPs);
1012
1013 PUTBACK;
1014
1015 FREETMPS; LEAVE;
653 1016 }
654 // TODO 1017 break;
655 }
656 1018
1019 default:
1020 {
1021 sv = decode_sv (dec);
1022
1023 dSP;
1024 ENTER; SAVETMPS; PUSHMARK (SP);
1025 EXTEND (SP, 2);
1026 PUSHs (newSVuv (tag));
1027 PUSHs (sv);
1028
1029 PUTBACK;
1030 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1031 SPAGAIN;
1032
1033 if (SvTRUE (ERRSV))
1034 {
1035 FREETMPS; LEAVE;
1036 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1037 }
1038
1039 if (count)
1040 {
1041 SvREFCNT_dec (sv);
1042 sv = SvREFCNT_inc (POPs);
1043 }
1044 else
1045 {
657 AV *av = newAV (); 1046 AV *av = newAV ();
658 av_push (av, newSVuv (tag)); 1047 av_push (av, newSVuv (tag));
659 av_push (av, sv); 1048 av_push (av, sv);
660 1049
661 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 1050 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
662 ? cbor_tagged_stash 1051 ? cbor_tagged_stash
663 : gv_stashpv ("CBOR::XS::Tagged" , 1); 1052 : gv_stashpv ("CBOR::XS::Tagged" , 1);
664
665 return sv_bless (newRV_noinc ((SV *)av), tagged_stash); 1053 sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1054 }
1055
1056 PUTBACK;
1057
1058 FREETMPS; LEAVE;
1059 }
1060 break;
1061 }
1062
1063 return sv;
666 1064
667fail: 1065fail:
668 SvREFCNT_dec (sv); 1066 SvREFCNT_dec (sv);
669 return &PL_sv_undef; 1067 return &PL_sv_undef;
670} 1068}
672static SV * 1070static SV *
673decode_sv (dec_t *dec) 1071decode_sv (dec_t *dec)
674{ 1072{
675 WANT (1); 1073 WANT (1);
676 1074
677 switch (*dec->cur >> 5) 1075 switch (*dec->cur >> MAJOR_SHIFT)
678 { 1076 {
679 case 0: // unsigned int 1077 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
680 return newSVuv (decode_uint (dec)); 1078 case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
681 case 1: // negative int 1079 case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
682 return newSViv (-1 - (IV)decode_uint (dec)); 1080 case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
683 case 2: // octet string 1081 case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
684 return decode_str (dec, 0); 1082 case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
685 case 3: // utf-8 string 1083 case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
686 return decode_str (dec, 1); 1084
687 case 4: // array 1085 case MAJOR_MISC >> MAJOR_SHIFT:
688 return decode_av (dec);
689 case 5: // map
690 return decode_hv (dec);
691 case 6: // tag
692 return decode_tagged (dec);
693 case 7: // misc
694 switch (*dec->cur++ & 31) 1086 switch (*dec->cur++ & MINOR_MASK)
695 { 1087 {
696 case 20: 1088 case SIMPLE_FALSE:
697#if CBOR_SLOW 1089#if CBOR_SLOW
698 cbor_false = get_bool ("CBOR::XS::false"); 1090 types_false = get_bool ("Types::Serialiser::false");
699#endif 1091#endif
700 return newSVsv (cbor_false); 1092 return newSVsv (types_false);
701 case 21: 1093 case SIMPLE_TRUE:
702#if CBOR_SLOW 1094#if CBOR_SLOW
703 cbor_true = get_bool ("CBOR::XS::true"); 1095 types_true = get_bool ("Types::Serialiser::true");
704#endif 1096#endif
705 return newSVsv (cbor_true); 1097 return newSVsv (types_true);
706 case 22: 1098 case SIMPLE_NULL:
707 return newSVsv (&PL_sv_undef); 1099 return newSVsv (&PL_sv_undef);
1100 case SIMPLE_UNDEF:
1101#if CBOR_SLOW
1102 types_error = get_bool ("Types::Serialiser::error");
1103#endif
1104 return newSVsv (types_error);
708 1105
709 case 25: 1106 case MISC_FLOAT16:
710 { 1107 {
711 WANT (2); 1108 WANT (2);
712 1109
713 uint16_t fp = (dec->cur[0] << 8) | dec->cur[1]; 1110 uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
714 dec->cur += 2; 1111 dec->cur += 2;
715 1112
716 return newSVnv (ecb_binary16_to_float (fp)); 1113 return newSVnv (ecb_binary16_to_float (fp));
717 } 1114 }
718 1115
719 case 26: 1116 case MISC_FLOAT32:
720 { 1117 {
721 uint32_t fp; 1118 uint32_t fp;
722 WANT (4); 1119 WANT (4);
723 memcpy (&fp, dec->cur, 4); 1120 memcpy (&fp, dec->cur, 4);
724 dec->cur += 4; 1121 dec->cur += 4;
727 fp = ecb_bswap32 (fp); 1124 fp = ecb_bswap32 (fp);
728 1125
729 return newSVnv (ecb_binary32_to_float (fp)); 1126 return newSVnv (ecb_binary32_to_float (fp));
730 } 1127 }
731 1128
732 case 27: 1129 case MISC_FLOAT64:
733 { 1130 {
734 uint64_t fp; 1131 uint64_t fp;
735 WANT (8); 1132 WANT (8);
736 memcpy (&fp, dec->cur, 8); 1133 memcpy (&fp, dec->cur, 8);
737 dec->cur += 8; 1134 dec->cur += 8;
740 fp = ecb_bswap64 (fp); 1137 fp = ecb_bswap64 (fp);
741 1138
742 return newSVnv (ecb_binary64_to_double (fp)); 1139 return newSVnv (ecb_binary64_to_double (fp));
743 } 1140 }
744 1141
745 // 0..19 unassigned 1142 // 0..19 unassigned simple
746 // 24 reserved + unassigned (reserved values are not encodable) 1143 // 24 reserved + unassigned simple (reserved values are not encodable)
1144 // 28-30 unassigned misc
1145 // 31 break code
747 default: 1146 default:
748 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 1147 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
749 } 1148 }
750 1149
751 break; 1150 break;
752 } 1151 }
753 1152
756} 1155}
757 1156
758static SV * 1157static SV *
759decode_cbor (SV *string, CBOR *cbor, char **offset_return) 1158decode_cbor (SV *string, CBOR *cbor, char **offset_return)
760{ 1159{
761 dec_t dec; 1160 dec_t dec = { };
762 SV *sv; 1161 SV *sv;
1162 STRLEN len;
1163 char *data = SvPVbyte (string, len);
763 1164
764 /* work around bugs in 5.10 where manipulating magic values
765 * makes perl ignore the magic in subsequent accesses.
766 * also make a copy of non-PV values, to get them into a clean
767 * state (SvPV should do that, but it's buggy, see below).
768 */
769 /*SvGETMAGIC (string);*/
770 if (SvMAGICAL (string) || !SvPOK (string))
771 string = sv_2mortal (newSVsv (string));
772
773 SvUPGRADE (string, SVt_PV);
774
775 /* work around a bug in perl 5.10, which causes SvCUR to fail an
776 * assertion with -DDEBUGGING, although SvCUR is documented to
777 * return the xpv_cur field which certainly exists after upgrading.
778 * according to nicholas clark, calling SvPOK fixes this.
779 * But it doesn't fix it, so try another workaround, call SvPV_nolen
780 * and hope for the best.
781 * Damnit, SvPV_nolen still trips over yet another assertion. This
782 * assertion business is seriously broken, try yet another workaround
783 * for the broken -DDEBUGGING.
784 */
785 {
786#ifdef DEBUGGING
787 STRLEN offset = SvOK (string) ? sv_len (string) : 0;
788#else
789 STRLEN offset = SvCUR (string);
790#endif
791
792 if (offset > cbor->max_size && cbor->max_size) 1165 if (len > cbor->max_size && cbor->max_size)
793 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu", 1166 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
794 (unsigned long)SvCUR (string), (unsigned long)cbor->max_size); 1167 (unsigned long)len, (unsigned long)cbor->max_size);
795 }
796
797 sv_utf8_downgrade (string, 0);
798 1168
799 dec.cbor = *cbor; 1169 dec.cbor = *cbor;
800 dec.cur = (U8 *)SvPVX (string); 1170 dec.cur = (U8 *)data;
801 dec.end = (U8 *)SvEND (string); 1171 dec.end = (U8 *)data + len;
802 dec.err = 0;
803 dec.depth = 0;
804 1172
805 sv = decode_sv (&dec); 1173 sv = decode_sv (&dec);
806 1174
807 if (offset_return) 1175 if (offset_return)
808 *offset_return = dec.cur; 1176 *offset_return = dec.cur;
811 if (dec.cur != dec.end && !dec.err) 1179 if (dec.cur != dec.end && !dec.err)
812 dec.err = "garbage after CBOR object"; 1180 dec.err = "garbage after CBOR object";
813 1181
814 if (dec.err) 1182 if (dec.err)
815 { 1183 {
1184 if (dec.shareable)
1185 {
1186 // need to break cyclic links, which whould all be in shareable
1187 int i;
1188 SV **svp;
1189
1190 for (i = av_len (dec.shareable) + 1; i--; )
1191 if ((svp = av_fetch (dec.shareable, i, 0)))
1192 sv_setsv (*svp, &PL_sv_undef);
1193 }
1194
816 SvREFCNT_dec (sv); 1195 SvREFCNT_dec (sv);
817 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)SvPVX (string), (int)(uint8_t)*dec.cur); 1196 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
818 } 1197 }
819 1198
820 sv = sv_2mortal (sv); 1199 sv = sv_2mortal (sv);
821 1200
822 return sv; 1201 return sv;
823} 1202}
824 1203
1204/////////////////////////////////////////////////////////////////////////////
1205// incremental parser
1206
1207#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1208
1209// returns 0 for notyet, 1 for success or error
1210static int
1211incr_parse (CBOR *self, SV *cborstr)
1212{
1213 STRLEN cur;
1214 SvPV (cborstr, cur);
1215
1216 while (ecb_expect_true (self->incr_need <= cur))
1217 {
1218 // table of integer count bytes
1219 static I8 incr_len[MINOR_MASK + 1] = {
1220 0, 0, 0, 0, 0, 0, 0, 0,
1221 0, 0, 0, 0, 0, 0, 0, 0,
1222 0, 0, 0, 0, 0, 0, 0, 0,
1223 1, 2, 4, 8,-1,-1,-1,-2
1224 };
1225
1226 const U8 *p = SvPVX (cborstr) + self->incr_pos;
1227 U8 m = *p & MINOR_MASK;
1228 IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1229 I8 ilen = incr_len[m];
1230
1231 self->incr_need = self->incr_pos + 1;
1232
1233 if (ecb_expect_false (ilen < 0))
1234 {
1235 if (m != MINOR_INDEF)
1236 return 1; // error
1237
1238 if (*p == (MAJOR_MISC | MINOR_INDEF))
1239 {
1240 if (count >= 0)
1241 return 1; // error
1242
1243 count = 1;
1244 }
1245 else
1246 {
1247 av_push (self->incr_count, newSViv (-1)); //TODO: nest
1248 count = -1;
1249 }
1250 }
1251 else
1252 {
1253 self->incr_need += ilen;
1254 if (ecb_expect_false (self->incr_need > cur))
1255 return 0;
1256
1257 int major = *p >> MAJOR_SHIFT;
1258
1259 switch (major)
1260 {
1261 case MAJOR_BYTES >> MAJOR_SHIFT:
1262 case MAJOR_TEXT >> MAJOR_SHIFT:
1263 case MAJOR_ARRAY >> MAJOR_SHIFT:
1264 case MAJOR_MAP >> MAJOR_SHIFT:
1265 {
1266 UV len;
1267
1268 if (ecb_expect_false (ilen))
1269 {
1270 len = 0;
1271
1272 do {
1273 len = (len << 8) | *++p;
1274 } while (--ilen);
1275 }
1276 else
1277 len = m;
1278
1279 switch (major)
1280 {
1281 case MAJOR_BYTES >> MAJOR_SHIFT:
1282 case MAJOR_TEXT >> MAJOR_SHIFT:
1283 self->incr_need += len;
1284 if (ecb_expect_false (self->incr_need > cur))
1285 return 0;
1286
1287 break;
1288
1289 case MAJOR_MAP >> MAJOR_SHIFT:
1290 len <<= 1;
1291 case MAJOR_ARRAY >> MAJOR_SHIFT:
1292 if (len)
1293 {
1294 av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1295 count = len + 1;
1296 }
1297 break;
1298 }
1299 }
1300 }
1301 }
1302
1303 self->incr_pos = self->incr_need;
1304
1305 if (count > 0)
1306 {
1307 while (!--count)
1308 {
1309 if (!AvFILLp (self->incr_count))
1310 return 1; // done
1311
1312 SvREFCNT_dec_NN (av_pop (self->incr_count));
1313 count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1314 }
1315
1316 SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1317 }
1318 }
1319
1320 return 0;
1321}
1322
1323
825///////////////////////////////////////////////////////////////////////////// 1324/////////////////////////////////////////////////////////////////////////////
826// XS interface functions 1325// XS interface functions
827 1326
828MODULE = CBOR::XS PACKAGE = CBOR::XS 1327MODULE = CBOR::XS PACKAGE = CBOR::XS
829 1328
BOOT:
{
        // stashes of the classes used by this module
        cbor_stash        = gv_stashpv ("CBOR::XS"        , 1);
        cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged", 1);

        // stashes of the Types::Serialiser value classes
        types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
        types_error_stash   = gv_stashpv ("Types::Serialiser::Error"  , 1);

        // the Types::Serialiser values themselves, looked up by name
        types_true  = get_bool ("Types::Serialiser::true" );
        types_false = get_bool ("Types::Serialiser::false");
        types_error = get_bool ("Types::Serialiser::error");

        // name of the filter function called when no filter has been set
        default_filter = newSVpv ("CBOR::XS::default_filter", 0);

        // constant "CBOR" string that is passed to THAW methods
        sv_cbor = newSVpv ("CBOR", 0);
        SvREADONLY_on (sv_cbor);
}
839 1346
840PROTOTYPES: DISABLE 1347PROTOTYPES: DISABLE
841 1348
void CLONE (...)
	CODE:
        // forget all cached stash pointers
        // NOTE(review): presumably they are looked up again on demand in the
        // new interpreter (see the CBOR_SLOW checks) - confirm
        cbor_stash        = cbor_tagged_stash   = 0;
        types_error_stash = types_boolean_stash = 0;
847 1355
848void new (char *klass) 1356void new (char *klass)
849 PPCODE: 1357 PPCODE:
850{ 1358{
851 SV *pv = NEWSV (0, sizeof (CBOR)); 1359 SV *pv = NEWSV (0, sizeof (CBOR));
859 1367
860void shrink (CBOR *self, int enable = 1) 1368void shrink (CBOR *self, int enable = 1)
861 ALIAS: 1369 ALIAS:
862 shrink = F_SHRINK 1370 shrink = F_SHRINK
863 allow_unknown = F_ALLOW_UNKNOWN 1371 allow_unknown = F_ALLOW_UNKNOWN
1372 allow_sharing = F_ALLOW_SHARING
1373 allow_cycles = F_ALLOW_CYCLES
1374 pack_strings = F_PACK_STRINGS
1375 validate_utf8 = F_VALIDATE_UTF8
864 PPCODE: 1376 PPCODE:
865{ 1377{
866 if (enable) 1378 if (enable)
867 self->flags |= ix; 1379 self->flags |= ix;
868 else 1380 else
873 1385
void get_shrink (CBOR *self)
	ALIAS:
        get_shrink        = F_SHRINK
        get_allow_unknown = F_ALLOW_UNKNOWN
        get_allow_sharing = F_ALLOW_SHARING
        get_allow_cycles  = F_ALLOW_CYCLES
        get_pack_strings  = F_PACK_STRINGS
        get_validate_utf8 = F_VALIDATE_UTF8
	PPCODE:
        // ix is the flag bit belonging to the alias that was called,
        // the result is a perl boolean telling whether that flag is set
        XPUSHs ((self->flags & ix) ? &PL_sv_yes : &PL_sv_no);
880 1396
881void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1397void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
882 PPCODE: 1398 PPCODE:
898 CODE: 1414 CODE:
899 RETVAL = self->max_size; 1415 RETVAL = self->max_size;
900 OUTPUT: 1416 OUTPUT:
901 RETVAL 1417 RETVAL
902 1418
void filter (CBOR *self, SV *filter = 0)
	PPCODE:
        // drop the old filter, then keep a private copy of the new one
        // (or no filter at all when none was passed)
        SvREFCNT_dec (self->filter);

        if (filter)
          self->filter = newSVsv (filter);
        else
          self->filter = 0;

        // return the object itself
        XPUSHs (ST (0));
913 1424
SV *get_filter (CBOR *self)
	CODE:
        // returns the currently set filter, or a fresh undefined scalar when
        // no filter is set.
        // the glue code generated for an SV * return value mortalises RETVAL,
        // so hand out a reference of our own instead of giving away the one
        // that is owned by self->filter
        RETVAL = self->filter ? SvREFCNT_inc_NN (self->filter) : NEWSV (0, 0);
	OUTPUT:
        RETVAL
937 1430
938void encode (CBOR *self, SV *scalar) 1431void encode (CBOR *self, SV *scalar)
939 PPCODE: 1432 PPCODE:
940 PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN; 1433 PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
941 XPUSHs (scalar); 1434 XPUSHs (scalar);
954 EXTEND (SP, 2); 1447 EXTEND (SP, 2);
955 PUSHs (sv); 1448 PUSHs (sv);
956 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1449 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
957} 1450}
958 1451
void incr_parse (CBOR *self, SV *cborstr)
	PPCODE:
{
        // feeds cborstr to the incremental scanner and pushes every
        // completely received item as decoded value: at most one value in
        // scalar/void context, as many as are available in list context.
        // decoded data is removed from the front of cborstr.

        // the data is processed as octets (croaks when that is not possible)
        if (SvUTF8 (cborstr))
          sv_utf8_downgrade (cborstr, 0);

        // first call, or first call after incr_reset: set up the scanner
        // state so that it expects exactly one top-level item
        if (!self->incr_count)
          {
            AV *counts = newAV ();
            av_push (counts, newSViv (1));

            self->incr_count = counts;
            self->incr_pos   = 0;
            self->incr_need  = 1;
          }

        for (;;)
          {
            SV *sv;
            char *offset;

            if (!incr_parse (self, cborstr))
              {
                // not enough data yet - but refuse to wait for more than max_size
                if (self->max_size && self->incr_need > self->max_size)
                  croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
                         (unsigned long)self->incr_need, (unsigned long)self->max_size);

                break;
              }

            // the scanner has seen a whole item (or an error), run the real decoder
            PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
            XPUSHs (sv);

            // remove the decoded data from the start of the string
            sv_chop (cborstr, offset);

            // and get ready for the next top-level item
            av_clear (self->incr_count);
            av_push (self->incr_count, newSViv (1));

            self->incr_pos  = 0;
            self->incr_need = 1;

            if (GIMME_V != G_ARRAY)
              break;
          }
}
1494
void incr_reset (CBOR *self)
	CODE:
{
        // throw away the scanner state, the next incr_parse call creates it anew
        AV *counts = self->incr_count;

        self->incr_count = 0;
        SvREFCNT_dec (counts);
}
960 1501
void DESTROY (CBOR *self)
	PPCODE:
{
        // all cleanup is delegated to cbor_free
        cbor_free (self);
}
967 1505
968PROTOTYPES: ENABLE 1506PROTOTYPES: ENABLE
969 1507
void encode_cbor (SV *scalar)
	ALIAS:
        encode_cbor         = 0
        encode_cbor_sharing = F_ALLOW_SHARING
	PPCODE:
{
        // functional interface: encodes scalar with a temporary, freshly
        // initialised coder; ix carries the extra flags of the alias called
        CBOR coder;
        SV *encoded;

        cbor_init (&coder);
        coder.flags |= ix;

        PUTBACK;
        encoded = encode_cbor (scalar, &coder);
        SPAGAIN;

        XPUSHs (encoded);
}
978 1520
979void decode_cbor (SV *cborstr) 1521void decode_cbor (SV *cborstr)

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines