ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.20
Committed: Wed Nov 20 11:06:42 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.19: +91 -13 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4
5 #include <assert.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <limits.h>
10 #include <float.h>
11
12 #include "ecb.h"
13
14 // compatibility with perl <5.18
15 #ifndef HvNAMELEN_get
16 # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
17 #endif
18 #ifndef HvNAMELEN
19 # define HvNAMELEN(hv) HvNAMELEN_get (hv)
20 #endif
21 #ifndef HvNAMEUTF8
22 # define HvNAMEUTF8(hv) 0
23 #endif
24
25 // known tags
26 enum cbor_tag
27 {
28 // inofficial extensions (pending iana registration)
29 CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object
30 CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object
31 CBOR_TAG_VALUE_SHAREABLE = 26, // http://cbor.schmorp.de/value-sharing
32 CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing
33 CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref
34 CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref
35 CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
36
37 // rfc7049
38 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
39 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
40 CBOR_TAG_POS_BIGNUM = 2, // byte string
41 CBOR_TAG_NEG_BIGNUM = 3, // byte string
42 CBOR_TAG_DECIMAL = 4, // decimal fraction, array
43 CBOR_TAG_BIGFLOAT = 5, // array
44
45 CBOR_TAG_CONV_B64U = 21, // base64url, any
46 CBOR_TAG_CONV_B64 = 22, // base64, any
47 CBOR_TAG_CONV_HEX = 23, // base16, any
48 CBOR_TAG_CBOR = 24, // embedded cbor, byte string
49
50 CBOR_TAG_URI = 32, // URI rfc3986, utf-8
51 CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
52 CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
53 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
54 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
55
56 CBOR_TAG_MAGIC = 55799 // self-describe cbor
57 };
58
59 #define F_SHRINK 0x00000001UL
60 #define F_ALLOW_UNKNOWN 0x00000002UL
61 #define F_ALLOW_SHARING 0x00000004UL //TODO
62 #define F_DEDUP_STRINGS 0x00000008UL //TODO
63 #define F_DEDUP_KEYS 0x00000010UL //TODO
64
65 #define INIT_SIZE 32 // initial scalar size to be allocated
66
67 #define SB do {
68 #define SE } while (0)
69
70 #define IN_RANGE_INC(type,val,beg,end) \
71 ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
72 <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
73
74 #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
75
76 #ifdef USE_ITHREADS
77 # define CBOR_SLOW 1
78 # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
79 #else
80 # define CBOR_SLOW 0
81 # define CBOR_STASH cbor_stash
82 #endif
83
84 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
85 static SV *types_true, *types_false, *types_error, *sv_cbor;
86
87 typedef struct {
88 U32 flags;
89 U32 max_depth;
90 STRLEN max_size;
91 } CBOR;
92
93 ecb_inline void
94 cbor_init (CBOR *cbor)
95 {
96 Zero (cbor, 1, CBOR);
97 cbor->max_depth = 512;
98 }
99
100 /////////////////////////////////////////////////////////////////////////////
101 // utility functions
102
103 ecb_inline SV *
104 get_bool (const char *name)
105 {
106 SV *sv = get_sv (name, 1);
107
108 SvREADONLY_on (sv);
109 SvREADONLY_on (SvRV (sv));
110
111 return sv;
112 }
113
114 ecb_inline void
115 shrink (SV *sv)
116 {
117 sv_utf8_downgrade (sv, 1);
118
119 if (SvLEN (sv) > SvCUR (sv) + 1)
120 {
121 #ifdef SvPV_shrink_to_cur
122 SvPV_shrink_to_cur (sv);
123 #elif defined (SvPV_renew)
124 SvPV_renew (sv, SvCUR (sv) + 1);
125 #endif
126 }
127 }
128
129 /////////////////////////////////////////////////////////////////////////////
130 // encoder
131
132 // structure used for encoding CBOR
133 typedef struct
134 {
135 char *cur; // SvPVX (sv) + current output position
136 char *end; // SvEND (sv)
137 SV *sv; // result scalar
138 CBOR cbor;
139 U32 depth; // recursion level
140 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
141 UV stringref_idx;
142 HV *shareable; // ptr => index, or 0
143 UV shareable_idx;
144 } enc_t;
145
146 ecb_inline void
147 need (enc_t *enc, STRLEN len)
148 {
149 if (ecb_expect_false (enc->cur + len >= enc->end))
150 {
151 STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
152 SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
153 enc->cur = SvPVX (enc->sv) + cur;
154 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
155 }
156 }
157
158 ecb_inline void
159 encode_ch (enc_t *enc, char ch)
160 {
161 need (enc, 1);
162 *enc->cur++ = ch;
163 }
164
165 static void
166 encode_uint (enc_t *enc, int major, UV len)
167 {
168 need (enc, 9);
169
170 if (len < 24)
171 *enc->cur++ = major | len;
172 else if (len <= 0xff)
173 {
174 *enc->cur++ = major | 24;
175 *enc->cur++ = len;
176 }
177 else if (len <= 0xffff)
178 {
179 *enc->cur++ = major | 25;
180 *enc->cur++ = len >> 8;
181 *enc->cur++ = len;
182 }
183 else if (len <= 0xffffffff)
184 {
185 *enc->cur++ = major | 26;
186 *enc->cur++ = len >> 24;
187 *enc->cur++ = len >> 16;
188 *enc->cur++ = len >> 8;
189 *enc->cur++ = len;
190 }
191 else
192 {
193 *enc->cur++ = major | 27;
194 *enc->cur++ = len >> 56;
195 *enc->cur++ = len >> 48;
196 *enc->cur++ = len >> 40;
197 *enc->cur++ = len >> 32;
198 *enc->cur++ = len >> 24;
199 *enc->cur++ = len >> 16;
200 *enc->cur++ = len >> 8;
201 *enc->cur++ = len;
202 }
203 }
204
205 static void
206 encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
207 {
208 encode_uint (enc, utf8 ? 0x60 : 0x40, len);
209 need (enc, len);
210 memcpy (enc->cur, str, len);
211 enc->cur += len;
212 }
213
214 ecb_inline void
215 encode_tag (enc_t *enc, UV tag)
216 {
217 encode_uint (enc, 0xc0, tag);
218 }
219
220 static void encode_sv (enc_t *enc, SV *sv);
221
222 static void
223 encode_av (enc_t *enc, AV *av)
224 {
225 int i, len = av_len (av);
226
227 if (enc->depth >= enc->cbor.max_depth)
228 croak (ERR_NESTING_EXCEEDED);
229
230 ++enc->depth;
231
232 encode_uint (enc, 0x80, len + 1);
233
234 for (i = 0; i <= len; ++i)
235 {
236 SV **svp = av_fetch (av, i, 0);
237 encode_sv (enc, svp ? *svp : &PL_sv_undef);
238 }
239
240 --enc->depth;
241 }
242
243 ecb_inline void
244 encode_he (enc_t *enc, HE *he)
245 {
246 if (HeKLEN (he) == HEf_SVKEY)
247 encode_sv (enc, HeSVKEY (he));
248 else
249 encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
250 }
251
252 static void
253 encode_hv (enc_t *enc, HV *hv)
254 {
255 HE *he;
256
257 if (enc->depth >= enc->cbor.max_depth)
258 croak (ERR_NESTING_EXCEEDED);
259
260 ++enc->depth;
261
262 int pairs = hv_iterinit (hv);
263 int mg = SvMAGICAL (hv);
264
265 if (mg)
266 encode_ch (enc, 0xa0 | 31);
267 else
268 encode_uint (enc, 0xa0, pairs);
269
270 while ((he = hv_iternext (hv)))
271 {
272 if (ecb_expect_false (enc->cbor.flags & (F_DEDUP_STRINGS | F_DEDUP_KEYS)))
273 {
274 SV **svp;
275
276 if (HeKLEN (he) == HEf_SVKEY)
277 svp = hv_fetch_ent (enc->stringref[!! SvUTF8 (HeSVKEY (he))], HeSVKEY (he) , 1, 0);//TODO return HE :/
278 else
279 svp = hv_fetch (enc->stringref[!! HeKUTF8 (he) ], HeKEY (he), HeKLEN (he), 1);
280
281 if (SvOK (*svp))
282 {
283 encode_tag (enc, CBOR_TAG_STRINGREF);
284 encode_uint (enc, 0x00, SvUV (*svp));
285 }
286 else
287 {
288 sv_setuv (*svp, enc->stringref_idx);
289 ++enc->stringref_idx;
290 encode_he (enc, he);
291 }
292 }
293 else
294 encode_he (enc, he);
295
296 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
297 }
298
299 if (mg)
300 encode_ch (enc, 0xe0 | 31);
301
302 --enc->depth;
303 }
304
305 // encode objects, arrays and special \0=false and \1=true values.
306 static void
307 encode_rv (enc_t *enc, SV *sv)
308 {
309 SvGETMAGIC (sv);
310
311 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
312 && ecb_expect_false (SvREFCNT (sv) > 1))
313 {
314 if (!enc->shareable)
315 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
316
317 SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
318
319 if (SvOK (*svp))
320 {
321 encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
322 encode_uint (enc, 0x00, SvUV (*svp));
323 return;
324 }
325 else
326 {
327 sv_setuv (*svp, enc->shareable_idx);
328 ++enc->shareable_idx;
329 encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
330 }
331 }
332
333 svtype svt = SvTYPE (sv);
334
335 if (ecb_expect_false (SvOBJECT (sv)))
336 {
337 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
338 ? types_boolean_stash
339 : gv_stashpv ("Types::Serialiser::Boolean", 1);
340 HV *error_stash = !CBOR_SLOW || types_error_stash
341 ? types_error_stash
342 : gv_stashpv ("Types::Serialiser::Error", 1);
343 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
344 ? cbor_tagged_stash
345 : gv_stashpv ("CBOR::XS::Tagged" , 1);
346
347 HV *stash = SvSTASH (sv);
348 GV *method;
349
350 if (stash == boolean_stash)
351 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
352 else if (stash == error_stash)
353 encode_ch (enc, 0xe0 | 23);
354 else if (stash == tagged_stash)
355 {
356 if (svt != SVt_PVAV)
357 croak ("encountered CBOR::XS::Tagged object that isn't an array");
358
359 encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
360 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
361 }
362 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
363 {
364 dSP;
365
366 ENTER; SAVETMPS; PUSHMARK (SP);
367 // we re-bless the reference to get overload and other niceties right
368 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
369
370 PUTBACK;
371 // G_SCALAR ensures that return value is 1
372 call_sv ((SV *)GvCV (method), G_SCALAR);
373 SPAGAIN;
374
375 // catch this surprisingly common error
376 if (SvROK (TOPs) && SvRV (TOPs) == sv)
377 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
378
379 encode_sv (enc, POPs);
380
381 PUTBACK;
382
383 FREETMPS; LEAVE;
384 }
385 else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
386 {
387 dSP;
388
389 ENTER; SAVETMPS; PUSHMARK (SP);
390 EXTEND (SP, 2);
391 // we re-bless the reference to get overload and other niceties right
392 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
393 PUSHs (sv_cbor);
394
395 PUTBACK;
396 int count = call_sv ((SV *)GvCV (method), G_ARRAY);
397 SPAGAIN;
398
399 // catch this surprisingly common error
400 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
401 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
402
403 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
404 encode_uint (enc, 0x80, count + 1);
405 encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
406
407 while (count)
408 encode_sv (enc, SP[1 - count--]);
409
410 PUTBACK;
411
412 FREETMPS; LEAVE;
413 }
414 else
415 croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
416 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
417 }
418 else if (svt == SVt_PVHV)
419 encode_hv (enc, (HV *)sv);
420 else if (svt == SVt_PVAV)
421 encode_av (enc, (AV *)sv);
422 else
423 {
424 encode_tag (enc, CBOR_TAG_INDIRECTION);
425 encode_sv (enc, sv);
426 }
427 }
428
429 static void
430 encode_nv (enc_t *enc, SV *sv)
431 {
432 double nv = SvNVX (sv);
433
434 need (enc, 9);
435
436 if (ecb_expect_false (nv == (U32)nv))
437 encode_uint (enc, 0x00, (U32)nv);
438 //TODO: maybe I32?
439 else if (ecb_expect_false (nv == (float)nv))
440 {
441 uint32_t fp = ecb_float_to_binary32 (nv);
442
443 *enc->cur++ = 0xe0 | 26;
444
445 if (!ecb_big_endian ())
446 fp = ecb_bswap32 (fp);
447
448 memcpy (enc->cur, &fp, 4);
449 enc->cur += 4;
450 }
451 else
452 {
453 uint64_t fp = ecb_double_to_binary64 (nv);
454
455 *enc->cur++ = 0xe0 | 27;
456
457 if (!ecb_big_endian ())
458 fp = ecb_bswap64 (fp);
459
460 memcpy (enc->cur, &fp, 8);
461 enc->cur += 8;
462 }
463 }
464
465 static void
466 encode_sv (enc_t *enc, SV *sv)
467 {
468 SvGETMAGIC (sv);
469
470 if (SvPOKp (sv))
471 {
472 STRLEN len;
473 char *str = SvPV (sv, len);
474 encode_str (enc, SvUTF8 (sv), str, len);
475 }
476 else if (SvNOKp (sv))
477 encode_nv (enc, sv);
478 else if (SvIOKp (sv))
479 {
480 if (SvIsUV (sv))
481 encode_uint (enc, 0x00, SvUVX (sv));
482 else if (SvIVX (sv) >= 0)
483 encode_uint (enc, 0x00, SvIVX (sv));
484 else
485 encode_uint (enc, 0x20, -(SvIVX (sv) + 1));
486 }
487 else if (SvROK (sv))
488 encode_rv (enc, SvRV (sv));
489 else if (!SvOK (sv))
490 encode_ch (enc, 0xe0 | 22);
491 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
492 encode_ch (enc, 0xe0 | 23);
493 else
494 croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
495 SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
496 }
497
498 static SV *
499 encode_cbor (SV *scalar, CBOR *cbor)
500 {
501 enc_t enc = { };
502
503 enc.cbor = *cbor;
504 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
505 enc.cur = SvPVX (enc.sv);
506 enc.end = SvEND (enc.sv);
507
508 SvPOK_only (enc.sv);
509
510 if (cbor->flags & (F_DEDUP_STRINGS | F_DEDUP_KEYS))
511 {
512 encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
513 enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
514 enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
515 }
516
517 encode_sv (&enc, scalar);
518
519 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
520 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
521
522 if (enc.cbor.flags & F_SHRINK)
523 shrink (enc.sv);
524
525 return enc.sv;
526 }
527
528 /////////////////////////////////////////////////////////////////////////////
529 // decoder
530
531 // structure used for decoding CBOR
532 typedef struct
533 {
534 U8 *cur; // current parser pointer
535 U8 *end; // end of input string
536 const char *err; // parse error, if != 0
537 CBOR cbor;
538 U32 depth; // recursion depth
539 U32 maxdepth; // recursion depth limit
540 AV *shareable;
541 AV *stringref;
542 } dec_t;
543
544 #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
545
546 #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
547
548 #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED)
549 #define DEC_DEC_DEPTH --dec->depth
550
551 static UV
552 decode_uint (dec_t *dec)
553 {
554 switch (*dec->cur & 31)
555 {
556 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
557 case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
558 case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
559 return *dec->cur++ & 31;
560
561 case 24:
562 WANT (2);
563 dec->cur += 2;
564 return dec->cur[-1];
565
566 case 25:
567 WANT (3);
568 dec->cur += 3;
569 return (((UV)dec->cur[-2]) << 8)
570 | ((UV)dec->cur[-1]);
571
572 case 26:
573 WANT (5);
574 dec->cur += 5;
575 return (((UV)dec->cur[-4]) << 24)
576 | (((UV)dec->cur[-3]) << 16)
577 | (((UV)dec->cur[-2]) << 8)
578 | ((UV)dec->cur[-1]);
579
580 case 27:
581 WANT (9);
582 dec->cur += 9;
583 return (((UV)dec->cur[-8]) << 56)
584 | (((UV)dec->cur[-7]) << 48)
585 | (((UV)dec->cur[-6]) << 40)
586 | (((UV)dec->cur[-5]) << 32)
587 | (((UV)dec->cur[-4]) << 24)
588 | (((UV)dec->cur[-3]) << 16)
589 | (((UV)dec->cur[-2]) << 8)
590 | ((UV)dec->cur[-1]);
591
592 default:
593 ERR ("corrupted CBOR data (unsupported integer minor encoding)");
594 }
595
596 fail:
597 return 0;
598 }
599
600 static SV *decode_sv (dec_t *dec);
601
602 static SV *
603 decode_av (dec_t *dec)
604 {
605 AV *av = newAV ();
606
607 DEC_INC_DEPTH;
608
609 if ((*dec->cur & 31) == 31)
610 {
611 ++dec->cur;
612
613 for (;;)
614 {
615 WANT (1);
616
617 if (*dec->cur == (0xe0 | 31))
618 {
619 ++dec->cur;
620 break;
621 }
622
623 av_push (av, decode_sv (dec));
624 }
625 }
626 else
627 {
628 int i, len = decode_uint (dec);
629
630 av_fill (av, len - 1);
631
632 for (i = 0; i < len; ++i)
633 AvARRAY (av)[i] = decode_sv (dec);
634 }
635
636 DEC_DEC_DEPTH;
637 return newRV_noinc ((SV *)av);
638
639 fail:
640 SvREFCNT_dec (av);
641 DEC_DEC_DEPTH;
642 return &PL_sv_undef;
643 }
644
645 static void
646 decode_he (dec_t *dec, HV *hv)
647 {
648 // for speed reasons, we specialcase single-string
649 // byte or utf-8 strings as keys.
650
651 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
652 {
653 I32 len = decode_uint (dec);
654 char *key = (char *)dec->cur;
655
656 dec->cur += len;
657
658 if (ecb_expect_false (dec->stringref))
659 av_push (dec->stringref, newSVpvn (key, len));
660
661 hv_store (hv, key, len, decode_sv (dec), 0);
662 }
663 else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
664 {
665 I32 len = decode_uint (dec);
666 char *key = (char *)dec->cur;
667
668 dec->cur += len;
669
670 if (ecb_expect_false (dec->stringref))
671 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1));
672
673 hv_store (hv, key, -len, decode_sv (dec), 0);
674 }
675 else
676 {
677 SV *k = decode_sv (dec);
678 SV *v = decode_sv (dec);
679
680 hv_store_ent (hv, k, v, 0);
681 SvREFCNT_dec (k);
682 }
683 }
684
685 static SV *
686 decode_hv (dec_t *dec)
687 {
688 HV *hv = newHV ();
689
690 DEC_INC_DEPTH;
691
692 if ((*dec->cur & 31) == 31)
693 {
694 ++dec->cur;
695
696 for (;;)
697 {
698 WANT (1);
699
700 if (*dec->cur == (0xe0 | 31))
701 {
702 ++dec->cur;
703 break;
704 }
705
706 decode_he (dec, hv);
707 }
708 }
709 else
710 {
711 int pairs = decode_uint (dec);
712
713 while (pairs--)
714 decode_he (dec, hv);
715 }
716
717 DEC_DEC_DEPTH;
718 return newRV_noinc ((SV *)hv);
719
720 fail:
721 SvREFCNT_dec (hv);
722 DEC_DEC_DEPTH;
723 return &PL_sv_undef;
724 }
725
726 static SV *
727 decode_str (dec_t *dec, int utf8)
728 {
729 SV *sv = 0;
730
731 if ((*dec->cur & 31) == 31)
732 {
733 ++dec->cur;
734
735 sv = newSVpvn ("", 0);
736
737 // not very fast, and certainly not robust against illegal input
738 for (;;)
739 {
740 WANT (1);
741
742 if (*dec->cur == (0xe0 | 31))
743 {
744 ++dec->cur;
745 break;
746 }
747
748 sv_catsv (sv, decode_sv (dec));
749 }
750 }
751 else
752 {
753 STRLEN len = decode_uint (dec);
754
755 WANT (len);
756 sv = newSVpvn (dec->cur, len);
757 dec->cur += len;
758 }
759
760 if (utf8)
761 SvUTF8_on (sv);
762
763 if (ecb_expect_false (dec->stringref))
764 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
765
766 return sv;
767
768 fail:
769 SvREFCNT_dec (sv);
770 return &PL_sv_undef;
771 }
772
773 static SV *
774 decode_tagged (dec_t *dec)
775 {
776 SV *sv = 0;
777 UV tag = decode_uint (dec);
778
779 WANT (1);
780
781 switch (tag)
782 {
783 case CBOR_TAG_MAGIC:
784 sv = decode_sv (dec);
785 break;
786
787 case CBOR_TAG_INDIRECTION:
788 sv = newRV_noinc (decode_sv (dec));
789 break;
790
791 case CBOR_TAG_STRINGREF_NAMESPACE:
792 {
793 ENTER; SAVETMPS;
794
795 SAVESPTR (dec->stringref);
796 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
797
798 sv = decode_sv (dec);
799
800 FREETMPS; LEAVE;
801 }
802 break;
803
804 case CBOR_TAG_STRINGREF:
805 {
806 if ((*dec->cur >> 5) != 0)
807 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
808
809 UV idx = decode_uint (dec);
810
811 if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
812 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
813
814 sv = newSVsv (AvARRAY (dec->stringref)[idx]);
815 }
816 break;
817
818 case CBOR_TAG_VALUE_SHAREABLE:
819 {
820 if (ecb_expect_false (!dec->shareable))
821 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
822
823 sv = newSV (0);
824 av_push (dec->shareable, SvREFCNT_inc_NN (sv));
825
826 SV *osv = decode_sv (dec);
827 sv_setsv (sv, osv);
828 SvREFCNT_dec_NN (osv);
829 }
830 break;
831
832 case CBOR_TAG_VALUE_SHAREDREF:
833 {
834 if ((*dec->cur >> 5) != 0)
835 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
836
837 UV idx = decode_uint (dec);
838
839 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
840 ERR ("corrupted CBOR data (sharedref index out of bounds)");
841
842 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
843 }
844 break;
845
846 case CBOR_TAG_PERL_OBJECT:
847 {
848 sv = decode_sv (dec);
849
850 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
851 ERR ("corrupted CBOR data (non-array perl object)");
852
853 AV *av = (AV *)SvRV (sv);
854 int len = av_len (av) + 1;
855 HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
856
857 if (!stash)
858 ERR ("cannot decode perl-object (package does not exist)");
859
860 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
861
862 if (!method)
863 ERR ("cannot decode perl-object (package does not have a THAW method)");
864
865 dSP;
866
867 ENTER; SAVETMPS; PUSHMARK (SP);
868 EXTEND (SP, len + 1);
869 // we re-bless the reference to get overload and other niceties right
870 PUSHs (*av_fetch (av, 0, 1));
871 PUSHs (sv_cbor);
872
873 int i;
874
875 for (i = 1; i < len; ++i)
876 PUSHs (*av_fetch (av, i, 1));
877
878 PUTBACK;
879 call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
880 SPAGAIN;
881
882 if (SvTRUE (ERRSV))
883 {
884 FREETMPS; LEAVE;
885 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
886 }
887
888 SvREFCNT_dec (sv);
889 sv = SvREFCNT_inc (POPs);
890
891 PUTBACK;
892
893 FREETMPS; LEAVE;
894 }
895 break;
896
897 default:
898 {
899 sv = decode_sv (dec);
900
901 AV *av = newAV ();
902 av_push (av, newSVuv (tag));
903 av_push (av, sv);
904
905 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
906 ? cbor_tagged_stash
907 : gv_stashpv ("CBOR::XS::Tagged" , 1);
908
909 sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
910 }
911 break;
912 }
913
914 return sv;
915
916 fail:
917 SvREFCNT_dec (sv);
918 return &PL_sv_undef;
919 }
920
921 static SV *
922 decode_sv (dec_t *dec)
923 {
924 WANT (1);
925
926 switch (*dec->cur >> 5)
927 {
928 case 0: // unsigned int
929 return newSVuv (decode_uint (dec));
930 case 1: // negative int
931 return newSViv (-1 - (IV)decode_uint (dec));
932 case 2: // octet string
933 return decode_str (dec, 0);
934 case 3: // utf-8 string
935 return decode_str (dec, 1);
936 case 4: // array
937 return decode_av (dec);
938 case 5: // map
939 return decode_hv (dec);
940 case 6: // tag
941 return decode_tagged (dec);
942 case 7: // misc
943 switch (*dec->cur++ & 31)
944 {
945 case 20:
946 #if CBOR_SLOW
947 types_false = get_bool ("Types::Serialiser::false");
948 #endif
949 return newSVsv (types_false);
950 case 21:
951 #if CBOR_SLOW
952 types_true = get_bool ("Types::Serialiser::true");
953 #endif
954 return newSVsv (types_true);
955 case 22:
956 return newSVsv (&PL_sv_undef);
957 case 23:
958 #if CBOR_SLOW
959 types_error = get_bool ("Types::Serialiser::error");
960 #endif
961 return newSVsv (types_error);
962
963 case 25:
964 {
965 WANT (2);
966
967 uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
968 dec->cur += 2;
969
970 return newSVnv (ecb_binary16_to_float (fp));
971 }
972
973 case 26:
974 {
975 uint32_t fp;
976 WANT (4);
977 memcpy (&fp, dec->cur, 4);
978 dec->cur += 4;
979
980 if (!ecb_big_endian ())
981 fp = ecb_bswap32 (fp);
982
983 return newSVnv (ecb_binary32_to_float (fp));
984 }
985
986 case 27:
987 {
988 uint64_t fp;
989 WANT (8);
990 memcpy (&fp, dec->cur, 8);
991 dec->cur += 8;
992
993 if (!ecb_big_endian ())
994 fp = ecb_bswap64 (fp);
995
996 return newSVnv (ecb_binary64_to_double (fp));
997 }
998
999 // 0..19 unassigned
1000 // 24 reserved + unassigned (reserved values are not encodable)
1001 default:
1002 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
1003 }
1004
1005 break;
1006 }
1007
1008 fail:
1009 return &PL_sv_undef;
1010 }
1011
1012 static SV *
1013 decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1014 {
1015 dec_t dec = { };
1016 SV *sv;
1017 STRLEN len;
1018 char *data = SvPVbyte (string, len);
1019
1020 if (len > cbor->max_size && cbor->max_size)
1021 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1022 (unsigned long)len, (unsigned long)cbor->max_size);
1023
1024 dec.cbor = *cbor;
1025 dec.cur = (U8 *)data;
1026 dec.end = (U8 *)data + len;
1027
1028 sv = decode_sv (&dec);
1029
1030 if (offset_return)
1031 *offset_return = dec.cur;
1032
1033 if (!(offset_return || !sv))
1034 if (dec.cur != dec.end && !dec.err)
1035 dec.err = "garbage after CBOR object";
1036
1037 if (dec.err)
1038 {
1039 SvREFCNT_dec (sv);
1040 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1041 }
1042
1043 sv = sv_2mortal (sv);
1044
1045 return sv;
1046 }
1047
1048 /////////////////////////////////////////////////////////////////////////////
1049 // XS interface functions
1050
1051 MODULE = CBOR::XS PACKAGE = CBOR::XS
1052
1053 BOOT:
1054 {
1055 cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1056 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1057
1058 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1059 types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1060
1061 types_true = get_bool ("Types::Serialiser::true" );
1062 types_false = get_bool ("Types::Serialiser::false");
1063 types_error = get_bool ("Types::Serialiser::error");
1064
1065 sv_cbor = newSVpv ("CBOR", 0);
1066 SvREADONLY_on (sv_cbor);
1067 }
1068
1069 PROTOTYPES: DISABLE
1070
1071 void CLONE (...)
1072 CODE:
1073 cbor_stash = 0;
1074 cbor_tagged_stash = 0;
1075 types_error_stash = 0;
1076 types_boolean_stash = 0;
1077
1078 void new (char *klass)
1079 PPCODE:
1080 {
1081 SV *pv = NEWSV (0, sizeof (CBOR));
1082 SvPOK_only (pv);
1083 cbor_init ((CBOR *)SvPVX (pv));
1084 XPUSHs (sv_2mortal (sv_bless (
1085 newRV_noinc (pv),
1086 strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1087 )));
1088 }
1089
1090 void shrink (CBOR *self, int enable = 1)
1091 ALIAS:
1092 shrink = F_SHRINK
1093 allow_unknown = F_ALLOW_UNKNOWN
1094 allow_sharing = F_ALLOW_SHARING
1095 dedup_keys = F_DEDUP_KEYS
1096 dedup_strings = F_DEDUP_STRINGS
1097 PPCODE:
1098 {
1099 if (enable)
1100 self->flags |= ix;
1101 else
1102 self->flags &= ~ix;
1103
1104 XPUSHs (ST (0));
1105 }
1106
1107 void get_shrink (CBOR *self)
1108 ALIAS:
1109 get_shrink = F_SHRINK
1110 get_allow_unknown = F_ALLOW_UNKNOWN
1111 get_allow_sharing = F_ALLOW_SHARING
1112 get_dedup_keys = F_DEDUP_KEYS
1113 get_dedup_strings = F_DEDUP_STRINGS
1114 PPCODE:
1115 XPUSHs (boolSV (self->flags & ix));
1116
1117 void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1118 PPCODE:
1119 self->max_depth = max_depth;
1120 XPUSHs (ST (0));
1121
1122 U32 get_max_depth (CBOR *self)
1123 CODE:
1124 RETVAL = self->max_depth;
1125 OUTPUT:
1126 RETVAL
1127
1128 void max_size (CBOR *self, U32 max_size = 0)
1129 PPCODE:
1130 self->max_size = max_size;
1131 XPUSHs (ST (0));
1132
1133 int get_max_size (CBOR *self)
1134 CODE:
1135 RETVAL = self->max_size;
1136 OUTPUT:
1137 RETVAL
1138
1139 void encode (CBOR *self, SV *scalar)
1140 PPCODE:
1141 PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1142 XPUSHs (scalar);
1143
1144 void decode (CBOR *self, SV *cborstr)
1145 PPCODE:
1146 PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1147 XPUSHs (cborstr);
1148
1149 void decode_prefix (CBOR *self, SV *cborstr)
1150 PPCODE:
1151 {
1152 SV *sv;
1153 char *offset;
1154 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1155 EXTEND (SP, 2);
1156 PUSHs (sv);
1157 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1158 }
1159
1160 PROTOTYPES: ENABLE
1161
1162 void encode_cbor (SV *scalar)
1163 PPCODE:
1164 {
1165 CBOR cbor;
1166 cbor_init (&cbor);
1167 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1168 XPUSHs (scalar);
1169 }
1170
1171 void decode_cbor (SV *cborstr)
1172 PPCODE:
1173 {
1174 CBOR cbor;
1175 cbor_init (&cbor);
1176 PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1177 XPUSHs (cborstr);
1178 }
1179