ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.69
Committed: Mon Nov 30 20:38:25 2020 UTC (3 years, 5 months ago) by root
Branch: MAIN
CVS Tags: rel-1_82
Changes since 1.68: +9 -2 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.35 // known major and minor types
34     enum cbor_type
35     {
36     MAJOR_SHIFT = 5,
37     MINOR_MASK = 0x1f,
38    
39     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
40     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
41     MAJOR_BYTES = 2 << MAJOR_SHIFT,
42     MAJOR_TEXT = 3 << MAJOR_SHIFT,
43     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
44     MAJOR_MAP = 5 << MAJOR_SHIFT,
45     MAJOR_TAG = 6 << MAJOR_SHIFT,
46     MAJOR_MISC = 7 << MAJOR_SHIFT,
47    
48     // INT/STRING/ARRAY/MAP subtypes
49     LENGTH_EXT1 = 24,
50     LENGTH_EXT2 = 25,
51     LENGTH_EXT4 = 26,
52     LENGTH_EXT8 = 27,
53    
54     // SIMPLE types (effectively MISC subtypes)
55     SIMPLE_FALSE = 20,
56     SIMPLE_TRUE = 21,
57     SIMPLE_NULL = 22,
58     SIMPLE_UNDEF = 23,
59    
60     // MISC subtype (unused)
61     MISC_EXT1 = 24,
62     MISC_FLOAT16 = 25,
63     MISC_FLOAT32 = 26,
64     MISC_FLOAT64 = 27,
65    
66     // BYTES/TEXT/ARRAY/MAP
67     MINOR_INDEF = 31,
68     };
69    
70 root 1.9 // known tags
71 root 1.8 enum cbor_tag
72     {
73 root 1.35 // extensions
74     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
75     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
76     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
77     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
78     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
79     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
80     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
81    
82     // rfc7049
83     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
84     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
85     CBOR_TAG_POS_BIGNUM = 2, // byte string
86     CBOR_TAG_NEG_BIGNUM = 3, // byte string
87     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
88     CBOR_TAG_BIGFLOAT = 5, // array
89    
90     CBOR_TAG_CONV_B64U = 21, // base64url, any
91     CBOR_TAG_CONV_B64 = 22, // base64, any
92     CBOR_TAG_CONV_HEX = 23, // base16, any
93     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
94    
95     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
96     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
97     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
98     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
99     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
100 root 1.8
101 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
102 root 1.8 };
103    
104 root 1.66 // known forced types, also hardcoded in CBOR.pm
105     enum
106     {
107     AS_CBOR = 0,
108 root 1.68 AS_INT = 1,
109     AS_BYTES = 2,
110     AS_TEXT = 3,
111 root 1.67 AS_FLOAT16 = 4,
112     AS_FLOAT32 = 5,
113     AS_FLOAT64 = 6,
114     // possibly future enhancements: (generic) float, (generic) string
115 root 1.66 };
116    
117 root 1.24 #define F_SHRINK 0x00000001UL
118     #define F_ALLOW_UNKNOWN 0x00000002UL
119 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
120 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
121 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
122     #define F_PACK_STRINGS 0x00000020UL
123     #define F_TEXT_KEYS 0x00000040UL
124     #define F_TEXT_STRINGS 0x00000080UL
125     #define F_VALIDATE_UTF8 0x00000100UL
126 root 1.1
127     #define INIT_SIZE 32 // initial scalar size to be allocated
128    
129     #define SB do {
130     #define SE } while (0)
131    
132     #define IN_RANGE_INC(type,val,beg,end) \
133     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
134     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
135    
136     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
137    
138     #ifdef USE_ITHREADS
139     # define CBOR_SLOW 1
140     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
141     #else
142     # define CBOR_SLOW 0
143     # define CBOR_STASH cbor_stash
144     #endif
145    
146 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
147 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
148 root 1.1
149     typedef struct {
150     U32 flags;
151     U32 max_depth;
152     STRLEN max_size;
153 root 1.27 SV *filter;
154 root 1.40
155     // for the incremental parser
156     STRLEN incr_pos; // the current offset into the text
157     STRLEN incr_need; // minimum bytes needed to decode
158     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
159 root 1.1 } CBOR;
160    
161 root 1.5 ecb_inline void
162 root 1.1 cbor_init (CBOR *cbor)
163     {
164     Zero (cbor, 1, CBOR);
165     cbor->max_depth = 512;
166     }
167    
168 root 1.27 ecb_inline void
169     cbor_free (CBOR *cbor)
170     {
171     SvREFCNT_dec (cbor->filter);
172 root 1.40 SvREFCNT_dec (cbor->incr_count);
173 root 1.27 }
174    
175 root 1.1 /////////////////////////////////////////////////////////////////////////////
176     // utility functions
177    
178 root 1.5 ecb_inline SV *
179 root 1.1 get_bool (const char *name)
180     {
181     SV *sv = get_sv (name, 1);
182    
183     SvREADONLY_on (sv);
184     SvREADONLY_on (SvRV (sv));
185    
186     return sv;
187     }
188    
189 root 1.5 ecb_inline void
190 root 1.1 shrink (SV *sv)
191     {
192     sv_utf8_downgrade (sv, 1);
193    
194     if (SvLEN (sv) > SvCUR (sv) + 1)
195     {
196     #ifdef SvPV_shrink_to_cur
197     SvPV_shrink_to_cur (sv);
198     #elif defined (SvPV_renew)
199     SvPV_renew (sv, SvCUR (sv) + 1);
200     #endif
201     }
202     }
203    
204 root 1.21 // minimum length of a string to be registered for stringref
205     ecb_inline int
206     minimum_string_length (UV idx)
207     {
208 root 1.61 return idx <= 23 ? 3
209     : idx <= 0xffU ? 4
210     : idx <= 0xffffU ? 5
211     : idx <= 0xffffffffU ? 7
212     : 11;
213 root 1.21 }
214    
215 root 1.1 /////////////////////////////////////////////////////////////////////////////
216     // encoder
217    
218     // structure used for encoding CBOR
219     typedef struct
220     {
221     char *cur; // SvPVX (sv) + current output position
222     char *end; // SvEND (sv)
223     SV *sv; // result scalar
224     CBOR cbor;
225     U32 depth; // recursion level
226 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
227     UV stringref_idx;
228 root 1.19 HV *shareable; // ptr => index, or 0
229     UV shareable_idx;
230 root 1.1 } enc_t;
231    
232 root 1.5 ecb_inline void
233 root 1.1 need (enc_t *enc, STRLEN len)
234     {
235 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
236 root 1.1 {
237     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
238     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
239     enc->cur = SvPVX (enc->sv) + cur;
240     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
241     }
242     }
243    
244 root 1.5 ecb_inline void
245 root 1.1 encode_ch (enc_t *enc, char ch)
246     {
247     need (enc, 1);
248     *enc->cur++ = ch;
249     }
250    
251 root 1.66 // used for tags, intregers, element counts and so on
252 root 1.1 static void
253     encode_uint (enc_t *enc, int major, UV len)
254     {
255     need (enc, 9);
256    
257 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
258 root 1.1 *enc->cur++ = major | len;
259 root 1.36 else if (ecb_expect_true (len <= 0xffU))
260 root 1.1 {
261 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
262 root 1.1 *enc->cur++ = len;
263     }
264 root 1.36 else if (len <= 0xffffU)
265 root 1.1 {
266 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
267 root 1.1 *enc->cur++ = len >> 8;
268     *enc->cur++ = len;
269     }
270 root 1.36 else if (len <= 0xffffffffU)
271 root 1.1 {
272 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
273 root 1.1 *enc->cur++ = len >> 24;
274     *enc->cur++ = len >> 16;
275     *enc->cur++ = len >> 8;
276     *enc->cur++ = len;
277     }
278 root 1.4 else
279 root 1.1 {
280 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
281 root 1.1 *enc->cur++ = len >> 56;
282     *enc->cur++ = len >> 48;
283     *enc->cur++ = len >> 40;
284     *enc->cur++ = len >> 32;
285     *enc->cur++ = len >> 24;
286     *enc->cur++ = len >> 16;
287     *enc->cur++ = len >> 8;
288     *enc->cur++ = len;
289     }
290     }
291    
292 root 1.66 // encodes a perl value into a CBOR integer
293 root 1.67 ecb_inline void
294 root 1.66 encode_int (enc_t *enc, SV *sv)
295     {
296     if (SvIsUV (sv))
297     encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
298     else if (SvIVX (sv) >= 0)
299     encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
300     else
301     encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
302     }
303    
304 root 1.21 ecb_inline void
305     encode_tag (enc_t *enc, UV tag)
306     {
307 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
308 root 1.21 }
309    
310 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
311     ecb_noinline static void
312     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
313     {
314     STRLEN ulen = len;
315     U8 *p, *pend = (U8 *)str + len;
316    
317     for (p = (U8 *)str; p < pend; ++p)
318     ulen += *p >> 7; // count set high bits
319    
320     encode_uint (enc, MAJOR_TEXT, ulen);
321    
322     need (enc, ulen);
323     for (p = (U8 *)str; p < pend; ++p)
324     if (*p < 0x80)
325     *enc->cur++ = *p;
326     else
327     {
328     *enc->cur++ = 0xc0 + (*p >> 6);
329     *enc->cur++ = 0x80 + (*p & 63);
330     }
331     }
332    
333 root 1.30 ecb_inline void
334 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
335 root 1.30 {
336 root 1.53 if (ecb_expect_false (upgrade_utf8))
337 root 1.51 if (!utf8)
338     {
339 root 1.53 encode_str_utf8 (enc, utf8, str, len);
340 root 1.51 return;
341     }
342    
343 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
344 root 1.30 need (enc, len);
345     memcpy (enc->cur, str, len);
346     enc->cur += len;
347     }
348    
349 root 1.54 ecb_inline void
350 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
351 root 1.1 {
352 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
353 root 1.21 {
354 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
355 root 1.21
356     if (SvOK (*svp))
357     {
358     // already registered, use stringref
359     encode_tag (enc, CBOR_TAG_STRINGREF);
360 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
361 root 1.21 return;
362     }
363     else if (len >= minimum_string_length (enc->stringref_idx))
364     {
365     // register only
366     sv_setuv (*svp, enc->stringref_idx);
367     ++enc->stringref_idx;
368     }
369     }
370    
371 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
372 root 1.1 }
373    
374 root 1.66 ecb_inline void
375     encode_float16 (enc_t *enc, NV nv)
376     {
377     need (enc, 1+2);
378    
379     *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
380    
381     uint16_t fp = ecb_float_to_binary16 (nv);
382    
383     if (!ecb_big_endian ())
384     fp = ecb_bswap16 (fp);
385    
386     memcpy (enc->cur, &fp, 2);
387     enc->cur += 2;
388     }
389    
390     ecb_inline void
391     encode_float32 (enc_t *enc, NV nv)
392     {
393     need (enc, 1+4);
394    
395     *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
396    
397     uint32_t fp = ecb_float_to_binary32 (nv);
398    
399     if (!ecb_big_endian ())
400     fp = ecb_bswap32 (fp);
401    
402     memcpy (enc->cur, &fp, 4);
403     enc->cur += 4;
404     }
405    
406     ecb_inline void
407     encode_float64 (enc_t *enc, NV nv)
408     {
409     need (enc, 1+8);
410    
411     *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
412    
413     uint64_t fp = ecb_double_to_binary64 (nv);
414    
415     if (!ecb_big_endian ())
416     fp = ecb_bswap64 (fp);
417    
418     memcpy (enc->cur, &fp, 8);
419     enc->cur += 8;
420     }
421    
422     ecb_inline void
423 root 1.69 encode_bool (enc_t *enc, int istrue)
424     {
425     encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
426     }
427    
428     ecb_inline void
429 root 1.66 encode_forced (enc_t *enc, UV type, SV *sv)
430     {
431     switch (type)
432     {
433     case AS_CBOR:
434     {
435     STRLEN len;
436     char *str = SvPVbyte (sv, len);
437    
438     need (enc, len);
439     memcpy (enc->cur, str, len);
440     enc->cur += len;
441     }
442     break;
443    
444     case AS_BYTES:
445     {
446     STRLEN len;
447     char *str = SvPVbyte (sv, len);
448     encode_strref (enc, 0, 0, str, len);
449     }
450     break;
451    
452     case AS_TEXT:
453     {
454     STRLEN len;
455     char *str = SvPVutf8 (sv, len);
456     encode_strref (enc, 1, 1, str, len);
457     }
458     break;
459    
460 root 1.69 case AS_INT: encode_int (enc, sv); break;
461    
462 root 1.66 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
463     case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
464     case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
465    
466     default:
467     croak ("encountered malformed CBOR::XS::Tagged object");
468     }
469     }
470    
471 root 1.1 static void encode_sv (enc_t *enc, SV *sv);
472    
473     static void
474     encode_av (enc_t *enc, AV *av)
475     {
476     int i, len = av_len (av);
477    
478     if (enc->depth >= enc->cbor.max_depth)
479     croak (ERR_NESTING_EXCEEDED);
480    
481     ++enc->depth;
482    
483 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
484 root 1.1
485 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
486 root 1.45 for (i = 0; i <= len; ++i)
487     {
488     SV **svp = av_fetch (av, i, 0);
489     encode_sv (enc, svp ? *svp : &PL_sv_undef);
490     }
491     else
492     for (i = 0; i <= len; ++i)
493 root 1.46 {
494     SV *sv = AvARRAY (av)[i];
495     encode_sv (enc, sv ? sv : &PL_sv_undef);
496     }
497 root 1.1
498     --enc->depth;
499     }
500    
501     static void
502     encode_hv (enc_t *enc, HV *hv)
503     {
504     HE *he;
505    
506     if (enc->depth >= enc->cbor.max_depth)
507     croak (ERR_NESTING_EXCEEDED);
508    
509     ++enc->depth;
510    
511     int pairs = hv_iterinit (hv);
512     int mg = SvMAGICAL (hv);
513    
514 root 1.63 if (ecb_expect_false (mg))
515 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
516 root 1.1 else
517 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
518 root 1.1
519     while ((he = hv_iternext (hv)))
520     {
521 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
522     encode_sv (enc, HeSVKEY (he));
523 root 1.1 else
524 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
525 root 1.1
526 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
527 root 1.1 }
528    
529 root 1.63 if (ecb_expect_false (mg))
530 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
531 root 1.1
532     --enc->depth;
533     }
534    
535     // encode objects, arrays and special \0=false and \1=true values.
536     static void
537     encode_rv (enc_t *enc, SV *sv)
538     {
539 root 1.19 SvGETMAGIC (sv);
540 root 1.1
541 root 1.19 svtype svt = SvTYPE (sv);
542 root 1.18
543 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
544 root 1.1 {
545 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
546     ? types_boolean_stash
547     : gv_stashpv ("Types::Serialiser::Boolean", 1);
548     HV *error_stash = !CBOR_SLOW || types_error_stash
549     ? types_error_stash
550     : gv_stashpv ("Types::Serialiser::Error", 1);
551 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
552     ? cbor_tagged_stash
553     : gv_stashpv ("CBOR::XS::Tagged" , 1);
554 root 1.1
555 root 1.11 HV *stash = SvSTASH (sv);
556    
557     if (stash == boolean_stash)
558 root 1.34 {
559 root 1.69 encode_bool (enc, SvIV (sv));
560 root 1.34 return;
561     }
562 root 1.11 else if (stash == error_stash)
563 root 1.34 {
564 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
565 root 1.34 return;
566     }
567 root 1.11 else if (stash == tagged_stash)
568 root 1.6 {
569     if (svt != SVt_PVAV)
570     croak ("encountered CBOR::XS::Tagged object that isn't an array");
571    
572 root 1.66 switch (av_len ((AV *)sv))
573     {
574     case 2-1:
575     // actually a tagged value
576     encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
577     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
578     break;
579    
580     case 3-1:
581     // a forced type [value, type, undef]
582     encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
583     break;
584    
585     default:
586     croak ("encountered malformed CBOR::XS::Tagged object");
587     }
588 root 1.34
589     return;
590 root 1.6 }
591 root 1.34 }
592    
593     if (ecb_expect_false (SvREFCNT (sv) > 1)
594     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
595     {
596 root 1.63 if (ecb_expect_false (!enc->shareable))
597 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
598    
599     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
600    
601     if (SvOK (*svp))
602     {
603     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
604 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
605 root 1.34 return;
606     }
607     else
608     {
609     sv_setuv (*svp, enc->shareable_idx);
610     ++enc->shareable_idx;
611     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
612     }
613     }
614    
615     if (ecb_expect_false (SvOBJECT (sv)))
616     {
617     HV *stash = SvSTASH (sv);
618     GV *method;
619    
620 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
621     croak ("encountered object '%s', but forbid_objects is enabled",
622     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
623     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
624 root 1.1 {
625 root 1.11 dSP;
626    
627 root 1.50 ENTER; SAVETMPS;
628     PUSHMARK (SP);
629 root 1.6 // we re-bless the reference to get overload and other niceties right
630 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
631    
632     PUTBACK;
633     // G_SCALAR ensures that return value is 1
634     call_sv ((SV *)GvCV (method), G_SCALAR);
635     SPAGAIN;
636    
637     // catch this surprisingly common error
638     if (SvROK (TOPs) && SvRV (TOPs) == sv)
639     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
640    
641     encode_sv (enc, POPs);
642    
643     PUTBACK;
644    
645     FREETMPS; LEAVE;
646     }
647     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
648     {
649     dSP;
650 root 1.6
651 root 1.50 ENTER; SAVETMPS;
652     PUSHMARK (SP);
653 root 1.11 EXTEND (SP, 2);
654     // we re-bless the reference to get overload and other niceties right
655     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
656     PUSHs (sv_cbor);
657 root 1.1
658 root 1.11 PUTBACK;
659     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
660     SPAGAIN;
661 root 1.6
662 root 1.11 // catch this surprisingly common error
663     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
664     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
665 root 1.6
666 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
667 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
668 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
669 root 1.6
670 root 1.65 {
671     int i;
672    
673     for (i = 0; i < count; ++i)
674     encode_sv (enc, SP[i + 1 - count]);
675    
676     SP -= count;
677     }
678 root 1.6
679 root 1.11 PUTBACK;
680 root 1.6
681 root 1.11 FREETMPS; LEAVE;
682 root 1.1 }
683 root 1.11 else
684     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
685     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
686 root 1.1 }
687     else if (svt == SVt_PVHV)
688     encode_hv (enc, (HV *)sv);
689     else if (svt == SVt_PVAV)
690     encode_av (enc, (AV *)sv);
691 root 1.18 else
692 root 1.1 {
693 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
694     encode_sv (enc, sv);
695 root 1.1 }
696     }
697    
698     static void
699     encode_nv (enc_t *enc, SV *sv)
700     {
701     double nv = SvNVX (sv);
702    
703     need (enc, 9);
704    
705 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
706     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
707 root 1.1 //TODO: maybe I32?
708 root 1.5 else if (ecb_expect_false (nv == (float)nv))
709 root 1.66 encode_float32 (enc, nv);
710 root 1.1 else
711 root 1.66 encode_float64 (enc, nv);
712 root 1.1 }
713    
714     static void
715     encode_sv (enc_t *enc, SV *sv)
716     {
717     SvGETMAGIC (sv);
718    
719     if (SvPOKp (sv))
720     {
721     STRLEN len;
722     char *str = SvPV (sv, len);
723 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
724 root 1.1 }
725     else if (SvNOKp (sv))
726     encode_nv (enc, sv);
727     else if (SvIOKp (sv))
728 root 1.67 encode_int (enc, sv);
729 root 1.1 else if (SvROK (sv))
730     encode_rv (enc, SvRV (sv));
731     else if (!SvOK (sv))
732 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
733 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
734 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
735 root 1.1 else
736     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
737     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
738     }
739    
740     static SV *
741     encode_cbor (SV *scalar, CBOR *cbor)
742     {
743 root 1.48 enc_t enc = { 0 };
744 root 1.1
745 root 1.63 enc.cbor = *cbor;
746     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
747     enc.cur = SvPVX (enc.sv);
748     enc.end = SvEND (enc.sv);
749 root 1.1
750     SvPOK_only (enc.sv);
751 root 1.20
752 root 1.32 if (cbor->flags & F_PACK_STRINGS)
753 root 1.20 {
754     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
755     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
756     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
757     }
758    
759 root 1.1 encode_sv (&enc, scalar);
760    
761     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
762     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
763    
764     if (enc.cbor.flags & F_SHRINK)
765     shrink (enc.sv);
766    
767     return enc.sv;
768     }
769    
770     /////////////////////////////////////////////////////////////////////////////
771     // decoder
772    
773     // structure used for decoding CBOR
774     typedef struct
775     {
776     U8 *cur; // current parser pointer
777     U8 *end; // end of input string
778     const char *err; // parse error, if != 0
779     CBOR cbor;
780     U32 depth; // recursion depth
781     U32 maxdepth; // recursion depth limit
782 root 1.19 AV *shareable;
783 root 1.20 AV *stringref;
784 root 1.27 SV *decode_tagged;
785 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
786 root 1.1 } dec_t;
787    
788 root 1.59 // set dec->err to ERRSV
789     ecb_cold static void
790     err_errsv (dec_t *dec)
791     {
792     if (!dec->err)
793     {
794     dec->err_sv = newSVsv (ERRSV);
795    
796     // chop off the trailing \n
797     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
798     *SvEND (dec->err_sv) = 0;
799    
800     dec->err = SvPVutf8_nolen (dec->err_sv);
801     }
802     }
803 root 1.1
804 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
805     ecb_cold static void
806     err_set (dec_t *dec, const char *reason)
807     {
808     if (!dec->err)
809     dec->err = reason;
810     }
811    
812     ecb_cold static void
813     err_unexpected_end (dec_t *dec)
814     {
815     err_set (dec, "unexpected end of CBOR data");
816     }
817    
818     #define ERR_DO(do) SB do; goto fail; SE
819     #define ERR(reason) ERR_DO (err_set (dec, reason))
820     #define ERR_ERRSV ERR_DO (err_errsv (dec))
821    
822 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
823 root 1.1
824 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
825 root 1.1 #define DEC_DEC_DEPTH --dec->depth
826    
827     static UV
828     decode_uint (dec_t *dec)
829     {
830 root 1.35 U8 m = *dec->cur & MINOR_MASK;
831     ++dec->cur;
832    
833     if (ecb_expect_true (m < LENGTH_EXT1))
834     return m;
835 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
836     {
837     WANT (1);
838     dec->cur += 1;
839     return dec->cur[-1];
840     }
841     else if (ecb_expect_true (m == LENGTH_EXT2))
842     {
843     WANT (2);
844     dec->cur += 2;
845     return (((UV)dec->cur[-2]) << 8)
846     | ((UV)dec->cur[-1]);
847     }
848     else if (ecb_expect_true (m == LENGTH_EXT4))
849     {
850     WANT (4);
851     dec->cur += 4;
852     return (((UV)dec->cur[-4]) << 24)
853     | (((UV)dec->cur[-3]) << 16)
854     | (((UV)dec->cur[-2]) << 8)
855     | ((UV)dec->cur[-1]);
856     }
857     else if (ecb_expect_true (m == LENGTH_EXT8))
858 root 1.1 {
859 root 1.36 WANT (8);
860     dec->cur += 8;
861 root 1.34
862 root 1.36 return
863 root 1.34 #if UVSIZE < 8
864 root 1.36 0
865 root 1.34 #else
866 root 1.36 (((UV)dec->cur[-8]) << 56)
867     | (((UV)dec->cur[-7]) << 48)
868     | (((UV)dec->cur[-6]) << 40)
869     | (((UV)dec->cur[-5]) << 32)
870 root 1.34 #endif
871 root 1.36 | (((UV)dec->cur[-4]) << 24)
872     | (((UV)dec->cur[-3]) << 16)
873     | (((UV)dec->cur[-2]) << 8)
874     | ((UV)dec->cur[-1]);
875 root 1.1 }
876 root 1.36 else
877     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
878 root 1.1
879     fail:
880     return 0;
881     }
882    
883     static SV *decode_sv (dec_t *dec);
884    
885     static SV *
886     decode_av (dec_t *dec)
887     {
888     AV *av = newAV ();
889    
890     DEC_INC_DEPTH;
891    
892 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
893 root 1.1 {
894     ++dec->cur;
895    
896     for (;;)
897     {
898     WANT (1);
899    
900 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
901 root 1.1 {
902     ++dec->cur;
903     break;
904     }
905    
906     av_push (av, decode_sv (dec));
907     }
908     }
909     else
910     {
911 root 1.57 UV i, len = decode_uint (dec);
912 root 1.1
913 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
914 root 1.1 av_fill (av, len - 1);
915    
916     for (i = 0; i < len; ++i)
917     AvARRAY (av)[i] = decode_sv (dec);
918     }
919    
920     DEC_DEC_DEPTH;
921     return newRV_noinc ((SV *)av);
922    
923     fail:
924 root 1.63 SvREFCNT_dec_NN (av);
925 root 1.1 DEC_DEC_DEPTH;
926     return &PL_sv_undef;
927     }
928    
929 root 1.16 static void
930     decode_he (dec_t *dec, HV *hv)
931     {
932     // for speed reasons, we specialcase single-string
933 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
934    
935 root 1.23 if (ecb_expect_true (!dec->stringref))
936 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
937 root 1.21 {
938 root 1.56 STRLEN len = decode_uint (dec);
939 root 1.21 char *key = (char *)dec->cur;
940 root 1.16
941 root 1.49 WANT (len);
942 root 1.21 dec->cur += len;
943    
944 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
945 root 1.16
946 root 1.21 return;
947     }
948 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
949 root 1.21 {
950 root 1.56 STRLEN len = decode_uint (dec);
951 root 1.21 char *key = (char *)dec->cur;
952 root 1.16
953 root 1.49 WANT (len);
954 root 1.21 dec->cur += len;
955 root 1.20
956 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
957     if (!is_utf8_string (key, len))
958     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
959 root 1.16
960 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
961 root 1.16
962 root 1.21 return;
963     }
964 root 1.20
965 root 1.21 SV *k = decode_sv (dec);
966     SV *v = decode_sv (dec);
967 root 1.16
968 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
969     // methods, and this is hopefully the only place where it can happen,
970     // so if there is a chance of an exception, take the very slow path.
971     // since catching exceptions is "undocumented/internal/forbidden" by
972     // the new p5p powers, we need to call out to a perl function :/
973     if (ecb_expect_false (SvAMAGIC (k)))
974     {
975     dSP;
976    
977     ENTER; SAVETMPS;
978     PUSHMARK (SP);
979     EXTEND (SP, 3);
980     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
981     PUSHs (sv_2mortal (k));
982     PUSHs (sv_2mortal (v));
983    
984     PUTBACK;
985     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
986     SPAGAIN;
987    
988     FREETMPS; LEAVE;
989    
990     if (SvTRUE (ERRSV))
991     ERR_ERRSV;
992    
993     return;
994     }
995    
996 root 1.21 hv_store_ent (hv, k, v, 0);
997 root 1.63 SvREFCNT_dec_NN (k);
998 root 1.38
999     fail:
1000     ;
1001 root 1.16 }
1002    
1003 root 1.1 static SV *
1004     decode_hv (dec_t *dec)
1005     {
1006     HV *hv = newHV ();
1007    
1008     DEC_INC_DEPTH;
1009    
1010 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
1011 root 1.1 {
1012     ++dec->cur;
1013    
1014     for (;;)
1015     {
1016     WANT (1);
1017    
1018 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
1019 root 1.1 {
1020     ++dec->cur;
1021     break;
1022     }
1023    
1024 root 1.16 decode_he (dec, hv);
1025 root 1.1 }
1026     }
1027     else
1028     {
1029 root 1.57 UV pairs = decode_uint (dec);
1030    
1031     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
1032 root 1.1
1033 root 1.16 while (pairs--)
1034     decode_he (dec, hv);
1035 root 1.1 }
1036    
1037     DEC_DEC_DEPTH;
1038     return newRV_noinc ((SV *)hv);
1039    
1040     fail:
1041 root 1.63 SvREFCNT_dec_NN (hv);
1042 root 1.1 DEC_DEC_DEPTH;
1043     return &PL_sv_undef;
1044     }
1045    
1046     static SV *
1047     decode_str (dec_t *dec, int utf8)
1048     {
1049 root 1.6 SV *sv = 0;
1050 root 1.1
1051 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
1052 root 1.1 {
1053 root 1.33 // indefinite length strings
1054 root 1.1 ++dec->cur;
1055    
1056 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
1057 root 1.33
1058 root 1.1 sv = newSVpvn ("", 0);
1059    
1060     for (;;)
1061     {
1062     WANT (1);
1063    
1064 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
1065     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
1066 root 1.33 {
1067     ++dec->cur;
1068     break;
1069     }
1070     else
1071     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
1072    
1073     STRLEN len = decode_uint (dec);
1074 root 1.1
1075 root 1.33 WANT (len);
1076     sv_catpvn (sv, dec->cur, len);
1077     dec->cur += len;
1078 root 1.1 }
1079     }
1080     else
1081     {
1082     STRLEN len = decode_uint (dec);
1083    
1084     WANT (len);
1085     sv = newSVpvn (dec->cur, len);
1086     dec->cur += len;
1087 root 1.25
1088     if (ecb_expect_false (dec->stringref)
1089     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
1090     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
1091 root 1.1 }
1092    
1093     if (utf8)
1094 root 1.38 {
1095     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
1096     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
1097     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
1098    
1099     SvUTF8_on (sv);
1100     }
1101 root 1.1
1102     return sv;
1103    
1104     fail:
1105 root 1.6 SvREFCNT_dec (sv);
1106 root 1.1 return &PL_sv_undef;
1107     }
1108    
1109     static SV *
1110 root 1.3 decode_tagged (dec_t *dec)
1111     {
1112 root 1.19 SV *sv = 0;
1113 root 1.3 UV tag = decode_uint (dec);
1114 root 1.19
1115     WANT (1);
1116 root 1.3
1117 root 1.18 switch (tag)
1118     {
1119     case CBOR_TAG_MAGIC:
1120 root 1.20 sv = decode_sv (dec);
1121     break;
1122 root 1.18
1123     case CBOR_TAG_INDIRECTION:
1124 root 1.20 sv = newRV_noinc (decode_sv (dec));
1125     break;
1126    
1127     case CBOR_TAG_STRINGREF_NAMESPACE:
1128     {
1129 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1130 root 1.55 // erase mortalised caches, e.g. "shareable"
1131     ENTER;
1132 root 1.20
1133     SAVESPTR (dec->stringref);
1134     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1135    
1136     sv = decode_sv (dec);
1137    
1138 root 1.55 LEAVE;
1139 root 1.20 }
1140     break;
1141    
1142     case CBOR_TAG_STRINGREF:
1143     {
1144 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1145 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1146    
1147     UV idx = decode_uint (dec);
1148    
1149 root 1.64 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1150 root 1.20 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1151    
1152     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1153     }
1154     break;
1155 root 1.11
1156 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1157     {
1158     if (ecb_expect_false (!dec->shareable))
1159     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1160    
1161 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1162     {
1163     sv = newSV (0);
1164     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1165 root 1.18
1166 root 1.37 SV *osv = decode_sv (dec);
1167     sv_setsv (sv, osv);
1168     SvREFCNT_dec_NN (osv);
1169     }
1170     else
1171     {
1172     av_push (dec->shareable, &PL_sv_undef);
1173     int idx = AvFILLp (dec->shareable);
1174     sv = decode_sv (dec);
1175     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1176     }
1177 root 1.19 }
1178 root 1.20 break;
1179 root 1.18
1180     case CBOR_TAG_VALUE_SHAREDREF:
1181 root 1.17 {
1182 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1183 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1184 root 1.18
1185 root 1.19 UV idx = decode_uint (dec);
1186    
1187 root 1.64 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1188 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1189    
1190 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1191 root 1.37
1192     if (sv == &PL_sv_undef)
1193     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1194 root 1.17 }
1195 root 1.20 break;
1196 root 1.17
1197 root 1.18 case CBOR_TAG_PERL_OBJECT:
1198     {
1199 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1200     goto filter;
1201    
1202 root 1.19 sv = decode_sv (dec);
1203    
1204 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1205     ERR ("corrupted CBOR data (non-array perl object)");
1206    
1207     AV *av = (AV *)SvRV (sv);
1208     int len = av_len (av) + 1;
1209     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1210    
1211     if (!stash)
1212     ERR ("cannot decode perl-object (package does not exist)");
1213    
1214     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1215    
1216     if (!method)
1217     ERR ("cannot decode perl-object (package does not have a THAW method)");
1218    
1219     dSP;
1220    
1221 root 1.50 ENTER; SAVETMPS;
1222     PUSHMARK (SP);
1223 root 1.18 EXTEND (SP, len + 1);
1224     // we re-bless the reference to get overload and other niceties right
1225     PUSHs (*av_fetch (av, 0, 1));
1226     PUSHs (sv_cbor);
1227    
1228     int i;
1229    
1230     for (i = 1; i < len; ++i)
1231     PUSHs (*av_fetch (av, i, 1));
1232    
1233     PUTBACK;
1234     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1235     SPAGAIN;
1236    
1237     if (SvTRUE (ERRSV))
1238     {
1239     FREETMPS; LEAVE;
1240 root 1.59 ERR_ERRSV;
1241 root 1.18 }
1242    
1243 root 1.63 SvREFCNT_dec_NN (sv);
1244 root 1.18 sv = SvREFCNT_inc (POPs);
1245 root 1.11
1246 root 1.18 PUTBACK;
1247 root 1.11
1248 root 1.18 FREETMPS; LEAVE;
1249     }
1250 root 1.20 break;
1251 root 1.9
1252 root 1.18 default:
1253 root 1.60 filter:
1254 root 1.18 {
1255 root 1.58 SV *tag_sv = newSVuv (tag);
1256    
1257 root 1.19 sv = decode_sv (dec);
1258    
1259 root 1.27 dSP;
1260 root 1.50 ENTER; SAVETMPS;
1261     PUSHMARK (SP);
1262 root 1.27 EXTEND (SP, 2);
1263 root 1.58 PUSHs (tag_sv);
1264 root 1.27 PUSHs (sv);
1265    
1266     PUTBACK;
1267     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1268     SPAGAIN;
1269    
1270     if (SvTRUE (ERRSV))
1271     {
1272 root 1.63 SvREFCNT_dec_NN (tag_sv);
1273 root 1.27 FREETMPS; LEAVE;
1274 root 1.59 ERR_ERRSV;
1275 root 1.27 }
1276    
1277     if (count)
1278     {
1279 root 1.63 SvREFCNT_dec_NN (tag_sv);
1280     SvREFCNT_dec_NN (sv);
1281 root 1.65 sv = SvREFCNT_inc_NN (TOPs);
1282     SP -= count;
1283 root 1.27 }
1284     else
1285     {
1286     AV *av = newAV ();
1287 root 1.58 av_push (av, tag_sv);
1288 root 1.27 av_push (av, sv);
1289    
1290     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1291     ? cbor_tagged_stash
1292     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1293     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1294     }
1295 root 1.7
1296 root 1.27 PUTBACK;
1297    
1298     FREETMPS; LEAVE;
1299 root 1.18 }
1300 root 1.20 break;
1301 root 1.11 }
1302 root 1.9
1303 root 1.20 return sv;
1304    
1305 root 1.9 fail:
1306     SvREFCNT_dec (sv);
1307     return &PL_sv_undef;
1308 root 1.3 }
1309    
1310     static SV *
1311 root 1.1 decode_sv (dec_t *dec)
1312     {
1313     WANT (1);
1314    
1315 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1316 root 1.1 {
1317 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1318     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1319     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1320     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1321     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1322     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1323     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1324    
1325     case MAJOR_MISC >> MAJOR_SHIFT:
1326     switch (*dec->cur++ & MINOR_MASK)
1327 root 1.1 {
1328 root 1.35 case SIMPLE_FALSE:
1329 root 1.1 #if CBOR_SLOW
1330 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1331 root 1.1 #endif
1332 root 1.10 return newSVsv (types_false);
1333 root 1.35 case SIMPLE_TRUE:
1334 root 1.1 #if CBOR_SLOW
1335 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1336 root 1.1 #endif
1337 root 1.10 return newSVsv (types_true);
1338 root 1.35 case SIMPLE_NULL:
1339 root 1.1 return newSVsv (&PL_sv_undef);
1340 root 1.35 case SIMPLE_UNDEF:
1341 root 1.10 #if CBOR_SLOW
1342     types_error = get_bool ("Types::Serialiser::error");
1343     #endif
1344     return newSVsv (types_error);
1345 root 1.1
1346 root 1.35 case MISC_FLOAT16:
1347 root 1.2 {
1348     WANT (2);
1349    
1350     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1351     dec->cur += 2;
1352    
1353     return newSVnv (ecb_binary16_to_float (fp));
1354     }
1355 root 1.1
1356 root 1.35 case MISC_FLOAT32:
1357 root 1.1 {
1358     uint32_t fp;
1359     WANT (4);
1360     memcpy (&fp, dec->cur, 4);
1361     dec->cur += 4;
1362    
1363     if (!ecb_big_endian ())
1364     fp = ecb_bswap32 (fp);
1365    
1366     return newSVnv (ecb_binary32_to_float (fp));
1367     }
1368    
1369 root 1.35 case MISC_FLOAT64:
1370 root 1.1 {
1371     uint64_t fp;
1372     WANT (8);
1373     memcpy (&fp, dec->cur, 8);
1374     dec->cur += 8;
1375    
1376     if (!ecb_big_endian ())
1377     fp = ecb_bswap64 (fp);
1378    
1379     return newSVnv (ecb_binary64_to_double (fp));
1380     }
1381    
1382 root 1.35 // 0..19 unassigned simple
1383 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1384     // 28-30 unassigned misc
1385     // 31 break code
1386 root 1.1 default:
1387 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1388 root 1.1 }
1389    
1390     break;
1391     }
1392    
1393     fail:
1394     return &PL_sv_undef;
1395     }
1396    
1397     static SV *
1398     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1399     {
1400 root 1.48 dec_t dec = { 0 };
1401 root 1.1 SV *sv;
1402 root 1.16 STRLEN len;
1403     char *data = SvPVbyte (string, len);
1404 root 1.1
1405 root 1.16 if (len > cbor->max_size && cbor->max_size)
1406     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1407     (unsigned long)len, (unsigned long)cbor->max_size);
1408 root 1.1
1409     dec.cbor = *cbor;
1410 root 1.16 dec.cur = (U8 *)data;
1411     dec.end = (U8 *)data + len;
1412 root 1.1
1413     sv = decode_sv (&dec);
1414    
1415     if (offset_return)
1416     *offset_return = dec.cur;
1417    
1418     if (!(offset_return || !sv))
1419 root 1.2 if (dec.cur != dec.end && !dec.err)
1420     dec.err = "garbage after CBOR object";
1421    
1422     if (dec.err)
1423 root 1.1 {
1424 root 1.39 if (dec.shareable)
1425     {
1426 root 1.58 // need to break cyclic links, which would all be in shareable
1427 root 1.39 int i;
1428     SV **svp;
1429    
1430     for (i = av_len (dec.shareable) + 1; i--; )
1431     if ((svp = av_fetch (dec.shareable, i, 0)))
1432     sv_setsv (*svp, &PL_sv_undef);
1433     }
1434    
1435 root 1.63 SvREFCNT_dec_NN (sv);
1436 root 1.59
1437     if (dec.err_sv)
1438     sv_2mortal (dec.err_sv);
1439    
1440 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1441 root 1.1 }
1442    
1443     sv = sv_2mortal (sv);
1444    
1445     return sv;
1446     }
1447    
1448     /////////////////////////////////////////////////////////////////////////////
1449 root 1.40 // incremental parser
1450    
1451     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1452    
1453     // returns 0 for notyet, 1 for success or error
1454     static int
1455     incr_parse (CBOR *self, SV *cborstr)
1456     {
1457     STRLEN cur;
1458     SvPV (cborstr, cur);
1459    
1460     while (ecb_expect_true (self->incr_need <= cur))
1461     {
1462     // table of integer count bytes
1463     static I8 incr_len[MINOR_MASK + 1] = {
1464     0, 0, 0, 0, 0, 0, 0, 0,
1465     0, 0, 0, 0, 0, 0, 0, 0,
1466     0, 0, 0, 0, 0, 0, 0, 0,
1467     1, 2, 4, 8,-1,-1,-1,-2
1468     };
1469    
1470     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1471     U8 m = *p & MINOR_MASK;
1472     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1473     I8 ilen = incr_len[m];
1474    
1475     self->incr_need = self->incr_pos + 1;
1476    
1477     if (ecb_expect_false (ilen < 0))
1478     {
1479     if (m != MINOR_INDEF)
1480     return 1; // error
1481    
1482     if (*p == (MAJOR_MISC | MINOR_INDEF))
1483     {
1484     if (count >= 0)
1485     return 1; // error
1486    
1487     count = 1;
1488     }
1489     else
1490     {
1491     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1492     count = -1;
1493     }
1494     }
1495     else
1496     {
1497     self->incr_need += ilen;
1498     if (ecb_expect_false (self->incr_need > cur))
1499     return 0;
1500    
1501     int major = *p >> MAJOR_SHIFT;
1502    
1503     switch (major)
1504     {
1505 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1506     ++count; // tags merely prefix another value
1507     break;
1508    
1509 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1510     case MAJOR_TEXT >> MAJOR_SHIFT:
1511     case MAJOR_ARRAY >> MAJOR_SHIFT:
1512     case MAJOR_MAP >> MAJOR_SHIFT:
1513     {
1514     UV len;
1515    
1516     if (ecb_expect_false (ilen))
1517     {
1518     len = 0;
1519    
1520     do {
1521     len = (len << 8) | *++p;
1522     } while (--ilen);
1523     }
1524     else
1525     len = m;
1526    
1527     switch (major)
1528     {
1529     case MAJOR_BYTES >> MAJOR_SHIFT:
1530     case MAJOR_TEXT >> MAJOR_SHIFT:
1531     self->incr_need += len;
1532     if (ecb_expect_false (self->incr_need > cur))
1533     return 0;
1534    
1535     break;
1536    
1537     case MAJOR_MAP >> MAJOR_SHIFT:
1538     len <<= 1;
1539     case MAJOR_ARRAY >> MAJOR_SHIFT:
1540     if (len)
1541     {
1542     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1543     count = len + 1;
1544     }
1545     break;
1546     }
1547     }
1548     }
1549     }
1550    
1551     self->incr_pos = self->incr_need;
1552    
1553     if (count > 0)
1554     {
1555     while (!--count)
1556     {
1557     if (!AvFILLp (self->incr_count))
1558     return 1; // done
1559    
1560     SvREFCNT_dec_NN (av_pop (self->incr_count));
1561     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1562     }
1563    
1564     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1565     }
1566     }
1567    
1568     return 0;
1569     }
1570    
1571    
1572     /////////////////////////////////////////////////////////////////////////////
1573 root 1.1 // XS interface functions
1574    
1575     MODULE = CBOR::XS PACKAGE = CBOR::XS
1576    
1577     BOOT:
1578     {
1579     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1580 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1581 root 1.1
1582 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1583     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1584    
1585     types_true = get_bool ("Types::Serialiser::true" );
1586     types_false = get_bool ("Types::Serialiser::false");
1587     types_error = get_bool ("Types::Serialiser::error");
1588 root 1.11
1589 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1590    
1591 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1592     SvREADONLY_on (sv_cbor);
1593 root 1.56
1594     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1595 root 1.1 }
1596    
1597     PROTOTYPES: DISABLE
1598    
1599     void CLONE (...)
1600     CODE:
1601 root 1.10 cbor_stash = 0;
1602     cbor_tagged_stash = 0;
1603     types_error_stash = 0;
1604     types_boolean_stash = 0;
1605 root 1.1
1606     void new (char *klass)
1607     PPCODE:
1608     {
1609     SV *pv = NEWSV (0, sizeof (CBOR));
1610     SvPOK_only (pv);
1611     cbor_init ((CBOR *)SvPVX (pv));
1612     XPUSHs (sv_2mortal (sv_bless (
1613     newRV_noinc (pv),
1614     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1615     )));
1616     }
1617    
1618     void shrink (CBOR *self, int enable = 1)
1619     ALIAS:
1620     shrink = F_SHRINK
1621     allow_unknown = F_ALLOW_UNKNOWN
1622 root 1.18 allow_sharing = F_ALLOW_SHARING
1623 root 1.37 allow_cycles = F_ALLOW_CYCLES
1624 root 1.60 forbid_objects = F_FORBID_OBJECTS
1625 root 1.32 pack_strings = F_PACK_STRINGS
1626 root 1.54 text_keys = F_TEXT_KEYS
1627     text_strings = F_TEXT_STRINGS
1628 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1629 root 1.1 PPCODE:
1630     {
1631     if (enable)
1632     self->flags |= ix;
1633     else
1634     self->flags &= ~ix;
1635    
1636     XPUSHs (ST (0));
1637     }
1638    
1639     void get_shrink (CBOR *self)
1640     ALIAS:
1641     get_shrink = F_SHRINK
1642     get_allow_unknown = F_ALLOW_UNKNOWN
1643 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1644 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1645 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1646 root 1.32 get_pack_strings = F_PACK_STRINGS
1647 root 1.54 get_text_keys = F_TEXT_KEYS
1648     get_text_strings = F_TEXT_STRINGS
1649 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1650 root 1.1 PPCODE:
1651     XPUSHs (boolSV (self->flags & ix));
1652    
1653     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1654     PPCODE:
1655     self->max_depth = max_depth;
1656     XPUSHs (ST (0));
1657    
1658     U32 get_max_depth (CBOR *self)
1659     CODE:
1660     RETVAL = self->max_depth;
1661     OUTPUT:
1662     RETVAL
1663    
1664     void max_size (CBOR *self, U32 max_size = 0)
1665     PPCODE:
1666     self->max_size = max_size;
1667     XPUSHs (ST (0));
1668    
1669     int get_max_size (CBOR *self)
1670     CODE:
1671     RETVAL = self->max_size;
1672     OUTPUT:
1673     RETVAL
1674    
1675 root 1.27 void filter (CBOR *self, SV *filter = 0)
1676     PPCODE:
1677     SvREFCNT_dec (self->filter);
1678     self->filter = filter ? newSVsv (filter) : filter;
1679     XPUSHs (ST (0));
1680    
1681     SV *get_filter (CBOR *self)
1682     CODE:
1683     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1684     OUTPUT:
1685     RETVAL
1686    
1687 root 1.1 void encode (CBOR *self, SV *scalar)
1688     PPCODE:
1689     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1690     XPUSHs (scalar);
1691    
1692     void decode (CBOR *self, SV *cborstr)
1693     PPCODE:
1694     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1695     XPUSHs (cborstr);
1696    
1697     void decode_prefix (CBOR *self, SV *cborstr)
1698     PPCODE:
1699     {
1700     SV *sv;
1701     char *offset;
1702     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1703     EXTEND (SP, 2);
1704     PUSHs (sv);
1705     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1706     }
1707    
1708 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1709 root 1.42 ALIAS:
1710     incr_parse_multiple = 1
1711 root 1.40 PPCODE:
1712     {
1713     if (SvUTF8 (cborstr))
1714     sv_utf8_downgrade (cborstr, 0);
1715    
1716     if (!self->incr_count)
1717     {
1718     self->incr_count = newAV ();
1719     self->incr_pos = 0;
1720     self->incr_need = 1;
1721    
1722     av_push (self->incr_count, newSViv (1));
1723     }
1724    
1725 root 1.41 do
1726 root 1.40 {
1727     if (!incr_parse (self, cborstr))
1728     {
1729     if (self->incr_need > self->max_size && self->max_size)
1730     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1731     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1732    
1733     break;
1734     }
1735    
1736 root 1.41 SV *sv;
1737     char *offset;
1738 root 1.40
1739 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1740     XPUSHs (sv);
1741 root 1.40
1742 root 1.41 sv_chop (cborstr, offset);
1743 root 1.40
1744 root 1.41 av_clear (self->incr_count);
1745     av_push (self->incr_count, newSViv (1));
1746 root 1.40
1747 root 1.41 self->incr_pos = 0;
1748     self->incr_need = self->incr_pos + 1;
1749 root 1.40 }
1750 root 1.42 while (ix);
1751 root 1.40 }
1752    
1753     void incr_reset (CBOR *self)
1754     CODE:
1755     {
1756     SvREFCNT_dec (self->incr_count);
1757     self->incr_count = 0;
1758     }
1759    
1760 root 1.27 void DESTROY (CBOR *self)
1761     PPCODE:
1762     cbor_free (self);
1763    
1764 root 1.1 PROTOTYPES: ENABLE
1765    
1766     void encode_cbor (SV *scalar)
1767 root 1.36 ALIAS:
1768     encode_cbor = 0
1769     encode_cbor_sharing = F_ALLOW_SHARING
1770 root 1.1 PPCODE:
1771     {
1772     CBOR cbor;
1773     cbor_init (&cbor);
1774 root 1.36 cbor.flags |= ix;
1775 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1776     XPUSHs (scalar);
1777     }
1778    
1779     void decode_cbor (SV *cborstr)
1780     PPCODE:
1781     {
1782     CBOR cbor;
1783     cbor_init (&cbor);
1784     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1785     XPUSHs (cborstr);
1786     }
1787    
1788 root 1.64 #ifdef __AFL_COMPILER
1789    
1790     void
1791     afl_init ()
1792     CODE:
1793     __AFL_INIT ();
1794    
1795     int
1796     afl_loop (unsigned int count = 10000)
1797     CODE:
1798     RETVAL = __AFL_LOOP (count);
1799     OUTPUT:
1800     RETVAL
1801    
1802     #endif
1803