ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.76
Committed: Thu Sep 7 23:57:58 2023 UTC (8 months, 1 week ago) by root
Branch: MAIN
Changes since 1.75: +1 -1 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.72 // perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0
34     #define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len)))
35    
36 root 1.35 // known major and minor types
37     enum cbor_type
38     {
39     MAJOR_SHIFT = 5,
40     MINOR_MASK = 0x1f,
41    
42     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
43     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
44     MAJOR_BYTES = 2 << MAJOR_SHIFT,
45     MAJOR_TEXT = 3 << MAJOR_SHIFT,
46     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
47     MAJOR_MAP = 5 << MAJOR_SHIFT,
48     MAJOR_TAG = 6 << MAJOR_SHIFT,
49     MAJOR_MISC = 7 << MAJOR_SHIFT,
50    
51     // INT/STRING/ARRAY/MAP subtypes
52     LENGTH_EXT1 = 24,
53     LENGTH_EXT2 = 25,
54     LENGTH_EXT4 = 26,
55     LENGTH_EXT8 = 27,
56    
57     // SIMPLE types (effectively MISC subtypes)
58     SIMPLE_FALSE = 20,
59     SIMPLE_TRUE = 21,
60     SIMPLE_NULL = 22,
61     SIMPLE_UNDEF = 23,
62    
63     // MISC subtype (unused)
64     MISC_EXT1 = 24,
65     MISC_FLOAT16 = 25,
66     MISC_FLOAT32 = 26,
67     MISC_FLOAT64 = 27,
68    
69     // BYTES/TEXT/ARRAY/MAP
70     MINOR_INDEF = 31,
71     };
72    
73 root 1.9 // known tags
74 root 1.8 enum cbor_tag
75     {
76 root 1.35 // extensions
77     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
78     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
79     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
80     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
81     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
82     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
83     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
84    
85     // rfc7049
86     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
87     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
88     CBOR_TAG_POS_BIGNUM = 2, // byte string
89     CBOR_TAG_NEG_BIGNUM = 3, // byte string
90     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
91     CBOR_TAG_BIGFLOAT = 5, // array
92    
93     CBOR_TAG_CONV_B64U = 21, // base64url, any
94     CBOR_TAG_CONV_B64 = 22, // base64, any
95     CBOR_TAG_CONV_HEX = 23, // base16, any
96     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
97    
98     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
99     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
100     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
101     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
102     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
103 root 1.8
104 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
105 root 1.8 };
106    
107 root 1.66 // known forced types, also hardcoded in CBOR.pm
108     enum
109     {
110     AS_CBOR = 0,
111 root 1.68 AS_INT = 1,
112     AS_BYTES = 2,
113     AS_TEXT = 3,
114 root 1.67 AS_FLOAT16 = 4,
115     AS_FLOAT32 = 5,
116     AS_FLOAT64 = 6,
117 root 1.70 AS_MAP = 7,
118 root 1.67 // possibly future enhancements: (generic) float, (generic) string
119 root 1.66 };
120    
121 root 1.24 #define F_SHRINK 0x00000001UL
122     #define F_ALLOW_UNKNOWN 0x00000002UL
123 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
124 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
125 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
126     #define F_PACK_STRINGS 0x00000020UL
127     #define F_TEXT_KEYS 0x00000040UL
128     #define F_TEXT_STRINGS 0x00000080UL
129     #define F_VALIDATE_UTF8 0x00000100UL
130 root 1.1
131     #define INIT_SIZE 32 // initial scalar size to be allocated
132    
133     #define SB do {
134     #define SE } while (0)
135    
136     #define IN_RANGE_INC(type,val,beg,end) \
137     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
138     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
139    
140     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
141    
142     #ifdef USE_ITHREADS
143     # define CBOR_SLOW 1
144     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
145     #else
146     # define CBOR_SLOW 0
147     # define CBOR_STASH cbor_stash
148     #endif
149    
150 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
151 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
152 root 1.1
153     typedef struct {
154     U32 flags;
155     U32 max_depth;
156     STRLEN max_size;
157 root 1.27 SV *filter;
158 root 1.40
159     // for the incremental parser
160     STRLEN incr_pos; // the current offset into the text
161     STRLEN incr_need; // minimum bytes needed to decode
162     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
163 root 1.1 } CBOR;
164    
165 root 1.5 ecb_inline void
166 root 1.1 cbor_init (CBOR *cbor)
167     {
168     Zero (cbor, 1, CBOR);
169     cbor->max_depth = 512;
170     }
171    
172 root 1.27 ecb_inline void
173     cbor_free (CBOR *cbor)
174     {
175     SvREFCNT_dec (cbor->filter);
176 root 1.40 SvREFCNT_dec (cbor->incr_count);
177 root 1.27 }
178    
179 root 1.1 /////////////////////////////////////////////////////////////////////////////
180     // utility functions
181    
182 root 1.5 ecb_inline SV *
183 root 1.1 get_bool (const char *name)
184     {
185     SV *sv = get_sv (name, 1);
186    
187     SvREADONLY_on (sv);
188     SvREADONLY_on (SvRV (sv));
189    
190     return sv;
191     }
192    
193 root 1.5 ecb_inline void
194 root 1.1 shrink (SV *sv)
195     {
196     sv_utf8_downgrade (sv, 1);
197    
198     if (SvLEN (sv) > SvCUR (sv) + 1)
199     {
200     #ifdef SvPV_shrink_to_cur
201     SvPV_shrink_to_cur (sv);
202     #elif defined (SvPV_renew)
203     SvPV_renew (sv, SvCUR (sv) + 1);
204     #endif
205     }
206     }
207    
208 root 1.21 // minimum length of a string to be registered for stringref
209 root 1.71 ecb_inline STRLEN
210 root 1.21 minimum_string_length (UV idx)
211     {
212 root 1.61 return idx <= 23 ? 3
213     : idx <= 0xffU ? 4
214     : idx <= 0xffffU ? 5
215     : idx <= 0xffffffffU ? 7
216     : 11;
217 root 1.21 }
218    
219 root 1.1 /////////////////////////////////////////////////////////////////////////////
220     // encoder
221    
222     // structure used for encoding CBOR
223     typedef struct
224     {
225     char *cur; // SvPVX (sv) + current output position
226     char *end; // SvEND (sv)
227     SV *sv; // result scalar
228     CBOR cbor;
229     U32 depth; // recursion level
230 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
231     UV stringref_idx;
232 root 1.19 HV *shareable; // ptr => index, or 0
233     UV shareable_idx;
234 root 1.1 } enc_t;
235    
236 root 1.5 ecb_inline void
237 root 1.1 need (enc_t *enc, STRLEN len)
238     {
239 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
240 root 1.1 {
241     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
242     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
243     enc->cur = SvPVX (enc->sv) + cur;
244     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
245     }
246     }
247    
248 root 1.70 static void encode_sv (enc_t *enc, SV *sv);
249    
250 root 1.5 ecb_inline void
251 root 1.1 encode_ch (enc_t *enc, char ch)
252     {
253     need (enc, 1);
254     *enc->cur++ = ch;
255     }
256    
257 root 1.66 // used for tags, intregers, element counts and so on
258 root 1.1 static void
259     encode_uint (enc_t *enc, int major, UV len)
260     {
261     need (enc, 9);
262    
263 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
264 root 1.1 *enc->cur++ = major | len;
265 root 1.36 else if (ecb_expect_true (len <= 0xffU))
266 root 1.1 {
267 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
268 root 1.1 *enc->cur++ = len;
269     }
270 root 1.36 else if (len <= 0xffffU)
271 root 1.1 {
272 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
273 root 1.1 *enc->cur++ = len >> 8;
274     *enc->cur++ = len;
275     }
276 root 1.36 else if (len <= 0xffffffffU)
277 root 1.1 {
278 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
279 root 1.1 *enc->cur++ = len >> 24;
280     *enc->cur++ = len >> 16;
281     *enc->cur++ = len >> 8;
282     *enc->cur++ = len;
283     }
284 root 1.4 else
285 root 1.1 {
286 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
287 root 1.1 *enc->cur++ = len >> 56;
288     *enc->cur++ = len >> 48;
289     *enc->cur++ = len >> 40;
290     *enc->cur++ = len >> 32;
291     *enc->cur++ = len >> 24;
292     *enc->cur++ = len >> 16;
293     *enc->cur++ = len >> 8;
294     *enc->cur++ = len;
295     }
296     }
297    
298 root 1.66 // encodes a perl value into a CBOR integer
299 root 1.67 ecb_inline void
300 root 1.66 encode_int (enc_t *enc, SV *sv)
301     {
302     if (SvIsUV (sv))
303     encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
304     else if (SvIVX (sv) >= 0)
305     encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
306     else
307     encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
308     }
309    
310 root 1.21 ecb_inline void
311     encode_tag (enc_t *enc, UV tag)
312     {
313 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
314 root 1.21 }
315    
316 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
317     ecb_noinline static void
318     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
319     {
320     STRLEN ulen = len;
321     U8 *p, *pend = (U8 *)str + len;
322    
323     for (p = (U8 *)str; p < pend; ++p)
324     ulen += *p >> 7; // count set high bits
325    
326     encode_uint (enc, MAJOR_TEXT, ulen);
327    
328     need (enc, ulen);
329     for (p = (U8 *)str; p < pend; ++p)
330     if (*p < 0x80)
331     *enc->cur++ = *p;
332     else
333     {
334     *enc->cur++ = 0xc0 + (*p >> 6);
335     *enc->cur++ = 0x80 + (*p & 63);
336     }
337     }
338    
339 root 1.30 ecb_inline void
340 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
341 root 1.30 {
342 root 1.53 if (ecb_expect_false (upgrade_utf8))
343 root 1.51 if (!utf8)
344     {
345 root 1.53 encode_str_utf8 (enc, utf8, str, len);
346 root 1.51 return;
347     }
348    
349 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
350 root 1.30 need (enc, len);
351     memcpy (enc->cur, str, len);
352     enc->cur += len;
353     }
354    
355 root 1.54 ecb_inline void
356 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
357 root 1.1 {
358 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
359 root 1.21 {
360 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
361 root 1.21
362     if (SvOK (*svp))
363     {
364     // already registered, use stringref
365     encode_tag (enc, CBOR_TAG_STRINGREF);
366 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
367 root 1.21 return;
368     }
369     else if (len >= minimum_string_length (enc->stringref_idx))
370     {
371     // register only
372     sv_setuv (*svp, enc->stringref_idx);
373     ++enc->stringref_idx;
374     }
375     }
376    
377 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
378 root 1.1 }
379    
380 root 1.66 ecb_inline void
381     encode_float16 (enc_t *enc, NV nv)
382     {
383     need (enc, 1+2);
384    
385     *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
386    
387     uint16_t fp = ecb_float_to_binary16 (nv);
388    
389     if (!ecb_big_endian ())
390     fp = ecb_bswap16 (fp);
391    
392     memcpy (enc->cur, &fp, 2);
393     enc->cur += 2;
394     }
395    
396     ecb_inline void
397     encode_float32 (enc_t *enc, NV nv)
398     {
399     need (enc, 1+4);
400    
401     *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
402    
403     uint32_t fp = ecb_float_to_binary32 (nv);
404    
405     if (!ecb_big_endian ())
406     fp = ecb_bswap32 (fp);
407    
408     memcpy (enc->cur, &fp, 4);
409     enc->cur += 4;
410     }
411    
412     ecb_inline void
413     encode_float64 (enc_t *enc, NV nv)
414     {
415     need (enc, 1+8);
416    
417     *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
418    
419     uint64_t fp = ecb_double_to_binary64 (nv);
420    
421     if (!ecb_big_endian ())
422     fp = ecb_bswap64 (fp);
423    
424     memcpy (enc->cur, &fp, 8);
425     enc->cur += 8;
426     }
427    
428     ecb_inline void
429 root 1.69 encode_bool (enc_t *enc, int istrue)
430     {
431     encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
432     }
433    
434 root 1.70 // encodes an arrayref containing key-value pairs as CBOR map
435     ecb_inline void
436     encode_array_as_map (enc_t *enc, SV *sv)
437     {
438     if (enc->depth >= enc->cbor.max_depth)
439     croak (ERR_NESTING_EXCEEDED);
440    
441     ++enc->depth;
442    
443     // as_map does error checking for us, but we re-check in case
444     // things have changed.
445    
446     if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
447     croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)");
448    
449     AV *av = (AV *)SvRV (sv);
450     int i, len = av_len (av);
451    
452     if (!(len & 1))
453     croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)");
454    
455     encode_uint (enc, MAJOR_MAP, (len + 1) >> 1);
456    
457     for (i = 0; i <= len; ++i)
458     {
459     SV **svp = av_fetch (av, i, 0);
460     encode_sv (enc, svp ? *svp : &PL_sv_undef);
461     }
462    
463     --enc->depth;
464     }
465    
466 root 1.69 ecb_inline void
467 root 1.66 encode_forced (enc_t *enc, UV type, SV *sv)
468     {
469     switch (type)
470     {
471     case AS_CBOR:
472     {
473     STRLEN len;
474     char *str = SvPVbyte (sv, len);
475    
476     need (enc, len);
477     memcpy (enc->cur, str, len);
478     enc->cur += len;
479     }
480     break;
481    
482     case AS_BYTES:
483     {
484     STRLEN len;
485     char *str = SvPVbyte (sv, len);
486     encode_strref (enc, 0, 0, str, len);
487     }
488     break;
489    
490     case AS_TEXT:
491     {
492     STRLEN len;
493     char *str = SvPVutf8 (sv, len);
494     encode_strref (enc, 1, 1, str, len);
495     }
496     break;
497    
498 root 1.69 case AS_INT: encode_int (enc, sv); break;
499    
500 root 1.66 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
501     case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
502     case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
503    
504 root 1.70 case AS_MAP: encode_array_as_map (enc, sv); break;
505    
506 root 1.66 default:
507     croak ("encountered malformed CBOR::XS::Tagged object");
508     }
509     }
510    
511 root 1.1 static void
512     encode_av (enc_t *enc, AV *av)
513     {
514     int i, len = av_len (av);
515    
516     if (enc->depth >= enc->cbor.max_depth)
517     croak (ERR_NESTING_EXCEEDED);
518    
519     ++enc->depth;
520    
521 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
522 root 1.1
523 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
524 root 1.45 for (i = 0; i <= len; ++i)
525     {
526     SV **svp = av_fetch (av, i, 0);
527     encode_sv (enc, svp ? *svp : &PL_sv_undef);
528     }
529     else
530     for (i = 0; i <= len; ++i)
531 root 1.46 {
532     SV *sv = AvARRAY (av)[i];
533     encode_sv (enc, sv ? sv : &PL_sv_undef);
534     }
535 root 1.1
536     --enc->depth;
537     }
538    
539     static void
540     encode_hv (enc_t *enc, HV *hv)
541     {
542     HE *he;
543    
544     if (enc->depth >= enc->cbor.max_depth)
545     croak (ERR_NESTING_EXCEEDED);
546    
547     ++enc->depth;
548    
549     int pairs = hv_iterinit (hv);
550     int mg = SvMAGICAL (hv);
551    
552 root 1.63 if (ecb_expect_false (mg))
553 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
554 root 1.1 else
555 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
556 root 1.1
557     while ((he = hv_iternext (hv)))
558     {
559 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
560     encode_sv (enc, HeSVKEY (he));
561 root 1.1 else
562 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
563 root 1.1
564 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
565 root 1.1 }
566    
567 root 1.63 if (ecb_expect_false (mg))
568 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
569 root 1.1
570     --enc->depth;
571     }
572    
573     // encode objects, arrays and special \0=false and \1=true values.
574     static void
575     encode_rv (enc_t *enc, SV *sv)
576     {
577 root 1.19 SvGETMAGIC (sv);
578 root 1.1
579 root 1.19 svtype svt = SvTYPE (sv);
580 root 1.18
581 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
582 root 1.1 {
583 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
584     ? types_boolean_stash
585     : gv_stashpv ("Types::Serialiser::Boolean", 1);
586     HV *error_stash = !CBOR_SLOW || types_error_stash
587     ? types_error_stash
588     : gv_stashpv ("Types::Serialiser::Error", 1);
589 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
590     ? cbor_tagged_stash
591     : gv_stashpv ("CBOR::XS::Tagged" , 1);
592 root 1.1
593 root 1.11 HV *stash = SvSTASH (sv);
594    
595     if (stash == boolean_stash)
596 root 1.34 {
597 root 1.69 encode_bool (enc, SvIV (sv));
598 root 1.34 return;
599     }
600 root 1.11 else if (stash == error_stash)
601 root 1.34 {
602 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
603 root 1.34 return;
604     }
605 root 1.11 else if (stash == tagged_stash)
606 root 1.6 {
607     if (svt != SVt_PVAV)
608     croak ("encountered CBOR::XS::Tagged object that isn't an array");
609    
610 root 1.66 switch (av_len ((AV *)sv))
611     {
612     case 2-1:
613     // actually a tagged value
614     encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
615     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
616     break;
617    
618     case 3-1:
619     // a forced type [value, type, undef]
620     encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
621     break;
622    
623     default:
624     croak ("encountered malformed CBOR::XS::Tagged object");
625     }
626 root 1.34
627     return;
628 root 1.6 }
629 root 1.34 }
630    
631     if (ecb_expect_false (SvREFCNT (sv) > 1)
632     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
633     {
634 root 1.63 if (ecb_expect_false (!enc->shareable))
635 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
636    
637     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
638    
639     if (SvOK (*svp))
640     {
641     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
642 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
643 root 1.34 return;
644     }
645     else
646     {
647     sv_setuv (*svp, enc->shareable_idx);
648     ++enc->shareable_idx;
649     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
650     }
651     }
652    
653     if (ecb_expect_false (SvOBJECT (sv)))
654     {
655     HV *stash = SvSTASH (sv);
656     GV *method;
657    
658 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
659     croak ("encountered object '%s', but forbid_objects is enabled",
660     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
661     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
662 root 1.1 {
663 root 1.11 dSP;
664    
665 root 1.50 ENTER; SAVETMPS;
666     PUSHMARK (SP);
667 root 1.6 // we re-bless the reference to get overload and other niceties right
668 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
669    
670     PUTBACK;
671     // G_SCALAR ensures that return value is 1
672     call_sv ((SV *)GvCV (method), G_SCALAR);
673     SPAGAIN;
674    
675     // catch this surprisingly common error
676     if (SvROK (TOPs) && SvRV (TOPs) == sv)
677     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
678    
679     encode_sv (enc, POPs);
680    
681     PUTBACK;
682    
683     FREETMPS; LEAVE;
684     }
685     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
686     {
687     dSP;
688 root 1.6
689 root 1.50 ENTER; SAVETMPS;
690     PUSHMARK (SP);
691 root 1.11 EXTEND (SP, 2);
692     // we re-bless the reference to get overload and other niceties right
693     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
694     PUSHs (sv_cbor);
695 root 1.1
696 root 1.11 PUTBACK;
697     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
698     SPAGAIN;
699 root 1.6
700 root 1.11 // catch this surprisingly common error
701     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
702     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
703 root 1.6
704 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
705 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
706 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
707 root 1.6
708 root 1.65 {
709     int i;
710    
711     for (i = 0; i < count; ++i)
712     encode_sv (enc, SP[i + 1 - count]);
713    
714     SP -= count;
715     }
716 root 1.6
717 root 1.11 PUTBACK;
718 root 1.6
719 root 1.11 FREETMPS; LEAVE;
720 root 1.1 }
721 root 1.11 else
722     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
723     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
724 root 1.1 }
725     else if (svt == SVt_PVHV)
726     encode_hv (enc, (HV *)sv);
727     else if (svt == SVt_PVAV)
728     encode_av (enc, (AV *)sv);
729 root 1.18 else
730 root 1.1 {
731 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
732     encode_sv (enc, sv);
733 root 1.1 }
734     }
735    
736     static void
737     encode_nv (enc_t *enc, SV *sv)
738     {
739     double nv = SvNVX (sv);
740    
741     need (enc, 9);
742    
743 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
744     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
745 root 1.1 //TODO: maybe I32?
746 root 1.5 else if (ecb_expect_false (nv == (float)nv))
747 root 1.66 encode_float32 (enc, nv);
748 root 1.1 else
749 root 1.66 encode_float64 (enc, nv);
750 root 1.1 }
751    
752     static void
753     encode_sv (enc_t *enc, SV *sv)
754     {
755     SvGETMAGIC (sv);
756    
757     if (SvPOKp (sv))
758     {
759     STRLEN len;
760     char *str = SvPV (sv, len);
761 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
762 root 1.1 }
763     else if (SvNOKp (sv))
764     encode_nv (enc, sv);
765     else if (SvIOKp (sv))
766 root 1.67 encode_int (enc, sv);
767 root 1.1 else if (SvROK (sv))
768     encode_rv (enc, SvRV (sv));
769     else if (!SvOK (sv))
770 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
771 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
772 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
773 root 1.1 else
774     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
775     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
776     }
777    
778     static SV *
779     encode_cbor (SV *scalar, CBOR *cbor)
780     {
781 root 1.48 enc_t enc = { 0 };
782 root 1.1
783 root 1.63 enc.cbor = *cbor;
784     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
785     enc.cur = SvPVX (enc.sv);
786     enc.end = SvEND (enc.sv);
787 root 1.1
788     SvPOK_only (enc.sv);
789 root 1.20
790 root 1.32 if (cbor->flags & F_PACK_STRINGS)
791 root 1.20 {
792     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
793     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
794     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
795     }
796    
797 root 1.1 encode_sv (&enc, scalar);
798    
799     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
800     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
801    
802     if (enc.cbor.flags & F_SHRINK)
803     shrink (enc.sv);
804    
805     return enc.sv;
806     }
807    
808     /////////////////////////////////////////////////////////////////////////////
809     // decoder
810    
811     // structure used for decoding CBOR
812     typedef struct
813     {
814     U8 *cur; // current parser pointer
815     U8 *end; // end of input string
816     const char *err; // parse error, if != 0
817     CBOR cbor;
818     U32 depth; // recursion depth
819     U32 maxdepth; // recursion depth limit
820 root 1.19 AV *shareable;
821 root 1.20 AV *stringref;
822 root 1.27 SV *decode_tagged;
823 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
824 root 1.1 } dec_t;
825    
826 root 1.59 // set dec->err to ERRSV
827     ecb_cold static void
828     err_errsv (dec_t *dec)
829     {
830     if (!dec->err)
831     {
832     dec->err_sv = newSVsv (ERRSV);
833    
834     // chop off the trailing \n
835     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
836     *SvEND (dec->err_sv) = 0;
837    
838     dec->err = SvPVutf8_nolen (dec->err_sv);
839     }
840     }
841 root 1.1
842 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
843     ecb_cold static void
844     err_set (dec_t *dec, const char *reason)
845     {
846     if (!dec->err)
847     dec->err = reason;
848     }
849    
850     ecb_cold static void
851     err_unexpected_end (dec_t *dec)
852     {
853     err_set (dec, "unexpected end of CBOR data");
854     }
855    
856     #define ERR_DO(do) SB do; goto fail; SE
857     #define ERR(reason) ERR_DO (err_set (dec, reason))
858     #define ERR_ERRSV ERR_DO (err_errsv (dec))
859    
860 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
861 root 1.1
862 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
863 root 1.1 #define DEC_DEC_DEPTH --dec->depth
864    
865     static UV
866     decode_uint (dec_t *dec)
867     {
868 root 1.35 U8 m = *dec->cur & MINOR_MASK;
869     ++dec->cur;
870    
871     if (ecb_expect_true (m < LENGTH_EXT1))
872     return m;
873 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
874     {
875     WANT (1);
876     dec->cur += 1;
877     return dec->cur[-1];
878     }
879     else if (ecb_expect_true (m == LENGTH_EXT2))
880     {
881     WANT (2);
882     dec->cur += 2;
883     return (((UV)dec->cur[-2]) << 8)
884     | ((UV)dec->cur[-1]);
885     }
886     else if (ecb_expect_true (m == LENGTH_EXT4))
887     {
888     WANT (4);
889     dec->cur += 4;
890     return (((UV)dec->cur[-4]) << 24)
891     | (((UV)dec->cur[-3]) << 16)
892     | (((UV)dec->cur[-2]) << 8)
893     | ((UV)dec->cur[-1]);
894     }
895     else if (ecb_expect_true (m == LENGTH_EXT8))
896 root 1.1 {
897 root 1.36 WANT (8);
898     dec->cur += 8;
899 root 1.34
900 root 1.36 return
901 root 1.34 #if UVSIZE < 8
902 root 1.36 0
903 root 1.34 #else
904 root 1.36 (((UV)dec->cur[-8]) << 56)
905     | (((UV)dec->cur[-7]) << 48)
906     | (((UV)dec->cur[-6]) << 40)
907     | (((UV)dec->cur[-5]) << 32)
908 root 1.34 #endif
909 root 1.36 | (((UV)dec->cur[-4]) << 24)
910     | (((UV)dec->cur[-3]) << 16)
911     | (((UV)dec->cur[-2]) << 8)
912     | ((UV)dec->cur[-1]);
913 root 1.1 }
914 root 1.36 else
915     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
916 root 1.1
917     fail:
918     return 0;
919     }
920    
921     static SV *decode_sv (dec_t *dec);
922    
923     static SV *
924     decode_av (dec_t *dec)
925     {
926     AV *av = newAV ();
927    
928     DEC_INC_DEPTH;
929    
930 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
931 root 1.1 {
932     ++dec->cur;
933    
934     for (;;)
935     {
936     WANT (1);
937    
938 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
939 root 1.1 {
940     ++dec->cur;
941     break;
942     }
943    
944     av_push (av, decode_sv (dec));
945     }
946     }
947     else
948     {
949 root 1.57 UV i, len = decode_uint (dec);
950 root 1.1
951 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
952 root 1.1 av_fill (av, len - 1);
953    
954     for (i = 0; i < len; ++i)
955     AvARRAY (av)[i] = decode_sv (dec);
956     }
957    
958     DEC_DEC_DEPTH;
959     return newRV_noinc ((SV *)av);
960    
961     fail:
962 root 1.63 SvREFCNT_dec_NN (av);
963 root 1.1 DEC_DEC_DEPTH;
964     return &PL_sv_undef;
965     }
966    
967 root 1.16 static void
968     decode_he (dec_t *dec, HV *hv)
969     {
970     // for speed reasons, we specialcase single-string
971 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
972    
973 root 1.23 if (ecb_expect_true (!dec->stringref))
974 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
975 root 1.21 {
976 root 1.56 STRLEN len = decode_uint (dec);
977 root 1.21 char *key = (char *)dec->cur;
978 root 1.16
979 root 1.49 WANT (len);
980 root 1.21 dec->cur += len;
981    
982 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
983 root 1.16
984 root 1.21 return;
985     }
986 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
987 root 1.21 {
988 root 1.56 STRLEN len = decode_uint (dec);
989 root 1.21 char *key = (char *)dec->cur;
990 root 1.16
991 root 1.49 WANT (len);
992 root 1.21 dec->cur += len;
993 root 1.20
994 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
995 root 1.72 if (!cbor_is_utf8_string ((U8 *)key, len))
996 root 1.38 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
997 root 1.16
998 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
999 root 1.16
1000 root 1.21 return;
1001     }
1002 root 1.20
1003 root 1.21 SV *k = decode_sv (dec);
1004     SV *v = decode_sv (dec);
1005 root 1.16
1006 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
1007     // methods, and this is hopefully the only place where it can happen,
1008     // so if there is a chance of an exception, take the very slow path.
1009     // since catching exceptions is "undocumented/internal/forbidden" by
1010     // the new p5p powers, we need to call out to a perl function :/
1011     if (ecb_expect_false (SvAMAGIC (k)))
1012     {
1013     dSP;
1014    
1015     ENTER; SAVETMPS;
1016     PUSHMARK (SP);
1017     EXTEND (SP, 3);
1018     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
1019     PUSHs (sv_2mortal (k));
1020     PUSHs (sv_2mortal (v));
1021    
1022     PUTBACK;
1023     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
1024     SPAGAIN;
1025    
1026     FREETMPS; LEAVE;
1027    
1028     if (SvTRUE (ERRSV))
1029     ERR_ERRSV;
1030    
1031     return;
1032     }
1033    
1034 root 1.21 hv_store_ent (hv, k, v, 0);
1035 root 1.63 SvREFCNT_dec_NN (k);
1036 root 1.38
1037     fail:
1038     ;
1039 root 1.16 }
1040    
1041 root 1.1 static SV *
1042     decode_hv (dec_t *dec)
1043     {
1044     HV *hv = newHV ();
1045    
1046     DEC_INC_DEPTH;
1047    
1048 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
1049 root 1.1 {
1050     ++dec->cur;
1051    
1052     for (;;)
1053     {
1054     WANT (1);
1055    
1056 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
1057 root 1.1 {
1058     ++dec->cur;
1059     break;
1060     }
1061    
1062 root 1.16 decode_he (dec, hv);
1063 root 1.1 }
1064     }
1065     else
1066     {
1067 root 1.57 UV pairs = decode_uint (dec);
1068    
1069     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
1070 root 1.1
1071 root 1.16 while (pairs--)
1072     decode_he (dec, hv);
1073 root 1.1 }
1074    
1075     DEC_DEC_DEPTH;
1076     return newRV_noinc ((SV *)hv);
1077    
1078     fail:
1079 root 1.63 SvREFCNT_dec_NN (hv);
1080 root 1.1 DEC_DEC_DEPTH;
1081     return &PL_sv_undef;
1082     }
1083    
1084     static SV *
1085     decode_str (dec_t *dec, int utf8)
1086     {
1087 root 1.6 SV *sv = 0;
1088 root 1.1
1089 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
1090 root 1.1 {
1091 root 1.33 // indefinite length strings
1092 root 1.1 ++dec->cur;
1093    
1094 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
1095 root 1.33
1096 root 1.1 sv = newSVpvn ("", 0);
1097    
1098     for (;;)
1099     {
1100     WANT (1);
1101    
1102 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
1103     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
1104 root 1.33 {
1105     ++dec->cur;
1106     break;
1107     }
1108     else
1109     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
1110    
1111     STRLEN len = decode_uint (dec);
1112 root 1.1
1113 root 1.33 WANT (len);
1114     sv_catpvn (sv, dec->cur, len);
1115     dec->cur += len;
1116 root 1.1 }
1117     }
1118     else
1119     {
1120     STRLEN len = decode_uint (dec);
1121    
1122     WANT (len);
1123     sv = newSVpvn (dec->cur, len);
1124     dec->cur += len;
1125 root 1.25
1126     if (ecb_expect_false (dec->stringref)
1127     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
1128     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
1129 root 1.1 }
1130    
1131     if (utf8)
1132 root 1.38 {
1133     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
1134 root 1.72 if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv)))
1135 root 1.38 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
1136    
1137     SvUTF8_on (sv);
1138     }
1139 root 1.1
1140     return sv;
1141    
1142     fail:
1143 root 1.6 SvREFCNT_dec (sv);
1144 root 1.1 return &PL_sv_undef;
1145     }
1146    
1147     static SV *
1148 root 1.3 decode_tagged (dec_t *dec)
1149     {
1150 root 1.19 SV *sv = 0;
1151 root 1.3 UV tag = decode_uint (dec);
1152 root 1.19
1153     WANT (1);
1154 root 1.3
1155 root 1.18 switch (tag)
1156     {
1157     case CBOR_TAG_MAGIC:
1158 root 1.20 sv = decode_sv (dec);
1159     break;
1160 root 1.18
1161     case CBOR_TAG_INDIRECTION:
1162 root 1.20 sv = newRV_noinc (decode_sv (dec));
1163     break;
1164    
1165     case CBOR_TAG_STRINGREF_NAMESPACE:
1166     {
1167 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1168 root 1.55 // erase mortalised caches, e.g. "shareable"
1169     ENTER;
1170 root 1.20
1171     SAVESPTR (dec->stringref);
1172     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1173    
1174     sv = decode_sv (dec);
1175    
1176 root 1.55 LEAVE;
1177 root 1.20 }
1178     break;
1179    
1180     case CBOR_TAG_STRINGREF:
1181     {
1182 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1183 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1184    
1185     UV idx = decode_uint (dec);
1186    
1187 root 1.64 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1188 root 1.20 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1189    
1190     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1191     }
1192     break;
1193 root 1.11
1194 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1195     {
1196     if (ecb_expect_false (!dec->shareable))
1197     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1198    
1199 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1200     {
1201     sv = newSV (0);
1202     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1203 root 1.18
1204 root 1.37 SV *osv = decode_sv (dec);
1205     sv_setsv (sv, osv);
1206     SvREFCNT_dec_NN (osv);
1207     }
1208     else
1209     {
1210     av_push (dec->shareable, &PL_sv_undef);
1211     int idx = AvFILLp (dec->shareable);
1212     sv = decode_sv (dec);
1213     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1214     }
1215 root 1.19 }
1216 root 1.20 break;
1217 root 1.18
1218     case CBOR_TAG_VALUE_SHAREDREF:
1219 root 1.17 {
1220 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1221 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1222 root 1.18
1223 root 1.19 UV idx = decode_uint (dec);
1224    
1225 root 1.64 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1226 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1227    
1228 root 1.76 sv = newSVsv (AvARRAY (dec->shareable)[idx]);
1229 root 1.37
1230     if (sv == &PL_sv_undef)
1231     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1232 root 1.17 }
1233 root 1.20 break;
1234 root 1.17
1235 root 1.18 case CBOR_TAG_PERL_OBJECT:
1236     {
1237 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1238     goto filter;
1239    
1240 root 1.19 sv = decode_sv (dec);
1241    
1242 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1243     ERR ("corrupted CBOR data (non-array perl object)");
1244    
1245     AV *av = (AV *)SvRV (sv);
1246     int len = av_len (av) + 1;
1247     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1248    
1249     if (!stash)
1250     ERR ("cannot decode perl-object (package does not exist)");
1251    
1252     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1253    
1254     if (!method)
1255     ERR ("cannot decode perl-object (package does not have a THAW method)");
1256    
1257     dSP;
1258    
1259 root 1.50 ENTER; SAVETMPS;
1260     PUSHMARK (SP);
1261 root 1.18 EXTEND (SP, len + 1);
1262     // we re-bless the reference to get overload and other niceties right
1263     PUSHs (*av_fetch (av, 0, 1));
1264     PUSHs (sv_cbor);
1265    
1266     int i;
1267    
1268     for (i = 1; i < len; ++i)
1269     PUSHs (*av_fetch (av, i, 1));
1270    
1271     PUTBACK;
1272     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1273     SPAGAIN;
1274    
1275     if (SvTRUE (ERRSV))
1276     {
1277     FREETMPS; LEAVE;
1278 root 1.59 ERR_ERRSV;
1279 root 1.18 }
1280    
1281 root 1.63 SvREFCNT_dec_NN (sv);
1282 root 1.18 sv = SvREFCNT_inc (POPs);
1283 root 1.11
1284 root 1.18 PUTBACK;
1285 root 1.11
1286 root 1.18 FREETMPS; LEAVE;
1287     }
1288 root 1.20 break;
1289 root 1.9
1290 root 1.18 default:
1291 root 1.60 filter:
1292 root 1.18 {
1293 root 1.58 SV *tag_sv = newSVuv (tag);
1294    
1295 root 1.19 sv = decode_sv (dec);
1296    
1297 root 1.27 dSP;
1298 root 1.50 ENTER; SAVETMPS;
1299     PUSHMARK (SP);
1300 root 1.27 EXTEND (SP, 2);
1301 root 1.58 PUSHs (tag_sv);
1302 root 1.27 PUSHs (sv);
1303    
1304     PUTBACK;
1305     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1306     SPAGAIN;
1307    
1308     if (SvTRUE (ERRSV))
1309     {
1310 root 1.63 SvREFCNT_dec_NN (tag_sv);
1311 root 1.27 FREETMPS; LEAVE;
1312 root 1.59 ERR_ERRSV;
1313 root 1.27 }
1314    
1315     if (count)
1316     {
1317 root 1.63 SvREFCNT_dec_NN (tag_sv);
1318     SvREFCNT_dec_NN (sv);
1319 root 1.65 sv = SvREFCNT_inc_NN (TOPs);
1320     SP -= count;
1321 root 1.27 }
1322     else
1323     {
1324     AV *av = newAV ();
1325 root 1.58 av_push (av, tag_sv);
1326 root 1.27 av_push (av, sv);
1327    
1328     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1329     ? cbor_tagged_stash
1330     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1331     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1332     }
1333 root 1.7
1334 root 1.27 PUTBACK;
1335    
1336     FREETMPS; LEAVE;
1337 root 1.18 }
1338 root 1.20 break;
1339 root 1.11 }
1340 root 1.9
1341 root 1.20 return sv;
1342    
1343 root 1.9 fail:
1344     SvREFCNT_dec (sv);
1345     return &PL_sv_undef;
1346 root 1.3 }
1347    
1348     static SV *
1349 root 1.1 decode_sv (dec_t *dec)
1350     {
1351     WANT (1);
1352    
1353 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1354 root 1.1 {
1355 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1356     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1357     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1358     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1359     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1360     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1361     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1362    
1363     case MAJOR_MISC >> MAJOR_SHIFT:
1364     switch (*dec->cur++ & MINOR_MASK)
1365 root 1.1 {
1366 root 1.35 case SIMPLE_FALSE:
1367 root 1.1 #if CBOR_SLOW
1368 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1369 root 1.1 #endif
1370 root 1.10 return newSVsv (types_false);
1371 root 1.35 case SIMPLE_TRUE:
1372 root 1.1 #if CBOR_SLOW
1373 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1374 root 1.1 #endif
1375 root 1.10 return newSVsv (types_true);
1376 root 1.35 case SIMPLE_NULL:
1377 root 1.1 return newSVsv (&PL_sv_undef);
1378 root 1.35 case SIMPLE_UNDEF:
1379 root 1.10 #if CBOR_SLOW
1380     types_error = get_bool ("Types::Serialiser::error");
1381     #endif
1382     return newSVsv (types_error);
1383 root 1.1
1384 root 1.35 case MISC_FLOAT16:
1385 root 1.2 {
1386     WANT (2);
1387    
1388     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1389     dec->cur += 2;
1390    
1391     return newSVnv (ecb_binary16_to_float (fp));
1392     }
1393 root 1.1
1394 root 1.35 case MISC_FLOAT32:
1395 root 1.1 {
1396     uint32_t fp;
1397     WANT (4);
1398     memcpy (&fp, dec->cur, 4);
1399     dec->cur += 4;
1400    
1401     if (!ecb_big_endian ())
1402     fp = ecb_bswap32 (fp);
1403    
1404     return newSVnv (ecb_binary32_to_float (fp));
1405     }
1406    
1407 root 1.35 case MISC_FLOAT64:
1408 root 1.1 {
1409     uint64_t fp;
1410     WANT (8);
1411     memcpy (&fp, dec->cur, 8);
1412     dec->cur += 8;
1413    
1414     if (!ecb_big_endian ())
1415     fp = ecb_bswap64 (fp);
1416    
1417     return newSVnv (ecb_binary64_to_double (fp));
1418     }
1419    
1420 root 1.35 // 0..19 unassigned simple
1421 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1422     // 28-30 unassigned misc
1423     // 31 break code
1424 root 1.1 default:
1425 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1426 root 1.1 }
1427    
1428     break;
1429     }
1430    
1431     fail:
1432     return &PL_sv_undef;
1433     }
1434    
1435     static SV *
1436     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1437     {
1438 root 1.48 dec_t dec = { 0 };
1439 root 1.1 SV *sv;
1440 root 1.16 STRLEN len;
1441     char *data = SvPVbyte (string, len);
1442 root 1.1
1443 root 1.16 if (len > cbor->max_size && cbor->max_size)
1444     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1445     (unsigned long)len, (unsigned long)cbor->max_size);
1446 root 1.1
1447     dec.cbor = *cbor;
1448 root 1.16 dec.cur = (U8 *)data;
1449     dec.end = (U8 *)data + len;
1450 root 1.1
1451     sv = decode_sv (&dec);
1452    
1453     if (offset_return)
1454     *offset_return = dec.cur;
1455    
1456     if (!(offset_return || !sv))
1457 root 1.2 if (dec.cur != dec.end && !dec.err)
1458     dec.err = "garbage after CBOR object";
1459    
1460     if (dec.err)
1461 root 1.1 {
1462 root 1.39 if (dec.shareable)
1463     {
1464 root 1.58 // need to break cyclic links, which would all be in shareable
1465 root 1.39 int i;
1466     SV **svp;
1467    
1468     for (i = av_len (dec.shareable) + 1; i--; )
1469     if ((svp = av_fetch (dec.shareable, i, 0)))
1470     sv_setsv (*svp, &PL_sv_undef);
1471     }
1472    
1473 root 1.63 SvREFCNT_dec_NN (sv);
1474 root 1.59
1475     if (dec.err_sv)
1476     sv_2mortal (dec.err_sv);
1477    
1478 root 1.74 croak ("%s, at offset %ld (octet 0x%02x)", dec.err, (long)(dec.cur - (U8 *)data), (int)(uint8_t)*dec.cur);
1479 root 1.1 }
1480    
1481     sv = sv_2mortal (sv);
1482    
1483     return sv;
1484     }
1485    
1486     /////////////////////////////////////////////////////////////////////////////
1487 root 1.40 // incremental parser
1488    
1489     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1490    
1491     // returns 0 for notyet, 1 for success or error
1492     static int
1493     incr_parse (CBOR *self, SV *cborstr)
1494     {
1495     STRLEN cur;
1496     SvPV (cborstr, cur);
1497    
1498     while (ecb_expect_true (self->incr_need <= cur))
1499     {
1500     // table of integer count bytes
1501     static I8 incr_len[MINOR_MASK + 1] = {
1502     0, 0, 0, 0, 0, 0, 0, 0,
1503     0, 0, 0, 0, 0, 0, 0, 0,
1504     0, 0, 0, 0, 0, 0, 0, 0,
1505     1, 2, 4, 8,-1,-1,-1,-2
1506     };
1507    
1508     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1509     U8 m = *p & MINOR_MASK;
1510     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1511     I8 ilen = incr_len[m];
1512    
1513     self->incr_need = self->incr_pos + 1;
1514    
1515     if (ecb_expect_false (ilen < 0))
1516     {
1517     if (m != MINOR_INDEF)
1518     return 1; // error
1519    
1520     if (*p == (MAJOR_MISC | MINOR_INDEF))
1521     {
1522     if (count >= 0)
1523     return 1; // error
1524    
1525     count = 1;
1526     }
1527     else
1528     {
1529     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1530     count = -1;
1531     }
1532     }
1533     else
1534     {
1535     self->incr_need += ilen;
1536     if (ecb_expect_false (self->incr_need > cur))
1537     return 0;
1538    
1539     int major = *p >> MAJOR_SHIFT;
1540    
1541     switch (major)
1542     {
1543 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1544     ++count; // tags merely prefix another value
1545     break;
1546    
1547 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1548     case MAJOR_TEXT >> MAJOR_SHIFT:
1549     case MAJOR_ARRAY >> MAJOR_SHIFT:
1550     case MAJOR_MAP >> MAJOR_SHIFT:
1551     {
1552     UV len;
1553    
1554     if (ecb_expect_false (ilen))
1555     {
1556     len = 0;
1557    
1558     do {
1559     len = (len << 8) | *++p;
1560     } while (--ilen);
1561     }
1562     else
1563     len = m;
1564    
1565     switch (major)
1566     {
1567     case MAJOR_BYTES >> MAJOR_SHIFT:
1568     case MAJOR_TEXT >> MAJOR_SHIFT:
1569     self->incr_need += len;
1570     if (ecb_expect_false (self->incr_need > cur))
1571     return 0;
1572    
1573     break;
1574    
1575     case MAJOR_MAP >> MAJOR_SHIFT:
1576     len <<= 1;
1577 root 1.71 /* FALLTHROUGH */
1578 root 1.40 case MAJOR_ARRAY >> MAJOR_SHIFT:
1579     if (len)
1580     {
1581     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1582     count = len + 1;
1583     }
1584     break;
1585     }
1586     }
1587     }
1588     }
1589    
1590     self->incr_pos = self->incr_need;
1591    
1592     if (count > 0)
1593     {
1594     while (!--count)
1595     {
1596     if (!AvFILLp (self->incr_count))
1597     return 1; // done
1598    
1599     SvREFCNT_dec_NN (av_pop (self->incr_count));
1600     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1601     }
1602    
1603     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1604     }
1605     }
1606    
1607     return 0;
1608     }
1609    
1610    
1611     /////////////////////////////////////////////////////////////////////////////
1612 root 1.1 // XS interface functions
1613    
1614     MODULE = CBOR::XS PACKAGE = CBOR::XS
1615    
1616     BOOT:
1617     {
1618     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1619 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1620 root 1.1
1621 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1622     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1623    
1624     types_true = get_bool ("Types::Serialiser::true" );
1625     types_false = get_bool ("Types::Serialiser::false");
1626     types_error = get_bool ("Types::Serialiser::error");
1627 root 1.11
1628 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1629    
1630 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1631     SvREADONLY_on (sv_cbor);
1632 root 1.56
1633     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1634 root 1.1 }
1635    
1636     PROTOTYPES: DISABLE
1637    
1638     void CLONE (...)
1639     CODE:
1640 root 1.10 cbor_stash = 0;
1641     cbor_tagged_stash = 0;
1642     types_error_stash = 0;
1643     types_boolean_stash = 0;
1644 root 1.1
1645     void new (char *klass)
1646     PPCODE:
1647     {
1648     SV *pv = NEWSV (0, sizeof (CBOR));
1649     SvPOK_only (pv);
1650     cbor_init ((CBOR *)SvPVX (pv));
1651     XPUSHs (sv_2mortal (sv_bless (
1652     newRV_noinc (pv),
1653     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1654     )));
1655     }
1656    
1657     void shrink (CBOR *self, int enable = 1)
1658     ALIAS:
1659     shrink = F_SHRINK
1660     allow_unknown = F_ALLOW_UNKNOWN
1661 root 1.18 allow_sharing = F_ALLOW_SHARING
1662 root 1.37 allow_cycles = F_ALLOW_CYCLES
1663 root 1.60 forbid_objects = F_FORBID_OBJECTS
1664 root 1.32 pack_strings = F_PACK_STRINGS
1665 root 1.54 text_keys = F_TEXT_KEYS
1666     text_strings = F_TEXT_STRINGS
1667 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1668 root 1.1 PPCODE:
1669     {
1670     if (enable)
1671     self->flags |= ix;
1672     else
1673     self->flags &= ~ix;
1674    
1675     XPUSHs (ST (0));
1676     }
1677    
1678     void get_shrink (CBOR *self)
1679     ALIAS:
1680     get_shrink = F_SHRINK
1681     get_allow_unknown = F_ALLOW_UNKNOWN
1682 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1683 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1684 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1685 root 1.32 get_pack_strings = F_PACK_STRINGS
1686 root 1.54 get_text_keys = F_TEXT_KEYS
1687     get_text_strings = F_TEXT_STRINGS
1688 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1689 root 1.1 PPCODE:
1690     XPUSHs (boolSV (self->flags & ix));
1691    
1692     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1693     PPCODE:
1694     self->max_depth = max_depth;
1695     XPUSHs (ST (0));
1696    
1697     U32 get_max_depth (CBOR *self)
1698     CODE:
1699     RETVAL = self->max_depth;
1700     OUTPUT:
1701     RETVAL
1702    
1703     void max_size (CBOR *self, U32 max_size = 0)
1704     PPCODE:
1705     self->max_size = max_size;
1706     XPUSHs (ST (0));
1707    
1708     int get_max_size (CBOR *self)
1709     CODE:
1710     RETVAL = self->max_size;
1711     OUTPUT:
1712     RETVAL
1713    
1714 root 1.27 void filter (CBOR *self, SV *filter = 0)
1715     PPCODE:
1716     SvREFCNT_dec (self->filter);
1717     self->filter = filter ? newSVsv (filter) : filter;
1718     XPUSHs (ST (0));
1719    
1720     SV *get_filter (CBOR *self)
1721     CODE:
1722     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1723     OUTPUT:
1724     RETVAL
1725    
1726 root 1.1 void encode (CBOR *self, SV *scalar)
1727     PPCODE:
1728     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1729     XPUSHs (scalar);
1730    
1731     void decode (CBOR *self, SV *cborstr)
1732     PPCODE:
1733     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1734     XPUSHs (cborstr);
1735    
1736     void decode_prefix (CBOR *self, SV *cborstr)
1737     PPCODE:
1738     {
1739     SV *sv;
1740     char *offset;
1741     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1742     EXTEND (SP, 2);
1743     PUSHs (sv);
1744     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1745     }
1746    
1747 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1748 root 1.42 ALIAS:
1749     incr_parse_multiple = 1
1750 root 1.40 PPCODE:
1751     {
1752     if (SvUTF8 (cborstr))
1753     sv_utf8_downgrade (cborstr, 0);
1754    
1755     if (!self->incr_count)
1756     {
1757     self->incr_count = newAV ();
1758     self->incr_pos = 0;
1759     self->incr_need = 1;
1760    
1761     av_push (self->incr_count, newSViv (1));
1762     }
1763    
1764 root 1.41 do
1765 root 1.40 {
1766     if (!incr_parse (self, cborstr))
1767     {
1768     if (self->incr_need > self->max_size && self->max_size)
1769     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1770     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1771    
1772     break;
1773     }
1774    
1775 root 1.41 SV *sv;
1776     char *offset;
1777 root 1.40
1778 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1779     XPUSHs (sv);
1780 root 1.40
1781 root 1.41 sv_chop (cborstr, offset);
1782 root 1.40
1783 root 1.41 av_clear (self->incr_count);
1784     av_push (self->incr_count, newSViv (1));
1785 root 1.40
1786 root 1.41 self->incr_pos = 0;
1787     self->incr_need = self->incr_pos + 1;
1788 root 1.40 }
1789 root 1.42 while (ix);
1790 root 1.40 }
1791    
1792     void incr_reset (CBOR *self)
1793     CODE:
1794     {
1795     SvREFCNT_dec (self->incr_count);
1796     self->incr_count = 0;
1797     }
1798    
1799 root 1.27 void DESTROY (CBOR *self)
1800     PPCODE:
1801     cbor_free (self);
1802    
1803 root 1.1 PROTOTYPES: ENABLE
1804    
1805     void encode_cbor (SV *scalar)
1806 root 1.36 ALIAS:
1807     encode_cbor = 0
1808     encode_cbor_sharing = F_ALLOW_SHARING
1809 root 1.1 PPCODE:
1810     {
1811     CBOR cbor;
1812     cbor_init (&cbor);
1813 root 1.36 cbor.flags |= ix;
1814 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1815     XPUSHs (scalar);
1816     }
1817    
1818     void decode_cbor (SV *cborstr)
1819     PPCODE:
1820     {
1821     CBOR cbor;
1822     cbor_init (&cbor);
1823     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1824     XPUSHs (cborstr);
1825     }
1826    
1827 root 1.64 #ifdef __AFL_COMPILER
1828    
1829     void
1830     afl_init ()
1831     CODE:
1832     __AFL_INIT ();
1833    
1834     int
1835     afl_loop (unsigned int count = 10000)
1836     CODE:
1837     RETVAL = __AFL_LOOP (count);
1838     OUTPUT:
1839     RETVAL
1840    
1841     #endif
1842