ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.77
Committed: Fri Sep 8 05:47:10 2023 UTC (8 months, 1 week ago) by root
Branch: MAIN
Changes since 1.76: +29 -7 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.72 // perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0
34     #define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len)))
35    
36 root 1.35 // known major and minor types
37     enum cbor_type
38     {
39     MAJOR_SHIFT = 5,
40     MINOR_MASK = 0x1f,
41    
42     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
43     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
44     MAJOR_BYTES = 2 << MAJOR_SHIFT,
45     MAJOR_TEXT = 3 << MAJOR_SHIFT,
46     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
47     MAJOR_MAP = 5 << MAJOR_SHIFT,
48     MAJOR_TAG = 6 << MAJOR_SHIFT,
49     MAJOR_MISC = 7 << MAJOR_SHIFT,
50    
51     // INT/STRING/ARRAY/MAP subtypes
52     LENGTH_EXT1 = 24,
53     LENGTH_EXT2 = 25,
54     LENGTH_EXT4 = 26,
55     LENGTH_EXT8 = 27,
56    
57     // SIMPLE types (effectively MISC subtypes)
58     SIMPLE_FALSE = 20,
59     SIMPLE_TRUE = 21,
60     SIMPLE_NULL = 22,
61     SIMPLE_UNDEF = 23,
62    
63     // MISC subtype (unused)
64     MISC_EXT1 = 24,
65     MISC_FLOAT16 = 25,
66     MISC_FLOAT32 = 26,
67     MISC_FLOAT64 = 27,
68    
69     // BYTES/TEXT/ARRAY/MAP
70     MINOR_INDEF = 31,
71     };
72    
73 root 1.9 // known tags
74 root 1.8 enum cbor_tag
75     {
76 root 1.35 // extensions
77     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
78     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
79     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
80     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
81     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
82     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
83     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
84    
85     // rfc7049
86     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
87     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
88     CBOR_TAG_POS_BIGNUM = 2, // byte string
89     CBOR_TAG_NEG_BIGNUM = 3, // byte string
90     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
91     CBOR_TAG_BIGFLOAT = 5, // array
92    
93     CBOR_TAG_CONV_B64U = 21, // base64url, any
94     CBOR_TAG_CONV_B64 = 22, // base64, any
95     CBOR_TAG_CONV_HEX = 23, // base16, any
96     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
97    
98     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
99     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
100     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
101     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
102     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
103 root 1.8
104 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
105 root 1.8 };
106    
107 root 1.66 // known forced types, also hardcoded in CBOR.pm
108     enum
109     {
110     AS_CBOR = 0,
111 root 1.68 AS_INT = 1,
112     AS_BYTES = 2,
113     AS_TEXT = 3,
114 root 1.67 AS_FLOAT16 = 4,
115     AS_FLOAT32 = 5,
116     AS_FLOAT64 = 6,
117 root 1.70 AS_MAP = 7,
118 root 1.67 // possibly future enhancements: (generic) float, (generic) string
119 root 1.66 };
120    
121 root 1.24 #define F_SHRINK 0x00000001UL
122     #define F_ALLOW_UNKNOWN 0x00000002UL
123 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
124 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
125 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
126     #define F_PACK_STRINGS 0x00000020UL
127     #define F_TEXT_KEYS 0x00000040UL
128     #define F_TEXT_STRINGS 0x00000080UL
129     #define F_VALIDATE_UTF8 0x00000100UL
130 root 1.1
131     #define INIT_SIZE 32 // initial scalar size to be allocated
132    
133     #define SB do {
134     #define SE } while (0)
135    
136     #define IN_RANGE_INC(type,val,beg,end) \
137     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
138     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
139    
140     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
141    
142     #ifdef USE_ITHREADS
143     # define CBOR_SLOW 1
144     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
145     #else
146     # define CBOR_SLOW 0
147     # define CBOR_STASH cbor_stash
148     #endif
149    
150 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
151 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
152 root 1.1
153     typedef struct {
154     U32 flags;
155     U32 max_depth;
156     STRLEN max_size;
157 root 1.27 SV *filter;
158 root 1.40
159     // for the incremental parser
160     STRLEN incr_pos; // the current offset into the text
161     STRLEN incr_need; // minimum bytes needed to decode
162     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
163 root 1.1 } CBOR;
164    
165 root 1.5 ecb_inline void
166 root 1.1 cbor_init (CBOR *cbor)
167     {
168     Zero (cbor, 1, CBOR);
169     cbor->max_depth = 512;
170     }
171    
172 root 1.27 ecb_inline void
173     cbor_free (CBOR *cbor)
174     {
175     SvREFCNT_dec (cbor->filter);
176 root 1.40 SvREFCNT_dec (cbor->incr_count);
177 root 1.27 }
178    
179 root 1.1 /////////////////////////////////////////////////////////////////////////////
180     // utility functions
181    
182 root 1.5 ecb_inline SV *
183 root 1.1 get_bool (const char *name)
184     {
185     SV *sv = get_sv (name, 1);
186    
187     SvREADONLY_on (sv);
188     SvREADONLY_on (SvRV (sv));
189    
190     return sv;
191     }
192    
193 root 1.5 ecb_inline void
194 root 1.1 shrink (SV *sv)
195     {
196     sv_utf8_downgrade (sv, 1);
197    
198     if (SvLEN (sv) > SvCUR (sv) + 1)
199     {
200     #ifdef SvPV_shrink_to_cur
201     SvPV_shrink_to_cur (sv);
202     #elif defined (SvPV_renew)
203     SvPV_renew (sv, SvCUR (sv) + 1);
204     #endif
205     }
206     }
207    
208 root 1.21 // minimum length of a string to be registered for stringref
209 root 1.71 ecb_inline STRLEN
210 root 1.21 minimum_string_length (UV idx)
211     {
212 root 1.61 return idx <= 23 ? 3
213     : idx <= 0xffU ? 4
214     : idx <= 0xffffU ? 5
215     : idx <= 0xffffffffU ? 7
216     : 11;
217 root 1.21 }
218    
219 root 1.1 /////////////////////////////////////////////////////////////////////////////
220     // encoder
221    
222     // structure used for encoding CBOR
223     typedef struct
224     {
225     char *cur; // SvPVX (sv) + current output position
226     char *end; // SvEND (sv)
227     SV *sv; // result scalar
228     CBOR cbor;
229     U32 depth; // recursion level
230 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
231     UV stringref_idx;
232 root 1.19 HV *shareable; // ptr => index, or 0
233     UV shareable_idx;
234 root 1.1 } enc_t;
235    
236 root 1.5 ecb_inline void
237 root 1.1 need (enc_t *enc, STRLEN len)
238     {
239 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
240 root 1.1 {
241     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
242     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
243     enc->cur = SvPVX (enc->sv) + cur;
244     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
245     }
246     }
247    
248 root 1.70 static void encode_sv (enc_t *enc, SV *sv);
249    
250 root 1.5 ecb_inline void
251 root 1.1 encode_ch (enc_t *enc, char ch)
252     {
253     need (enc, 1);
254     *enc->cur++ = ch;
255     }
256    
257 root 1.66 // used for tags, intregers, element counts and so on
258 root 1.1 static void
259     encode_uint (enc_t *enc, int major, UV len)
260     {
261     need (enc, 9);
262    
263 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
264 root 1.1 *enc->cur++ = major | len;
265 root 1.36 else if (ecb_expect_true (len <= 0xffU))
266 root 1.1 {
267 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
268 root 1.1 *enc->cur++ = len;
269     }
270 root 1.36 else if (len <= 0xffffU)
271 root 1.1 {
272 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
273 root 1.1 *enc->cur++ = len >> 8;
274     *enc->cur++ = len;
275     }
276 root 1.36 else if (len <= 0xffffffffU)
277 root 1.1 {
278 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
279 root 1.1 *enc->cur++ = len >> 24;
280     *enc->cur++ = len >> 16;
281     *enc->cur++ = len >> 8;
282     *enc->cur++ = len;
283     }
284 root 1.4 else
285 root 1.1 {
286 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
287 root 1.1 *enc->cur++ = len >> 56;
288     *enc->cur++ = len >> 48;
289     *enc->cur++ = len >> 40;
290     *enc->cur++ = len >> 32;
291     *enc->cur++ = len >> 24;
292     *enc->cur++ = len >> 16;
293     *enc->cur++ = len >> 8;
294     *enc->cur++ = len;
295     }
296     }
297    
298 root 1.66 // encodes a perl value into a CBOR integer
299 root 1.67 ecb_inline void
300 root 1.66 encode_int (enc_t *enc, SV *sv)
301     {
302     if (SvIsUV (sv))
303     encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
304     else if (SvIVX (sv) >= 0)
305     encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
306     else
307     encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
308     }
309    
310 root 1.21 ecb_inline void
311     encode_tag (enc_t *enc, UV tag)
312     {
313 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
314 root 1.21 }
315    
316 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
317     ecb_noinline static void
318     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
319     {
320     STRLEN ulen = len;
321     U8 *p, *pend = (U8 *)str + len;
322    
323     for (p = (U8 *)str; p < pend; ++p)
324     ulen += *p >> 7; // count set high bits
325    
326     encode_uint (enc, MAJOR_TEXT, ulen);
327    
328     need (enc, ulen);
329     for (p = (U8 *)str; p < pend; ++p)
330     if (*p < 0x80)
331     *enc->cur++ = *p;
332     else
333     {
334     *enc->cur++ = 0xc0 + (*p >> 6);
335     *enc->cur++ = 0x80 + (*p & 63);
336     }
337     }
338    
339 root 1.30 ecb_inline void
340 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
341 root 1.30 {
342 root 1.53 if (ecb_expect_false (upgrade_utf8))
343 root 1.51 if (!utf8)
344     {
345 root 1.53 encode_str_utf8 (enc, utf8, str, len);
346 root 1.51 return;
347     }
348    
349 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
350 root 1.30 need (enc, len);
351     memcpy (enc->cur, str, len);
352     enc->cur += len;
353     }
354    
355 root 1.54 ecb_inline void
356 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
357 root 1.1 {
358 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
359 root 1.21 {
360 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
361 root 1.21
362     if (SvOK (*svp))
363     {
364     // already registered, use stringref
365     encode_tag (enc, CBOR_TAG_STRINGREF);
366 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
367 root 1.21 return;
368     }
369     else if (len >= minimum_string_length (enc->stringref_idx))
370     {
371     // register only
372     sv_setuv (*svp, enc->stringref_idx);
373     ++enc->stringref_idx;
374     }
375     }
376    
377 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
378 root 1.1 }
379    
380 root 1.66 ecb_inline void
381     encode_float16 (enc_t *enc, NV nv)
382     {
383     need (enc, 1+2);
384    
385     *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
386    
387     uint16_t fp = ecb_float_to_binary16 (nv);
388    
389     if (!ecb_big_endian ())
390     fp = ecb_bswap16 (fp);
391    
392     memcpy (enc->cur, &fp, 2);
393     enc->cur += 2;
394     }
395    
396     ecb_inline void
397     encode_float32 (enc_t *enc, NV nv)
398     {
399     need (enc, 1+4);
400    
401     *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
402    
403     uint32_t fp = ecb_float_to_binary32 (nv);
404    
405     if (!ecb_big_endian ())
406     fp = ecb_bswap32 (fp);
407    
408     memcpy (enc->cur, &fp, 4);
409     enc->cur += 4;
410     }
411    
412     ecb_inline void
413     encode_float64 (enc_t *enc, NV nv)
414     {
415     need (enc, 1+8);
416    
417     *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
418    
419     uint64_t fp = ecb_double_to_binary64 (nv);
420    
421     if (!ecb_big_endian ())
422     fp = ecb_bswap64 (fp);
423    
424     memcpy (enc->cur, &fp, 8);
425     enc->cur += 8;
426     }
427    
428     ecb_inline void
429 root 1.69 encode_bool (enc_t *enc, int istrue)
430     {
431     encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
432     }
433    
434 root 1.70 // encodes an arrayref containing key-value pairs as CBOR map
435     ecb_inline void
436     encode_array_as_map (enc_t *enc, SV *sv)
437     {
438     if (enc->depth >= enc->cbor.max_depth)
439     croak (ERR_NESTING_EXCEEDED);
440    
441     ++enc->depth;
442    
443     // as_map does error checking for us, but we re-check in case
444     // things have changed.
445    
446     if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
447     croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)");
448    
449     AV *av = (AV *)SvRV (sv);
450     int i, len = av_len (av);
451    
452     if (!(len & 1))
453     croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)");
454    
455     encode_uint (enc, MAJOR_MAP, (len + 1) >> 1);
456    
457     for (i = 0; i <= len; ++i)
458     {
459     SV **svp = av_fetch (av, i, 0);
460     encode_sv (enc, svp ? *svp : &PL_sv_undef);
461     }
462    
463     --enc->depth;
464     }
465    
466 root 1.69 ecb_inline void
467 root 1.66 encode_forced (enc_t *enc, UV type, SV *sv)
468     {
469     switch (type)
470     {
471     case AS_CBOR:
472     {
473     STRLEN len;
474     char *str = SvPVbyte (sv, len);
475    
476     need (enc, len);
477     memcpy (enc->cur, str, len);
478     enc->cur += len;
479     }
480     break;
481    
482     case AS_BYTES:
483     {
484     STRLEN len;
485     char *str = SvPVbyte (sv, len);
486     encode_strref (enc, 0, 0, str, len);
487     }
488     break;
489    
490     case AS_TEXT:
491     {
492     STRLEN len;
493     char *str = SvPVutf8 (sv, len);
494     encode_strref (enc, 1, 1, str, len);
495     }
496     break;
497    
498 root 1.69 case AS_INT: encode_int (enc, sv); break;
499    
500 root 1.66 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
501     case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
502     case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
503    
504 root 1.70 case AS_MAP: encode_array_as_map (enc, sv); break;
505    
506 root 1.66 default:
507     croak ("encountered malformed CBOR::XS::Tagged object");
508     }
509     }
510    
511 root 1.1 static void
512     encode_av (enc_t *enc, AV *av)
513     {
514     int i, len = av_len (av);
515    
516     if (enc->depth >= enc->cbor.max_depth)
517     croak (ERR_NESTING_EXCEEDED);
518    
519     ++enc->depth;
520    
521 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
522 root 1.1
523 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
524 root 1.45 for (i = 0; i <= len; ++i)
525     {
526     SV **svp = av_fetch (av, i, 0);
527     encode_sv (enc, svp ? *svp : &PL_sv_undef);
528     }
529     else
530     for (i = 0; i <= len; ++i)
531 root 1.46 {
532     SV *sv = AvARRAY (av)[i];
533     encode_sv (enc, sv ? sv : &PL_sv_undef);
534     }
535 root 1.1
536     --enc->depth;
537     }
538    
539     static void
540     encode_hv (enc_t *enc, HV *hv)
541     {
542     HE *he;
543    
544     if (enc->depth >= enc->cbor.max_depth)
545     croak (ERR_NESTING_EXCEEDED);
546    
547     ++enc->depth;
548    
549     int pairs = hv_iterinit (hv);
550     int mg = SvMAGICAL (hv);
551    
552 root 1.63 if (ecb_expect_false (mg))
553 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
554 root 1.1 else
555 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
556 root 1.1
557     while ((he = hv_iternext (hv)))
558     {
559 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
560     encode_sv (enc, HeSVKEY (he));
561 root 1.1 else
562 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
563 root 1.1
564 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
565 root 1.1 }
566    
567 root 1.63 if (ecb_expect_false (mg))
568 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
569 root 1.1
570     --enc->depth;
571     }
572    
573     // encode objects, arrays and special \0=false and \1=true values.
574     static void
575     encode_rv (enc_t *enc, SV *sv)
576     {
577 root 1.19 SvGETMAGIC (sv);
578 root 1.1
579 root 1.19 svtype svt = SvTYPE (sv);
580 root 1.18
581 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
582 root 1.1 {
583 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
584     ? types_boolean_stash
585     : gv_stashpv ("Types::Serialiser::Boolean", 1);
586     HV *error_stash = !CBOR_SLOW || types_error_stash
587     ? types_error_stash
588     : gv_stashpv ("Types::Serialiser::Error", 1);
589 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
590     ? cbor_tagged_stash
591     : gv_stashpv ("CBOR::XS::Tagged" , 1);
592 root 1.1
593 root 1.11 HV *stash = SvSTASH (sv);
594    
595     if (stash == boolean_stash)
596 root 1.34 {
597 root 1.69 encode_bool (enc, SvIV (sv));
598 root 1.34 return;
599     }
600 root 1.11 else if (stash == error_stash)
601 root 1.34 {
602 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
603 root 1.34 return;
604     }
605 root 1.11 else if (stash == tagged_stash)
606 root 1.6 {
607     if (svt != SVt_PVAV)
608     croak ("encountered CBOR::XS::Tagged object that isn't an array");
609    
610 root 1.66 switch (av_len ((AV *)sv))
611     {
612     case 2-1:
613     // actually a tagged value
614     encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
615     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
616     break;
617    
618     case 3-1:
619     // a forced type [value, type, undef]
620     encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
621     break;
622    
623     default:
624     croak ("encountered malformed CBOR::XS::Tagged object");
625     }
626 root 1.34
627     return;
628 root 1.6 }
629 root 1.34 }
630    
631     if (ecb_expect_false (SvREFCNT (sv) > 1)
632     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
633     {
634 root 1.63 if (ecb_expect_false (!enc->shareable))
635 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
636    
637     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
638    
639     if (SvOK (*svp))
640     {
641     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
642 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
643 root 1.34 return;
644     }
645     else
646     {
647     sv_setuv (*svp, enc->shareable_idx);
648     ++enc->shareable_idx;
649     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
650     }
651     }
652    
653     if (ecb_expect_false (SvOBJECT (sv)))
654     {
655     HV *stash = SvSTASH (sv);
656     GV *method;
657    
658 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
659     croak ("encountered object '%s', but forbid_objects is enabled",
660     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
661     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
662 root 1.1 {
663 root 1.11 dSP;
664    
665 root 1.50 ENTER; SAVETMPS;
666     PUSHMARK (SP);
667 root 1.6 // we re-bless the reference to get overload and other niceties right
668 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
669    
670     PUTBACK;
671     // G_SCALAR ensures that return value is 1
672     call_sv ((SV *)GvCV (method), G_SCALAR);
673     SPAGAIN;
674    
675     // catch this surprisingly common error
676     if (SvROK (TOPs) && SvRV (TOPs) == sv)
677     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
678    
679     encode_sv (enc, POPs);
680    
681     PUTBACK;
682    
683     FREETMPS; LEAVE;
684     }
685     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
686     {
687     dSP;
688 root 1.6
689 root 1.50 ENTER; SAVETMPS;
690     PUSHMARK (SP);
691 root 1.11 EXTEND (SP, 2);
692     // we re-bless the reference to get overload and other niceties right
693     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
694     PUSHs (sv_cbor);
695 root 1.1
696 root 1.11 PUTBACK;
697     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
698     SPAGAIN;
699 root 1.6
700 root 1.11 // catch this surprisingly common error
701     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
702     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
703 root 1.6
704 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
705 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
706 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
707 root 1.6
708 root 1.65 {
709     int i;
710    
711     for (i = 0; i < count; ++i)
712     encode_sv (enc, SP[i + 1 - count]);
713    
714     SP -= count;
715     }
716 root 1.6
717 root 1.11 PUTBACK;
718 root 1.6
719 root 1.11 FREETMPS; LEAVE;
720 root 1.1 }
721 root 1.11 else
722     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
723     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
724 root 1.1 }
725     else if (svt == SVt_PVHV)
726     encode_hv (enc, (HV *)sv);
727     else if (svt == SVt_PVAV)
728     encode_av (enc, (AV *)sv);
729 root 1.18 else
730 root 1.1 {
731 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
732     encode_sv (enc, sv);
733 root 1.1 }
734     }
735    
736     static void
737     encode_nv (enc_t *enc, SV *sv)
738     {
739     double nv = SvNVX (sv);
740    
741     need (enc, 9);
742    
743 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
744     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
745 root 1.1 //TODO: maybe I32?
746 root 1.5 else if (ecb_expect_false (nv == (float)nv))
747 root 1.66 encode_float32 (enc, nv);
748 root 1.1 else
749 root 1.66 encode_float64 (enc, nv);
750 root 1.1 }
751    
752     static void
753     encode_sv (enc_t *enc, SV *sv)
754     {
755     SvGETMAGIC (sv);
756    
757     if (SvPOKp (sv))
758     {
759     STRLEN len;
760     char *str = SvPV (sv, len);
761 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
762 root 1.1 }
763     else if (SvNOKp (sv))
764     encode_nv (enc, sv);
765     else if (SvIOKp (sv))
766 root 1.67 encode_int (enc, sv);
767 root 1.1 else if (SvROK (sv))
768     encode_rv (enc, SvRV (sv));
769     else if (!SvOK (sv))
770 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
771 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
772 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
773 root 1.1 else
774     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
775     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
776     }
777    
778     static SV *
779     encode_cbor (SV *scalar, CBOR *cbor)
780     {
781 root 1.48 enc_t enc = { 0 };
782 root 1.1
783 root 1.63 enc.cbor = *cbor;
784     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
785     enc.cur = SvPVX (enc.sv);
786     enc.end = SvEND (enc.sv);
787 root 1.1
788     SvPOK_only (enc.sv);
789 root 1.20
790 root 1.32 if (cbor->flags & F_PACK_STRINGS)
791 root 1.20 {
792     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
793     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
794     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
795     }
796    
797 root 1.1 encode_sv (&enc, scalar);
798    
799     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
800     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
801    
802     if (enc.cbor.flags & F_SHRINK)
803     shrink (enc.sv);
804    
805     return enc.sv;
806     }
807    
808     /////////////////////////////////////////////////////////////////////////////
809     // decoder
810    
811     // structure used for decoding CBOR
812     typedef struct
813     {
814     U8 *cur; // current parser pointer
815     U8 *end; // end of input string
816     const char *err; // parse error, if != 0
817     CBOR cbor;
818     U32 depth; // recursion depth
819     U32 maxdepth; // recursion depth limit
820 root 1.19 AV *shareable;
821 root 1.20 AV *stringref;
822 root 1.27 SV *decode_tagged;
823 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
824 root 1.1 } dec_t;
825    
826 root 1.59 // set dec->err to ERRSV
827     ecb_cold static void
828     err_errsv (dec_t *dec)
829     {
830     if (!dec->err)
831     {
832     dec->err_sv = newSVsv (ERRSV);
833    
834     // chop off the trailing \n
835     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
836     *SvEND (dec->err_sv) = 0;
837    
838     dec->err = SvPVutf8_nolen (dec->err_sv);
839     }
840     }
841 root 1.1
842 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
843     ecb_cold static void
844     err_set (dec_t *dec, const char *reason)
845     {
846     if (!dec->err)
847     dec->err = reason;
848     }
849    
850     ecb_cold static void
851     err_unexpected_end (dec_t *dec)
852     {
853     err_set (dec, "unexpected end of CBOR data");
854     }
855    
856     #define ERR_DO(do) SB do; goto fail; SE
857     #define ERR(reason) ERR_DO (err_set (dec, reason))
858     #define ERR_ERRSV ERR_DO (err_errsv (dec))
859    
860 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
861 root 1.1
862 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
863 root 1.1 #define DEC_DEC_DEPTH --dec->depth
864    
865     static UV
866     decode_uint (dec_t *dec)
867     {
868 root 1.35 U8 m = *dec->cur & MINOR_MASK;
869     ++dec->cur;
870    
871     if (ecb_expect_true (m < LENGTH_EXT1))
872     return m;
873 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
874     {
875     WANT (1);
876     dec->cur += 1;
877     return dec->cur[-1];
878     }
879     else if (ecb_expect_true (m == LENGTH_EXT2))
880     {
881     WANT (2);
882     dec->cur += 2;
883     return (((UV)dec->cur[-2]) << 8)
884     | ((UV)dec->cur[-1]);
885     }
886     else if (ecb_expect_true (m == LENGTH_EXT4))
887     {
888     WANT (4);
889     dec->cur += 4;
890     return (((UV)dec->cur[-4]) << 24)
891     | (((UV)dec->cur[-3]) << 16)
892     | (((UV)dec->cur[-2]) << 8)
893     | ((UV)dec->cur[-1]);
894     }
895     else if (ecb_expect_true (m == LENGTH_EXT8))
896 root 1.1 {
897 root 1.36 WANT (8);
898     dec->cur += 8;
899 root 1.34
900 root 1.36 return
901 root 1.34 #if UVSIZE < 8
902 root 1.36 0
903 root 1.34 #else
904 root 1.36 (((UV)dec->cur[-8]) << 56)
905     | (((UV)dec->cur[-7]) << 48)
906     | (((UV)dec->cur[-6]) << 40)
907     | (((UV)dec->cur[-5]) << 32)
908 root 1.34 #endif
909 root 1.36 | (((UV)dec->cur[-4]) << 24)
910     | (((UV)dec->cur[-3]) << 16)
911     | (((UV)dec->cur[-2]) << 8)
912     | ((UV)dec->cur[-1]);
913 root 1.1 }
914 root 1.36 else
915     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
916 root 1.1
917     fail:
918     return 0;
919     }
920    
921     static SV *decode_sv (dec_t *dec);
922    
923     static SV *
924     decode_av (dec_t *dec)
925     {
926     AV *av = newAV ();
927    
928     DEC_INC_DEPTH;
929    
930 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
931 root 1.1 {
932     ++dec->cur;
933    
934     for (;;)
935     {
936     WANT (1);
937    
938 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
939 root 1.1 {
940     ++dec->cur;
941     break;
942     }
943    
944     av_push (av, decode_sv (dec));
945     }
946     }
947     else
948     {
949 root 1.57 UV i, len = decode_uint (dec);
950 root 1.1
951 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
952 root 1.1 av_fill (av, len - 1);
953    
954     for (i = 0; i < len; ++i)
955     AvARRAY (av)[i] = decode_sv (dec);
956     }
957    
958     DEC_DEC_DEPTH;
959     return newRV_noinc ((SV *)av);
960    
961     fail:
962 root 1.63 SvREFCNT_dec_NN (av);
963 root 1.1 DEC_DEC_DEPTH;
964     return &PL_sv_undef;
965     }
966    
967 root 1.16 static void
968     decode_he (dec_t *dec, HV *hv)
969     {
970     // for speed reasons, we specialcase single-string
971 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
972    
973 root 1.23 if (ecb_expect_true (!dec->stringref))
974 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
975 root 1.21 {
976 root 1.56 STRLEN len = decode_uint (dec);
977 root 1.21 char *key = (char *)dec->cur;
978 root 1.16
979 root 1.49 WANT (len);
980 root 1.21 dec->cur += len;
981    
982 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
983 root 1.16
984 root 1.21 return;
985     }
986 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
987 root 1.21 {
988 root 1.56 STRLEN len = decode_uint (dec);
989 root 1.21 char *key = (char *)dec->cur;
990 root 1.16
991 root 1.49 WANT (len);
992 root 1.21 dec->cur += len;
993 root 1.20
994 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
995 root 1.72 if (!cbor_is_utf8_string ((U8 *)key, len))
996 root 1.38 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
997 root 1.16
998 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
999 root 1.16
1000 root 1.21 return;
1001     }
1002 root 1.20
1003 root 1.21 SV *k = decode_sv (dec);
1004     SV *v = decode_sv (dec);
1005 root 1.16
1006 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
1007     // methods, and this is hopefully the only place where it can happen,
1008     // so if there is a chance of an exception, take the very slow path.
1009     // since catching exceptions is "undocumented/internal/forbidden" by
1010     // the new p5p powers, we need to call out to a perl function :/
1011     if (ecb_expect_false (SvAMAGIC (k)))
1012     {
1013     dSP;
1014    
1015     ENTER; SAVETMPS;
1016     PUSHMARK (SP);
1017     EXTEND (SP, 3);
1018     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
1019     PUSHs (sv_2mortal (k));
1020     PUSHs (sv_2mortal (v));
1021    
1022     PUTBACK;
1023     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
1024     SPAGAIN;
1025    
1026     FREETMPS; LEAVE;
1027    
1028     if (SvTRUE (ERRSV))
1029     ERR_ERRSV;
1030    
1031     return;
1032     }
1033    
1034 root 1.21 hv_store_ent (hv, k, v, 0);
1035 root 1.63 SvREFCNT_dec_NN (k);
1036 root 1.38
1037     fail:
1038     ;
1039 root 1.16 }
1040    
1041 root 1.1 static SV *
1042     decode_hv (dec_t *dec)
1043     {
1044     HV *hv = newHV ();
1045    
1046     DEC_INC_DEPTH;
1047    
1048 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
1049 root 1.1 {
1050     ++dec->cur;
1051    
1052     for (;;)
1053     {
1054     WANT (1);
1055    
1056 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
1057 root 1.1 {
1058     ++dec->cur;
1059     break;
1060     }
1061    
1062 root 1.16 decode_he (dec, hv);
1063 root 1.1 }
1064     }
1065     else
1066     {
1067 root 1.57 UV pairs = decode_uint (dec);
1068    
1069     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
1070 root 1.1
1071 root 1.16 while (pairs--)
1072     decode_he (dec, hv);
1073 root 1.1 }
1074    
1075     DEC_DEC_DEPTH;
1076     return newRV_noinc ((SV *)hv);
1077    
1078     fail:
1079 root 1.63 SvREFCNT_dec_NN (hv);
1080 root 1.1 DEC_DEC_DEPTH;
1081     return &PL_sv_undef;
1082     }
1083    
1084     static SV *
1085     decode_str (dec_t *dec, int utf8)
1086     {
1087 root 1.6 SV *sv = 0;
1088 root 1.1
1089 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
1090 root 1.1 {
1091 root 1.33 // indefinite length strings
1092 root 1.1 ++dec->cur;
1093    
1094 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
1095 root 1.33
1096 root 1.1 sv = newSVpvn ("", 0);
1097    
1098     for (;;)
1099     {
1100     WANT (1);
1101    
1102 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
1103     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
1104 root 1.33 {
1105     ++dec->cur;
1106     break;
1107     }
1108     else
1109     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
1110    
1111     STRLEN len = decode_uint (dec);
1112 root 1.1
1113 root 1.33 WANT (len);
1114     sv_catpvn (sv, dec->cur, len);
1115     dec->cur += len;
1116 root 1.1 }
1117     }
1118     else
1119     {
1120     STRLEN len = decode_uint (dec);
1121    
1122     WANT (len);
1123     sv = newSVpvn (dec->cur, len);
1124     dec->cur += len;
1125 root 1.25
1126     if (ecb_expect_false (dec->stringref)
1127     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
1128     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
1129 root 1.1 }
1130    
1131     if (utf8)
1132 root 1.38 {
1133     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
1134 root 1.72 if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv)))
1135 root 1.38 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
1136    
1137     SvUTF8_on (sv);
1138     }
1139 root 1.1
1140     return sv;
1141    
1142     fail:
1143 root 1.6 SvREFCNT_dec (sv);
1144 root 1.1 return &PL_sv_undef;
1145     }
1146    
1147     static SV *
1148 root 1.3 decode_tagged (dec_t *dec)
1149     {
1150 root 1.19 SV *sv = 0;
1151 root 1.3 UV tag = decode_uint (dec);
1152 root 1.19
1153     WANT (1);
1154 root 1.3
1155 root 1.18 switch (tag)
1156     {
1157     case CBOR_TAG_MAGIC:
1158 root 1.20 sv = decode_sv (dec);
1159     break;
1160 root 1.18
1161     case CBOR_TAG_INDIRECTION:
1162 root 1.20 sv = newRV_noinc (decode_sv (dec));
1163     break;
1164    
1165     case CBOR_TAG_STRINGREF_NAMESPACE:
1166     {
1167 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1168 root 1.55 // erase mortalised caches, e.g. "shareable"
1169     ENTER;
1170 root 1.20
1171     SAVESPTR (dec->stringref);
1172     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1173    
1174     sv = decode_sv (dec);
1175    
1176 root 1.55 LEAVE;
1177 root 1.20 }
1178     break;
1179    
1180     case CBOR_TAG_STRINGREF:
1181     {
1182 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1183 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1184    
1185     UV idx = decode_uint (dec);
1186    
1187 root 1.64 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1188 root 1.20 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1189    
1190     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1191     }
1192     break;
1193 root 1.11
1194 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1195     {
1196     if (ecb_expect_false (!dec->shareable))
1197     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1198    
1199 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1200     {
1201 root 1.77 // if cycles are allowed, then we store an AV as value
1202     // while it is being decoded, and gather unresolved
1203     // references in it, to be re4solved after decoding.
1204     int idx, i;
1205     AV *av = newAV ();
1206     av_push (dec->shareable, SvREFCNT_inc_NN ((SV *)av));
1207     idx = AvFILLp (dec->shareable);
1208    
1209     sv = decode_sv (dec);
1210 root 1.18
1211 root 1.77 // the AV now contains \undef for all unresolved references,
1212     // so we fix them up here.
1213     for (i = 0; i <= AvFILLp (av); ++i)
1214     SvRV_set (AvARRAY (av)[i], SvREFCNT_inc_NN (SvRV (sv)));
1215    
1216     // now replace the AV by a reference to the completed value
1217     SvREFCNT_dec_NN (av);
1218     AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv);
1219 root 1.37 }
1220     else
1221     {
1222     av_push (dec->shareable, &PL_sv_undef);
1223     int idx = AvFILLp (dec->shareable);
1224     sv = decode_sv (dec);
1225     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1226     }
1227 root 1.19 }
1228 root 1.20 break;
1229 root 1.18
1230     case CBOR_TAG_VALUE_SHAREDREF:
1231 root 1.17 {
1232 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1233 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1234 root 1.18
1235 root 1.19 UV idx = decode_uint (dec);
1236    
1237 root 1.64 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1238 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1239    
1240 root 1.77 sv = AvARRAY (dec->shareable)[idx];
1241 root 1.37
1242 root 1.77 // reference to cycle, we create a new \undef and use that, and also
1243     // registerr it in the AV for later fixing
1244     if (SvTYPE (sv) == SVt_PVAV)
1245     {
1246     AV *av = (AV *)sv;
1247     sv = newRV_noinc (&PL_sv_undef);
1248     av_push (av, sv);
1249     }
1250     else if (sv == &PL_sv_undef) // not yet decoded, but cycles not allowed
1251 root 1.37 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1252 root 1.77 else // we decoded the object earlier, no cycle
1253     sv = newSVsv (sv);
1254 root 1.17 }
1255 root 1.20 break;
1256 root 1.17
1257 root 1.18 case CBOR_TAG_PERL_OBJECT:
1258     {
1259 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1260     goto filter;
1261    
1262 root 1.19 sv = decode_sv (dec);
1263    
1264 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1265     ERR ("corrupted CBOR data (non-array perl object)");
1266    
1267     AV *av = (AV *)SvRV (sv);
1268     int len = av_len (av) + 1;
1269     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1270    
1271     if (!stash)
1272     ERR ("cannot decode perl-object (package does not exist)");
1273    
1274     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1275    
1276     if (!method)
1277     ERR ("cannot decode perl-object (package does not have a THAW method)");
1278    
1279     dSP;
1280    
1281 root 1.50 ENTER; SAVETMPS;
1282     PUSHMARK (SP);
1283 root 1.18 EXTEND (SP, len + 1);
1284     // we re-bless the reference to get overload and other niceties right
1285     PUSHs (*av_fetch (av, 0, 1));
1286     PUSHs (sv_cbor);
1287    
1288     int i;
1289    
1290     for (i = 1; i < len; ++i)
1291     PUSHs (*av_fetch (av, i, 1));
1292    
1293     PUTBACK;
1294     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1295     SPAGAIN;
1296    
1297     if (SvTRUE (ERRSV))
1298     {
1299     FREETMPS; LEAVE;
1300 root 1.59 ERR_ERRSV;
1301 root 1.18 }
1302    
1303 root 1.63 SvREFCNT_dec_NN (sv);
1304 root 1.18 sv = SvREFCNT_inc (POPs);
1305 root 1.11
1306 root 1.18 PUTBACK;
1307 root 1.11
1308 root 1.18 FREETMPS; LEAVE;
1309     }
1310 root 1.20 break;
1311 root 1.9
1312 root 1.18 default:
1313 root 1.60 filter:
1314 root 1.18 {
1315 root 1.58 SV *tag_sv = newSVuv (tag);
1316    
1317 root 1.19 sv = decode_sv (dec);
1318    
1319 root 1.27 dSP;
1320 root 1.50 ENTER; SAVETMPS;
1321     PUSHMARK (SP);
1322 root 1.27 EXTEND (SP, 2);
1323 root 1.58 PUSHs (tag_sv);
1324 root 1.27 PUSHs (sv);
1325    
1326     PUTBACK;
1327     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1328     SPAGAIN;
1329    
1330     if (SvTRUE (ERRSV))
1331     {
1332 root 1.63 SvREFCNT_dec_NN (tag_sv);
1333 root 1.27 FREETMPS; LEAVE;
1334 root 1.59 ERR_ERRSV;
1335 root 1.27 }
1336    
1337     if (count)
1338     {
1339 root 1.63 SvREFCNT_dec_NN (tag_sv);
1340     SvREFCNT_dec_NN (sv);
1341 root 1.65 sv = SvREFCNT_inc_NN (TOPs);
1342     SP -= count;
1343 root 1.27 }
1344     else
1345     {
1346     AV *av = newAV ();
1347 root 1.58 av_push (av, tag_sv);
1348 root 1.27 av_push (av, sv);
1349    
1350     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1351     ? cbor_tagged_stash
1352     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1353     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1354     }
1355 root 1.7
1356 root 1.27 PUTBACK;
1357    
1358     FREETMPS; LEAVE;
1359 root 1.18 }
1360 root 1.20 break;
1361 root 1.11 }
1362 root 1.9
1363 root 1.20 return sv;
1364    
1365 root 1.9 fail:
1366     SvREFCNT_dec (sv);
1367     return &PL_sv_undef;
1368 root 1.3 }
1369    
1370     static SV *
1371 root 1.1 decode_sv (dec_t *dec)
1372     {
1373     WANT (1);
1374    
1375 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1376 root 1.1 {
1377 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1378     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1379     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1380     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1381     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1382     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1383     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1384    
1385     case MAJOR_MISC >> MAJOR_SHIFT:
1386     switch (*dec->cur++ & MINOR_MASK)
1387 root 1.1 {
1388 root 1.35 case SIMPLE_FALSE:
1389 root 1.1 #if CBOR_SLOW
1390 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1391 root 1.1 #endif
1392 root 1.10 return newSVsv (types_false);
1393 root 1.35 case SIMPLE_TRUE:
1394 root 1.1 #if CBOR_SLOW
1395 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1396 root 1.1 #endif
1397 root 1.10 return newSVsv (types_true);
1398 root 1.35 case SIMPLE_NULL:
1399 root 1.1 return newSVsv (&PL_sv_undef);
1400 root 1.35 case SIMPLE_UNDEF:
1401 root 1.10 #if CBOR_SLOW
1402     types_error = get_bool ("Types::Serialiser::error");
1403     #endif
1404     return newSVsv (types_error);
1405 root 1.1
1406 root 1.35 case MISC_FLOAT16:
1407 root 1.2 {
1408     WANT (2);
1409    
1410     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1411     dec->cur += 2;
1412    
1413     return newSVnv (ecb_binary16_to_float (fp));
1414     }
1415 root 1.1
1416 root 1.35 case MISC_FLOAT32:
1417 root 1.1 {
1418     uint32_t fp;
1419     WANT (4);
1420     memcpy (&fp, dec->cur, 4);
1421     dec->cur += 4;
1422    
1423     if (!ecb_big_endian ())
1424     fp = ecb_bswap32 (fp);
1425    
1426     return newSVnv (ecb_binary32_to_float (fp));
1427     }
1428    
1429 root 1.35 case MISC_FLOAT64:
1430 root 1.1 {
1431     uint64_t fp;
1432     WANT (8);
1433     memcpy (&fp, dec->cur, 8);
1434     dec->cur += 8;
1435    
1436     if (!ecb_big_endian ())
1437     fp = ecb_bswap64 (fp);
1438    
1439     return newSVnv (ecb_binary64_to_double (fp));
1440     }
1441    
1442 root 1.35 // 0..19 unassigned simple
1443 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1444     // 28-30 unassigned misc
1445     // 31 break code
1446 root 1.1 default:
1447 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1448 root 1.1 }
1449    
1450     break;
1451     }
1452    
1453     fail:
1454     return &PL_sv_undef;
1455     }
1456    
1457     static SV *
1458     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1459     {
1460 root 1.48 dec_t dec = { 0 };
1461 root 1.1 SV *sv;
1462 root 1.16 STRLEN len;
1463     char *data = SvPVbyte (string, len);
1464 root 1.1
1465 root 1.16 if (len > cbor->max_size && cbor->max_size)
1466     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1467     (unsigned long)len, (unsigned long)cbor->max_size);
1468 root 1.1
1469     dec.cbor = *cbor;
1470 root 1.16 dec.cur = (U8 *)data;
1471     dec.end = (U8 *)data + len;
1472 root 1.1
1473     sv = decode_sv (&dec);
1474    
1475     if (offset_return)
1476     *offset_return = dec.cur;
1477    
1478     if (!(offset_return || !sv))
1479 root 1.2 if (dec.cur != dec.end && !dec.err)
1480     dec.err = "garbage after CBOR object";
1481    
1482     if (dec.err)
1483 root 1.1 {
1484 root 1.39 if (dec.shareable)
1485     {
1486 root 1.58 // need to break cyclic links, which would all be in shareable
1487 root 1.39 int i;
1488     SV **svp;
1489    
1490     for (i = av_len (dec.shareable) + 1; i--; )
1491     if ((svp = av_fetch (dec.shareable, i, 0)))
1492     sv_setsv (*svp, &PL_sv_undef);
1493     }
1494    
1495 root 1.63 SvREFCNT_dec_NN (sv);
1496 root 1.59
1497     if (dec.err_sv)
1498     sv_2mortal (dec.err_sv);
1499    
1500 root 1.74 croak ("%s, at offset %ld (octet 0x%02x)", dec.err, (long)(dec.cur - (U8 *)data), (int)(uint8_t)*dec.cur);
1501 root 1.1 }
1502    
1503     sv = sv_2mortal (sv);
1504    
1505     return sv;
1506     }
1507    
1508     /////////////////////////////////////////////////////////////////////////////
1509 root 1.40 // incremental parser
1510    
1511     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1512    
1513     // returns 0 for notyet, 1 for success or error
1514     static int
1515     incr_parse (CBOR *self, SV *cborstr)
1516     {
1517     STRLEN cur;
1518     SvPV (cborstr, cur);
1519    
1520     while (ecb_expect_true (self->incr_need <= cur))
1521     {
1522     // table of integer count bytes
1523     static I8 incr_len[MINOR_MASK + 1] = {
1524     0, 0, 0, 0, 0, 0, 0, 0,
1525     0, 0, 0, 0, 0, 0, 0, 0,
1526     0, 0, 0, 0, 0, 0, 0, 0,
1527     1, 2, 4, 8,-1,-1,-1,-2
1528     };
1529    
1530     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1531     U8 m = *p & MINOR_MASK;
1532     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1533     I8 ilen = incr_len[m];
1534    
1535     self->incr_need = self->incr_pos + 1;
1536    
1537     if (ecb_expect_false (ilen < 0))
1538     {
1539     if (m != MINOR_INDEF)
1540     return 1; // error
1541    
1542     if (*p == (MAJOR_MISC | MINOR_INDEF))
1543     {
1544     if (count >= 0)
1545     return 1; // error
1546    
1547     count = 1;
1548     }
1549     else
1550     {
1551     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1552     count = -1;
1553     }
1554     }
1555     else
1556     {
1557     self->incr_need += ilen;
1558     if (ecb_expect_false (self->incr_need > cur))
1559     return 0;
1560    
1561     int major = *p >> MAJOR_SHIFT;
1562    
1563     switch (major)
1564     {
1565 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1566     ++count; // tags merely prefix another value
1567     break;
1568    
1569 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1570     case MAJOR_TEXT >> MAJOR_SHIFT:
1571     case MAJOR_ARRAY >> MAJOR_SHIFT:
1572     case MAJOR_MAP >> MAJOR_SHIFT:
1573     {
1574     UV len;
1575    
1576     if (ecb_expect_false (ilen))
1577     {
1578     len = 0;
1579    
1580     do {
1581     len = (len << 8) | *++p;
1582     } while (--ilen);
1583     }
1584     else
1585     len = m;
1586    
1587     switch (major)
1588     {
1589     case MAJOR_BYTES >> MAJOR_SHIFT:
1590     case MAJOR_TEXT >> MAJOR_SHIFT:
1591     self->incr_need += len;
1592     if (ecb_expect_false (self->incr_need > cur))
1593     return 0;
1594    
1595     break;
1596    
1597     case MAJOR_MAP >> MAJOR_SHIFT:
1598     len <<= 1;
1599 root 1.71 /* FALLTHROUGH */
1600 root 1.40 case MAJOR_ARRAY >> MAJOR_SHIFT:
1601     if (len)
1602     {
1603     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1604     count = len + 1;
1605     }
1606     break;
1607     }
1608     }
1609     }
1610     }
1611    
1612     self->incr_pos = self->incr_need;
1613    
1614     if (count > 0)
1615     {
1616     while (!--count)
1617     {
1618     if (!AvFILLp (self->incr_count))
1619     return 1; // done
1620    
1621     SvREFCNT_dec_NN (av_pop (self->incr_count));
1622     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1623     }
1624    
1625     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1626     }
1627     }
1628    
1629     return 0;
1630     }
1631    
1632    
1633     /////////////////////////////////////////////////////////////////////////////
1634 root 1.1 // XS interface functions
1635    
1636     MODULE = CBOR::XS PACKAGE = CBOR::XS
1637    
1638     BOOT:
1639     {
1640     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1641 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1642 root 1.1
1643 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1644     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1645    
1646     types_true = get_bool ("Types::Serialiser::true" );
1647     types_false = get_bool ("Types::Serialiser::false");
1648     types_error = get_bool ("Types::Serialiser::error");
1649 root 1.11
1650 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1651    
1652 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1653     SvREADONLY_on (sv_cbor);
1654 root 1.56
1655     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1656 root 1.1 }
1657    
1658     PROTOTYPES: DISABLE
1659    
1660     void CLONE (...)
1661     CODE:
1662 root 1.10 cbor_stash = 0;
1663     cbor_tagged_stash = 0;
1664     types_error_stash = 0;
1665     types_boolean_stash = 0;
1666 root 1.1
1667     void new (char *klass)
1668     PPCODE:
1669     {
1670     SV *pv = NEWSV (0, sizeof (CBOR));
1671     SvPOK_only (pv);
1672     cbor_init ((CBOR *)SvPVX (pv));
1673     XPUSHs (sv_2mortal (sv_bless (
1674     newRV_noinc (pv),
1675     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1676     )));
1677     }
1678    
1679     void shrink (CBOR *self, int enable = 1)
1680     ALIAS:
1681     shrink = F_SHRINK
1682     allow_unknown = F_ALLOW_UNKNOWN
1683 root 1.18 allow_sharing = F_ALLOW_SHARING
1684 root 1.37 allow_cycles = F_ALLOW_CYCLES
1685 root 1.60 forbid_objects = F_FORBID_OBJECTS
1686 root 1.32 pack_strings = F_PACK_STRINGS
1687 root 1.54 text_keys = F_TEXT_KEYS
1688     text_strings = F_TEXT_STRINGS
1689 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1690 root 1.1 PPCODE:
1691     {
1692     if (enable)
1693     self->flags |= ix;
1694     else
1695     self->flags &= ~ix;
1696    
1697     XPUSHs (ST (0));
1698     }
1699    
1700     void get_shrink (CBOR *self)
1701     ALIAS:
1702     get_shrink = F_SHRINK
1703     get_allow_unknown = F_ALLOW_UNKNOWN
1704 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1705 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1706 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1707 root 1.32 get_pack_strings = F_PACK_STRINGS
1708 root 1.54 get_text_keys = F_TEXT_KEYS
1709     get_text_strings = F_TEXT_STRINGS
1710 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1711 root 1.1 PPCODE:
1712     XPUSHs (boolSV (self->flags & ix));
1713    
1714     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1715     PPCODE:
1716     self->max_depth = max_depth;
1717     XPUSHs (ST (0));
1718    
1719     U32 get_max_depth (CBOR *self)
1720     CODE:
1721     RETVAL = self->max_depth;
1722     OUTPUT:
1723     RETVAL
1724    
1725     void max_size (CBOR *self, U32 max_size = 0)
1726     PPCODE:
1727     self->max_size = max_size;
1728     XPUSHs (ST (0));
1729    
1730     int get_max_size (CBOR *self)
1731     CODE:
1732     RETVAL = self->max_size;
1733     OUTPUT:
1734     RETVAL
1735    
1736 root 1.27 void filter (CBOR *self, SV *filter = 0)
1737     PPCODE:
1738     SvREFCNT_dec (self->filter);
1739     self->filter = filter ? newSVsv (filter) : filter;
1740     XPUSHs (ST (0));
1741    
1742     SV *get_filter (CBOR *self)
1743     CODE:
1744     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1745     OUTPUT:
1746     RETVAL
1747    
1748 root 1.1 void encode (CBOR *self, SV *scalar)
1749     PPCODE:
1750     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1751     XPUSHs (scalar);
1752    
1753     void decode (CBOR *self, SV *cborstr)
1754     PPCODE:
1755     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1756     XPUSHs (cborstr);
1757    
1758     void decode_prefix (CBOR *self, SV *cborstr)
1759     PPCODE:
1760     {
1761     SV *sv;
1762     char *offset;
1763     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1764     EXTEND (SP, 2);
1765     PUSHs (sv);
1766     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1767     }
1768    
1769 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1770 root 1.42 ALIAS:
1771     incr_parse_multiple = 1
1772 root 1.40 PPCODE:
1773     {
1774     if (SvUTF8 (cborstr))
1775     sv_utf8_downgrade (cborstr, 0);
1776    
1777     if (!self->incr_count)
1778     {
1779     self->incr_count = newAV ();
1780     self->incr_pos = 0;
1781     self->incr_need = 1;
1782    
1783     av_push (self->incr_count, newSViv (1));
1784     }
1785    
1786 root 1.41 do
1787 root 1.40 {
1788     if (!incr_parse (self, cborstr))
1789     {
1790     if (self->incr_need > self->max_size && self->max_size)
1791     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1792     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1793    
1794     break;
1795     }
1796    
1797 root 1.41 SV *sv;
1798     char *offset;
1799 root 1.40
1800 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1801     XPUSHs (sv);
1802 root 1.40
1803 root 1.41 sv_chop (cborstr, offset);
1804 root 1.40
1805 root 1.41 av_clear (self->incr_count);
1806     av_push (self->incr_count, newSViv (1));
1807 root 1.40
1808 root 1.41 self->incr_pos = 0;
1809     self->incr_need = self->incr_pos + 1;
1810 root 1.40 }
1811 root 1.42 while (ix);
1812 root 1.40 }
1813    
1814     void incr_reset (CBOR *self)
1815     CODE:
1816     {
1817     SvREFCNT_dec (self->incr_count);
1818     self->incr_count = 0;
1819     }
1820    
1821 root 1.27 void DESTROY (CBOR *self)
1822     PPCODE:
1823     cbor_free (self);
1824    
1825 root 1.1 PROTOTYPES: ENABLE
1826    
1827     void encode_cbor (SV *scalar)
1828 root 1.36 ALIAS:
1829     encode_cbor = 0
1830     encode_cbor_sharing = F_ALLOW_SHARING
1831 root 1.1 PPCODE:
1832     {
1833     CBOR cbor;
1834     cbor_init (&cbor);
1835 root 1.36 cbor.flags |= ix;
1836 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1837     XPUSHs (scalar);
1838     }
1839    
1840     void decode_cbor (SV *cborstr)
1841     PPCODE:
1842     {
1843     CBOR cbor;
1844     cbor_init (&cbor);
1845     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1846     XPUSHs (cborstr);
1847     }
1848    
1849 root 1.64 #ifdef __AFL_COMPILER
1850    
1851     void
1852     afl_init ()
1853     CODE:
1854     __AFL_INIT ();
1855    
1856     int
1857     afl_loop (unsigned int count = 10000)
1858     CODE:
1859     RETVAL = __AFL_LOOP (count);
1860     OUTPUT:
1861     RETVAL
1862    
1863     #endif
1864