ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.80
Committed: Fri Sep 8 20:03:06 2023 UTC (8 months, 1 week ago) by root
Branch: MAIN
CVS Tags: rel-1_87, HEAD
Changes since 1.79: +38 -30 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.72 // perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0
34     #define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len)))
35    
36 root 1.35 // known major and minor types
37     enum cbor_type
38     {
39     MAJOR_SHIFT = 5,
40     MINOR_MASK = 0x1f,
41    
42     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
43     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
44     MAJOR_BYTES = 2 << MAJOR_SHIFT,
45     MAJOR_TEXT = 3 << MAJOR_SHIFT,
46     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
47     MAJOR_MAP = 5 << MAJOR_SHIFT,
48     MAJOR_TAG = 6 << MAJOR_SHIFT,
49     MAJOR_MISC = 7 << MAJOR_SHIFT,
50    
51     // INT/STRING/ARRAY/MAP subtypes
52     LENGTH_EXT1 = 24,
53     LENGTH_EXT2 = 25,
54     LENGTH_EXT4 = 26,
55     LENGTH_EXT8 = 27,
56    
57     // SIMPLE types (effectively MISC subtypes)
58     SIMPLE_FALSE = 20,
59     SIMPLE_TRUE = 21,
60     SIMPLE_NULL = 22,
61     SIMPLE_UNDEF = 23,
62    
63     // MISC subtype (unused)
64     MISC_EXT1 = 24,
65     MISC_FLOAT16 = 25,
66     MISC_FLOAT32 = 26,
67     MISC_FLOAT64 = 27,
68    
69     // BYTES/TEXT/ARRAY/MAP
70     MINOR_INDEF = 31,
71     };
72    
73 root 1.9 // known tags
74 root 1.8 enum cbor_tag
75     {
76 root 1.35 // extensions
77     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
78     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
79     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
80     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
81     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
82     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
83     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
84    
85     // rfc7049
86     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
87     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
88     CBOR_TAG_POS_BIGNUM = 2, // byte string
89     CBOR_TAG_NEG_BIGNUM = 3, // byte string
90     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
91     CBOR_TAG_BIGFLOAT = 5, // array
92    
93     CBOR_TAG_CONV_B64U = 21, // base64url, any
94     CBOR_TAG_CONV_B64 = 22, // base64, any
95     CBOR_TAG_CONV_HEX = 23, // base16, any
96     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
97    
98     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
99     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
100     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
101     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
102     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
103 root 1.8
104 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
105 root 1.8 };
106    
107 root 1.66 // known forced types, also hardcoded in CBOR.pm
108     enum
109     {
110     AS_CBOR = 0,
111 root 1.68 AS_INT = 1,
112     AS_BYTES = 2,
113     AS_TEXT = 3,
114 root 1.67 AS_FLOAT16 = 4,
115     AS_FLOAT32 = 5,
116     AS_FLOAT64 = 6,
117 root 1.70 AS_MAP = 7,
118 root 1.67 // possibly future enhancements: (generic) float, (generic) string
119 root 1.66 };
120    
121 root 1.80 #define F_SHRINK 0x00000001UL
122     #define F_ALLOW_UNKNOWN 0x00000002UL
123     #define F_ALLOW_SHARING 0x00000004UL
124     #define F_ALLOW_CYCLES 0x00000008UL
125     #define F_ALLOW_WEAK_CYCLES 0x00000010UL
126     #define F_FORBID_OBJECTS 0x00000020UL
127     #define F_PACK_STRINGS 0x00000040UL
128     #define F_TEXT_KEYS 0x00000080UL
129     #define F_TEXT_STRINGS 0x00000100UL
130     #define F_VALIDATE_UTF8 0x00000200UL
131 root 1.1
132     #define INIT_SIZE 32 // initial scalar size to be allocated
133    
134     #define SB do {
135     #define SE } while (0)
136    
137     #define IN_RANGE_INC(type,val,beg,end) \
138     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
139     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
140    
141     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
142    
143     #ifdef USE_ITHREADS
144     # define CBOR_SLOW 1
145     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
146     #else
147     # define CBOR_SLOW 0
148     # define CBOR_STASH cbor_stash
149     #endif
150    
151 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
152 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
153 root 1.1
154     typedef struct {
155     U32 flags;
156     U32 max_depth;
157     STRLEN max_size;
158 root 1.27 SV *filter;
159 root 1.40
160     // for the incremental parser
161     STRLEN incr_pos; // the current offset into the text
162     STRLEN incr_need; // minimum bytes needed to decode
163     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
164 root 1.1 } CBOR;
165    
166 root 1.5 ecb_inline void
167 root 1.1 cbor_init (CBOR *cbor)
168     {
169     Zero (cbor, 1, CBOR);
170     cbor->max_depth = 512;
171     }
172    
173 root 1.27 ecb_inline void
174     cbor_free (CBOR *cbor)
175     {
176     SvREFCNT_dec (cbor->filter);
177 root 1.40 SvREFCNT_dec (cbor->incr_count);
178 root 1.27 }
179    
180 root 1.1 /////////////////////////////////////////////////////////////////////////////
181     // utility functions
182    
183 root 1.5 ecb_inline SV *
184 root 1.1 get_bool (const char *name)
185     {
186     SV *sv = get_sv (name, 1);
187    
188     SvREADONLY_on (sv);
189     SvREADONLY_on (SvRV (sv));
190    
191     return sv;
192     }
193    
194 root 1.5 ecb_inline void
195 root 1.1 shrink (SV *sv)
196     {
197     sv_utf8_downgrade (sv, 1);
198    
199     if (SvLEN (sv) > SvCUR (sv) + 1)
200     {
201     #ifdef SvPV_shrink_to_cur
202     SvPV_shrink_to_cur (sv);
203     #elif defined (SvPV_renew)
204     SvPV_renew (sv, SvCUR (sv) + 1);
205     #endif
206     }
207     }
208    
209 root 1.21 // minimum length of a string to be registered for stringref
210 root 1.71 ecb_inline STRLEN
211 root 1.21 minimum_string_length (UV idx)
212     {
213 root 1.61 return idx <= 23 ? 3
214     : idx <= 0xffU ? 4
215     : idx <= 0xffffU ? 5
216     : idx <= 0xffffffffU ? 7
217     : 11;
218 root 1.21 }
219    
220 root 1.1 /////////////////////////////////////////////////////////////////////////////
221     // encoder
222    
223     // structure used for encoding CBOR
224     typedef struct
225     {
226     char *cur; // SvPVX (sv) + current output position
227     char *end; // SvEND (sv)
228     SV *sv; // result scalar
229     CBOR cbor;
230     U32 depth; // recursion level
231 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
232     UV stringref_idx;
233 root 1.19 HV *shareable; // ptr => index, or 0
234     UV shareable_idx;
235 root 1.1 } enc_t;
236    
237 root 1.5 ecb_inline void
238 root 1.1 need (enc_t *enc, STRLEN len)
239     {
240 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
241 root 1.1 {
242     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
243     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
244     enc->cur = SvPVX (enc->sv) + cur;
245     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
246     }
247     }
248    
249 root 1.70 static void encode_sv (enc_t *enc, SV *sv);
250    
251 root 1.5 ecb_inline void
252 root 1.1 encode_ch (enc_t *enc, char ch)
253     {
254     need (enc, 1);
255     *enc->cur++ = ch;
256     }
257    
258 root 1.66 // used for tags, intregers, element counts and so on
259 root 1.1 static void
260     encode_uint (enc_t *enc, int major, UV len)
261     {
262     need (enc, 9);
263    
264 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
265 root 1.1 *enc->cur++ = major | len;
266 root 1.36 else if (ecb_expect_true (len <= 0xffU))
267 root 1.1 {
268 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
269 root 1.1 *enc->cur++ = len;
270     }
271 root 1.36 else if (len <= 0xffffU)
272 root 1.1 {
273 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
274 root 1.1 *enc->cur++ = len >> 8;
275     *enc->cur++ = len;
276     }
277 root 1.36 else if (len <= 0xffffffffU)
278 root 1.1 {
279 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
280 root 1.1 *enc->cur++ = len >> 24;
281     *enc->cur++ = len >> 16;
282     *enc->cur++ = len >> 8;
283     *enc->cur++ = len;
284     }
285 root 1.4 else
286 root 1.1 {
287 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
288 root 1.1 *enc->cur++ = len >> 56;
289     *enc->cur++ = len >> 48;
290     *enc->cur++ = len >> 40;
291     *enc->cur++ = len >> 32;
292     *enc->cur++ = len >> 24;
293     *enc->cur++ = len >> 16;
294     *enc->cur++ = len >> 8;
295     *enc->cur++ = len;
296     }
297     }
298    
299 root 1.66 // encodes a perl value into a CBOR integer
300 root 1.67 ecb_inline void
301 root 1.66 encode_int (enc_t *enc, SV *sv)
302     {
303     if (SvIsUV (sv))
304     encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
305     else if (SvIVX (sv) >= 0)
306     encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
307     else
308     encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
309     }
310    
311 root 1.21 ecb_inline void
312     encode_tag (enc_t *enc, UV tag)
313     {
314 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
315 root 1.21 }
316    
317 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
318     ecb_noinline static void
319     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
320     {
321     STRLEN ulen = len;
322     U8 *p, *pend = (U8 *)str + len;
323    
324     for (p = (U8 *)str; p < pend; ++p)
325     ulen += *p >> 7; // count set high bits
326    
327     encode_uint (enc, MAJOR_TEXT, ulen);
328    
329     need (enc, ulen);
330     for (p = (U8 *)str; p < pend; ++p)
331     if (*p < 0x80)
332     *enc->cur++ = *p;
333     else
334     {
335     *enc->cur++ = 0xc0 + (*p >> 6);
336     *enc->cur++ = 0x80 + (*p & 63);
337     }
338     }
339    
340 root 1.30 ecb_inline void
341 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
342 root 1.30 {
343 root 1.53 if (ecb_expect_false (upgrade_utf8))
344 root 1.51 if (!utf8)
345     {
346 root 1.53 encode_str_utf8 (enc, utf8, str, len);
347 root 1.51 return;
348     }
349    
350 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
351 root 1.30 need (enc, len);
352     memcpy (enc->cur, str, len);
353     enc->cur += len;
354     }
355    
356 root 1.54 ecb_inline void
357 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
358 root 1.1 {
359 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
360 root 1.21 {
361 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
362 root 1.21
363     if (SvOK (*svp))
364     {
365     // already registered, use stringref
366     encode_tag (enc, CBOR_TAG_STRINGREF);
367 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
368 root 1.21 return;
369     }
370     else if (len >= minimum_string_length (enc->stringref_idx))
371     {
372     // register only
373     sv_setuv (*svp, enc->stringref_idx);
374     ++enc->stringref_idx;
375     }
376     }
377    
378 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
379 root 1.1 }
380    
381 root 1.66 ecb_inline void
382     encode_float16 (enc_t *enc, NV nv)
383     {
384     need (enc, 1+2);
385    
386     *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
387    
388     uint16_t fp = ecb_float_to_binary16 (nv);
389    
390     if (!ecb_big_endian ())
391     fp = ecb_bswap16 (fp);
392    
393     memcpy (enc->cur, &fp, 2);
394     enc->cur += 2;
395     }
396    
397     ecb_inline void
398     encode_float32 (enc_t *enc, NV nv)
399     {
400     need (enc, 1+4);
401    
402     *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
403    
404     uint32_t fp = ecb_float_to_binary32 (nv);
405    
406     if (!ecb_big_endian ())
407     fp = ecb_bswap32 (fp);
408    
409     memcpy (enc->cur, &fp, 4);
410     enc->cur += 4;
411     }
412    
413     ecb_inline void
414     encode_float64 (enc_t *enc, NV nv)
415     {
416     need (enc, 1+8);
417    
418     *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
419    
420     uint64_t fp = ecb_double_to_binary64 (nv);
421    
422     if (!ecb_big_endian ())
423     fp = ecb_bswap64 (fp);
424    
425     memcpy (enc->cur, &fp, 8);
426     enc->cur += 8;
427     }
428    
429     ecb_inline void
430 root 1.69 encode_bool (enc_t *enc, int istrue)
431     {
432     encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
433     }
434    
435 root 1.70 // encodes an arrayref containing key-value pairs as CBOR map
436     ecb_inline void
437     encode_array_as_map (enc_t *enc, SV *sv)
438     {
439     if (enc->depth >= enc->cbor.max_depth)
440     croak (ERR_NESTING_EXCEEDED);
441    
442     ++enc->depth;
443    
444     // as_map does error checking for us, but we re-check in case
445     // things have changed.
446    
447     if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
448     croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)");
449    
450     AV *av = (AV *)SvRV (sv);
451     int i, len = av_len (av);
452    
453     if (!(len & 1))
454     croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)");
455    
456     encode_uint (enc, MAJOR_MAP, (len + 1) >> 1);
457    
458     for (i = 0; i <= len; ++i)
459     {
460     SV **svp = av_fetch (av, i, 0);
461     encode_sv (enc, svp ? *svp : &PL_sv_undef);
462     }
463    
464     --enc->depth;
465     }
466    
467 root 1.69 ecb_inline void
468 root 1.66 encode_forced (enc_t *enc, UV type, SV *sv)
469     {
470     switch (type)
471     {
472     case AS_CBOR:
473     {
474     STRLEN len;
475     char *str = SvPVbyte (sv, len);
476    
477     need (enc, len);
478     memcpy (enc->cur, str, len);
479     enc->cur += len;
480     }
481     break;
482    
483     case AS_BYTES:
484     {
485     STRLEN len;
486     char *str = SvPVbyte (sv, len);
487     encode_strref (enc, 0, 0, str, len);
488     }
489     break;
490    
491     case AS_TEXT:
492     {
493     STRLEN len;
494     char *str = SvPVutf8 (sv, len);
495     encode_strref (enc, 1, 1, str, len);
496     }
497     break;
498    
499 root 1.69 case AS_INT: encode_int (enc, sv); break;
500    
501 root 1.66 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
502     case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
503     case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
504    
505 root 1.70 case AS_MAP: encode_array_as_map (enc, sv); break;
506    
507 root 1.66 default:
508     croak ("encountered malformed CBOR::XS::Tagged object");
509     }
510     }
511    
512 root 1.1 static void
513     encode_av (enc_t *enc, AV *av)
514     {
515     int i, len = av_len (av);
516    
517     if (enc->depth >= enc->cbor.max_depth)
518     croak (ERR_NESTING_EXCEEDED);
519    
520     ++enc->depth;
521    
522 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
523 root 1.1
524 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
525 root 1.45 for (i = 0; i <= len; ++i)
526     {
527     SV **svp = av_fetch (av, i, 0);
528     encode_sv (enc, svp ? *svp : &PL_sv_undef);
529     }
530     else
531     for (i = 0; i <= len; ++i)
532 root 1.46 {
533     SV *sv = AvARRAY (av)[i];
534     encode_sv (enc, sv ? sv : &PL_sv_undef);
535     }
536 root 1.1
537     --enc->depth;
538     }
539    
540     static void
541     encode_hv (enc_t *enc, HV *hv)
542     {
543     HE *he;
544    
545     if (enc->depth >= enc->cbor.max_depth)
546     croak (ERR_NESTING_EXCEEDED);
547    
548     ++enc->depth;
549    
550     int pairs = hv_iterinit (hv);
551     int mg = SvMAGICAL (hv);
552    
553 root 1.63 if (ecb_expect_false (mg))
554 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
555 root 1.1 else
556 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
557 root 1.1
558     while ((he = hv_iternext (hv)))
559     {
560 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
561     encode_sv (enc, HeSVKEY (he));
562 root 1.1 else
563 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
564 root 1.1
565 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
566 root 1.1 }
567    
568 root 1.63 if (ecb_expect_false (mg))
569 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
570 root 1.1
571     --enc->depth;
572     }
573    
574     // encode objects, arrays and special \0=false and \1=true values.
575     static void
576     encode_rv (enc_t *enc, SV *sv)
577     {
578 root 1.19 SvGETMAGIC (sv);
579 root 1.1
580 root 1.19 svtype svt = SvTYPE (sv);
581 root 1.18
582 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
583 root 1.1 {
584 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
585     ? types_boolean_stash
586     : gv_stashpv ("Types::Serialiser::Boolean", 1);
587     HV *error_stash = !CBOR_SLOW || types_error_stash
588     ? types_error_stash
589     : gv_stashpv ("Types::Serialiser::Error", 1);
590 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
591     ? cbor_tagged_stash
592     : gv_stashpv ("CBOR::XS::Tagged" , 1);
593 root 1.1
594 root 1.11 HV *stash = SvSTASH (sv);
595    
596     if (stash == boolean_stash)
597 root 1.34 {
598 root 1.69 encode_bool (enc, SvIV (sv));
599 root 1.34 return;
600     }
601 root 1.11 else if (stash == error_stash)
602 root 1.34 {
603 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
604 root 1.34 return;
605     }
606 root 1.11 else if (stash == tagged_stash)
607 root 1.6 {
608     if (svt != SVt_PVAV)
609     croak ("encountered CBOR::XS::Tagged object that isn't an array");
610    
611 root 1.66 switch (av_len ((AV *)sv))
612     {
613     case 2-1:
614     // actually a tagged value
615     encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
616     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
617     break;
618    
619     case 3-1:
620     // a forced type [value, type, undef]
621     encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
622     break;
623    
624     default:
625     croak ("encountered malformed CBOR::XS::Tagged object");
626     }
627 root 1.34
628     return;
629 root 1.6 }
630 root 1.34 }
631    
632     if (ecb_expect_false (SvREFCNT (sv) > 1)
633     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
634     {
635 root 1.63 if (ecb_expect_false (!enc->shareable))
636 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
637    
638     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
639    
640     if (SvOK (*svp))
641     {
642     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
643 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
644 root 1.34 return;
645     }
646     else
647     {
648     sv_setuv (*svp, enc->shareable_idx);
649     ++enc->shareable_idx;
650     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
651     }
652     }
653    
654     if (ecb_expect_false (SvOBJECT (sv)))
655     {
656     HV *stash = SvSTASH (sv);
657     GV *method;
658    
659 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
660     croak ("encountered object '%s', but forbid_objects is enabled",
661     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
662     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
663 root 1.1 {
664 root 1.11 dSP;
665    
666 root 1.50 ENTER; SAVETMPS;
667     PUSHMARK (SP);
668 root 1.6 // we re-bless the reference to get overload and other niceties right
669 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
670    
671     PUTBACK;
672     // G_SCALAR ensures that return value is 1
673     call_sv ((SV *)GvCV (method), G_SCALAR);
674     SPAGAIN;
675    
676     // catch this surprisingly common error
677     if (SvROK (TOPs) && SvRV (TOPs) == sv)
678     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
679    
680     encode_sv (enc, POPs);
681    
682     PUTBACK;
683    
684     FREETMPS; LEAVE;
685     }
686     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
687     {
688     dSP;
689 root 1.6
690 root 1.50 ENTER; SAVETMPS;
691     PUSHMARK (SP);
692 root 1.11 EXTEND (SP, 2);
693     // we re-bless the reference to get overload and other niceties right
694     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
695     PUSHs (sv_cbor);
696 root 1.1
697 root 1.11 PUTBACK;
698     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
699     SPAGAIN;
700 root 1.6
701 root 1.11 // catch this surprisingly common error
702     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
703     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
704 root 1.6
705 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
706 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
707 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
708 root 1.6
709 root 1.65 {
710     int i;
711    
712     for (i = 0; i < count; ++i)
713     encode_sv (enc, SP[i + 1 - count]);
714    
715     SP -= count;
716     }
717 root 1.6
718 root 1.11 PUTBACK;
719 root 1.6
720 root 1.11 FREETMPS; LEAVE;
721 root 1.1 }
722 root 1.11 else
723     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
724     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
725 root 1.1 }
726     else if (svt == SVt_PVHV)
727     encode_hv (enc, (HV *)sv);
728     else if (svt == SVt_PVAV)
729     encode_av (enc, (AV *)sv);
730 root 1.18 else
731 root 1.1 {
732 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
733     encode_sv (enc, sv);
734 root 1.1 }
735     }
736    
737     static void
738     encode_nv (enc_t *enc, SV *sv)
739     {
740     double nv = SvNVX (sv);
741    
742     need (enc, 9);
743    
744 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
745     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
746 root 1.1 //TODO: maybe I32?
747 root 1.5 else if (ecb_expect_false (nv == (float)nv))
748 root 1.66 encode_float32 (enc, nv);
749 root 1.1 else
750 root 1.66 encode_float64 (enc, nv);
751 root 1.1 }
752    
753     static void
754     encode_sv (enc_t *enc, SV *sv)
755     {
756     SvGETMAGIC (sv);
757    
758     if (SvPOKp (sv))
759     {
760     STRLEN len;
761     char *str = SvPV (sv, len);
762 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
763 root 1.1 }
764     else if (SvNOKp (sv))
765     encode_nv (enc, sv);
766     else if (SvIOKp (sv))
767 root 1.67 encode_int (enc, sv);
768 root 1.1 else if (SvROK (sv))
769     encode_rv (enc, SvRV (sv));
770     else if (!SvOK (sv))
771 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
772 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
773 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
774 root 1.1 else
775     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
776     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
777     }
778    
779     static SV *
780     encode_cbor (SV *scalar, CBOR *cbor)
781     {
782 root 1.48 enc_t enc = { 0 };
783 root 1.1
784 root 1.63 enc.cbor = *cbor;
785     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
786     enc.cur = SvPVX (enc.sv);
787     enc.end = SvEND (enc.sv);
788 root 1.1
789     SvPOK_only (enc.sv);
790 root 1.20
791 root 1.32 if (cbor->flags & F_PACK_STRINGS)
792 root 1.20 {
793     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
794     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
795     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
796     }
797    
798 root 1.1 encode_sv (&enc, scalar);
799    
800     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
801     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
802    
803     if (enc.cbor.flags & F_SHRINK)
804     shrink (enc.sv);
805    
806     return enc.sv;
807     }
808    
809     /////////////////////////////////////////////////////////////////////////////
810     // decoder
811    
812     // structure used for decoding CBOR
813     typedef struct
814     {
815     U8 *cur; // current parser pointer
816     U8 *end; // end of input string
817     const char *err; // parse error, if != 0
818     CBOR cbor;
819     U32 depth; // recursion depth
820     U32 maxdepth; // recursion depth limit
821 root 1.19 AV *shareable;
822 root 1.20 AV *stringref;
823 root 1.27 SV *decode_tagged;
824 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
825 root 1.1 } dec_t;
826    
827 root 1.59 // set dec->err to ERRSV
828     ecb_cold static void
829     err_errsv (dec_t *dec)
830     {
831     if (!dec->err)
832     {
833     dec->err_sv = newSVsv (ERRSV);
834    
835     // chop off the trailing \n
836     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
837     *SvEND (dec->err_sv) = 0;
838    
839     dec->err = SvPVutf8_nolen (dec->err_sv);
840     }
841     }
842 root 1.1
843 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
844     ecb_cold static void
845     err_set (dec_t *dec, const char *reason)
846     {
847     if (!dec->err)
848     dec->err = reason;
849     }
850    
851     ecb_cold static void
852     err_unexpected_end (dec_t *dec)
853     {
854     err_set (dec, "unexpected end of CBOR data");
855     }
856    
857     #define ERR_DO(do) SB do; goto fail; SE
858     #define ERR(reason) ERR_DO (err_set (dec, reason))
859     #define ERR_ERRSV ERR_DO (err_errsv (dec))
860    
861 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
862 root 1.1
863 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
864 root 1.1 #define DEC_DEC_DEPTH --dec->depth
865    
866     static UV
867     decode_uint (dec_t *dec)
868     {
869 root 1.35 U8 m = *dec->cur & MINOR_MASK;
870     ++dec->cur;
871    
872     if (ecb_expect_true (m < LENGTH_EXT1))
873     return m;
874 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
875     {
876     WANT (1);
877     dec->cur += 1;
878     return dec->cur[-1];
879     }
880     else if (ecb_expect_true (m == LENGTH_EXT2))
881     {
882     WANT (2);
883     dec->cur += 2;
884     return (((UV)dec->cur[-2]) << 8)
885     | ((UV)dec->cur[-1]);
886     }
887     else if (ecb_expect_true (m == LENGTH_EXT4))
888     {
889     WANT (4);
890     dec->cur += 4;
891     return (((UV)dec->cur[-4]) << 24)
892     | (((UV)dec->cur[-3]) << 16)
893     | (((UV)dec->cur[-2]) << 8)
894     | ((UV)dec->cur[-1]);
895     }
896     else if (ecb_expect_true (m == LENGTH_EXT8))
897 root 1.1 {
898 root 1.36 WANT (8);
899     dec->cur += 8;
900 root 1.34
901 root 1.36 return
902 root 1.34 #if UVSIZE < 8
903 root 1.36 0
904 root 1.34 #else
905 root 1.36 (((UV)dec->cur[-8]) << 56)
906     | (((UV)dec->cur[-7]) << 48)
907     | (((UV)dec->cur[-6]) << 40)
908     | (((UV)dec->cur[-5]) << 32)
909 root 1.34 #endif
910 root 1.36 | (((UV)dec->cur[-4]) << 24)
911     | (((UV)dec->cur[-3]) << 16)
912     | (((UV)dec->cur[-2]) << 8)
913     | ((UV)dec->cur[-1]);
914 root 1.1 }
915 root 1.36 else
916     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
917 root 1.1
918     fail:
919     return 0;
920     }
921    
922     static SV *decode_sv (dec_t *dec);
923    
924     static SV *
925     decode_av (dec_t *dec)
926     {
927     AV *av = newAV ();
928    
929     DEC_INC_DEPTH;
930    
931 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
932 root 1.1 {
933     ++dec->cur;
934    
935     for (;;)
936     {
937     WANT (1);
938    
939 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
940 root 1.1 {
941     ++dec->cur;
942     break;
943     }
944    
945     av_push (av, decode_sv (dec));
946     }
947     }
948     else
949     {
950 root 1.57 UV i, len = decode_uint (dec);
951 root 1.1
952 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
953 root 1.1 av_fill (av, len - 1);
954    
955     for (i = 0; i < len; ++i)
956     AvARRAY (av)[i] = decode_sv (dec);
957     }
958    
959     DEC_DEC_DEPTH;
960     return newRV_noinc ((SV *)av);
961    
962     fail:
963 root 1.63 SvREFCNT_dec_NN (av);
964 root 1.1 DEC_DEC_DEPTH;
965     return &PL_sv_undef;
966     }
967    
968 root 1.16 static void
969     decode_he (dec_t *dec, HV *hv)
970     {
971     // for speed reasons, we specialcase single-string
972 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
973    
974 root 1.23 if (ecb_expect_true (!dec->stringref))
975 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
976 root 1.21 {
977 root 1.56 STRLEN len = decode_uint (dec);
978 root 1.21 char *key = (char *)dec->cur;
979 root 1.16
980 root 1.49 WANT (len);
981 root 1.21 dec->cur += len;
982    
983 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
984 root 1.16
985 root 1.21 return;
986     }
987 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
988 root 1.21 {
989 root 1.56 STRLEN len = decode_uint (dec);
990 root 1.21 char *key = (char *)dec->cur;
991 root 1.16
992 root 1.49 WANT (len);
993 root 1.21 dec->cur += len;
994 root 1.20
995 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
996 root 1.72 if (!cbor_is_utf8_string ((U8 *)key, len))
997 root 1.38 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
998 root 1.16
999 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
1000 root 1.16
1001 root 1.21 return;
1002     }
1003 root 1.20
1004 root 1.21 SV *k = decode_sv (dec);
1005     SV *v = decode_sv (dec);
1006 root 1.16
1007 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
1008     // methods, and this is hopefully the only place where it can happen,
1009     // so if there is a chance of an exception, take the very slow path.
1010     // since catching exceptions is "undocumented/internal/forbidden" by
1011     // the new p5p powers, we need to call out to a perl function :/
1012     if (ecb_expect_false (SvAMAGIC (k)))
1013     {
1014     dSP;
1015    
1016     ENTER; SAVETMPS;
1017     PUSHMARK (SP);
1018     EXTEND (SP, 3);
1019     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
1020     PUSHs (sv_2mortal (k));
1021     PUSHs (sv_2mortal (v));
1022    
1023     PUTBACK;
1024     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
1025     SPAGAIN;
1026    
1027     FREETMPS; LEAVE;
1028    
1029     if (SvTRUE (ERRSV))
1030     ERR_ERRSV;
1031    
1032     return;
1033     }
1034    
1035 root 1.21 hv_store_ent (hv, k, v, 0);
1036 root 1.63 SvREFCNT_dec_NN (k);
1037 root 1.38
1038     fail:
1039     ;
1040 root 1.16 }
1041    
1042 root 1.1 static SV *
1043     decode_hv (dec_t *dec)
1044     {
1045     HV *hv = newHV ();
1046    
1047     DEC_INC_DEPTH;
1048    
1049 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
1050 root 1.1 {
1051     ++dec->cur;
1052    
1053     for (;;)
1054     {
1055     WANT (1);
1056    
1057 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
1058 root 1.1 {
1059     ++dec->cur;
1060     break;
1061     }
1062    
1063 root 1.16 decode_he (dec, hv);
1064 root 1.1 }
1065     }
1066     else
1067     {
1068 root 1.57 UV pairs = decode_uint (dec);
1069    
1070     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
1071 root 1.1
1072 root 1.16 while (pairs--)
1073     decode_he (dec, hv);
1074 root 1.1 }
1075    
1076     DEC_DEC_DEPTH;
1077     return newRV_noinc ((SV *)hv);
1078    
1079     fail:
1080 root 1.63 SvREFCNT_dec_NN (hv);
1081 root 1.1 DEC_DEC_DEPTH;
1082     return &PL_sv_undef;
1083     }
1084    
1085     static SV *
1086     decode_str (dec_t *dec, int utf8)
1087     {
1088 root 1.6 SV *sv = 0;
1089 root 1.1
1090 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
1091 root 1.1 {
1092 root 1.33 // indefinite length strings
1093 root 1.1 ++dec->cur;
1094    
1095 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
1096 root 1.33
1097 root 1.1 sv = newSVpvn ("", 0);
1098    
1099     for (;;)
1100     {
1101     WANT (1);
1102    
1103 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
1104     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
1105 root 1.33 {
1106     ++dec->cur;
1107     break;
1108     }
1109     else
1110     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
1111    
1112     STRLEN len = decode_uint (dec);
1113 root 1.1
1114 root 1.33 WANT (len);
1115     sv_catpvn (sv, dec->cur, len);
1116     dec->cur += len;
1117 root 1.1 }
1118     }
1119     else
1120     {
1121     STRLEN len = decode_uint (dec);
1122    
1123     WANT (len);
1124     sv = newSVpvn (dec->cur, len);
1125     dec->cur += len;
1126 root 1.25
1127     if (ecb_expect_false (dec->stringref)
1128     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
1129     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
1130 root 1.1 }
1131    
1132     if (utf8)
1133 root 1.38 {
1134     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
1135 root 1.72 if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv)))
1136 root 1.38 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
1137    
1138     SvUTF8_on (sv);
1139     }
1140 root 1.1
1141     return sv;
1142    
1143     fail:
1144 root 1.6 SvREFCNT_dec (sv);
1145 root 1.1 return &PL_sv_undef;
1146     }
1147    
1148     static SV *
1149 root 1.3 decode_tagged (dec_t *dec)
1150     {
1151 root 1.19 SV *sv = 0;
1152 root 1.3 UV tag = decode_uint (dec);
1153 root 1.19
1154     WANT (1);
1155 root 1.3
1156 root 1.18 switch (tag)
1157     {
1158     case CBOR_TAG_MAGIC:
1159 root 1.20 sv = decode_sv (dec);
1160     break;
1161 root 1.18
1162     case CBOR_TAG_INDIRECTION:
1163 root 1.20 sv = newRV_noinc (decode_sv (dec));
1164     break;
1165    
1166     case CBOR_TAG_STRINGREF_NAMESPACE:
1167     {
1168 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1169 root 1.55 // erase mortalised caches, e.g. "shareable"
1170     ENTER;
1171 root 1.20
1172     SAVESPTR (dec->stringref);
1173     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1174    
1175     sv = decode_sv (dec);
1176    
1177 root 1.55 LEAVE;
1178 root 1.20 }
1179     break;
1180    
1181     case CBOR_TAG_STRINGREF:
1182     {
1183 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1184 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1185    
1186     UV idx = decode_uint (dec);
1187    
1188 root 1.64 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1189 root 1.20 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1190    
1191     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1192     }
1193     break;
1194 root 1.11
1195 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1196     {
1197     if (ecb_expect_false (!dec->shareable))
1198     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1199    
1200 root 1.80 if (ecb_expect_false (dec->cbor.flags & (F_ALLOW_CYCLES | F_ALLOW_WEAK_CYCLES)))
1201 root 1.37 {
1202 root 1.77 // if cycles are allowed, then we store an AV as value
1203     // while it is being decoded, and gather unresolved
1204     // references in it, to be re4solved after decoding.
1205     int idx, i;
1206     AV *av = newAV ();
1207 root 1.78 av_push (dec->shareable, (SV *)av);
1208 root 1.77 idx = AvFILLp (dec->shareable);
1209    
1210     sv = decode_sv (dec);
1211 root 1.18
1212 root 1.77 // the AV now contains \undef for all unresolved references,
1213     // so we fix them up here.
1214     for (i = 0; i <= AvFILLp (av); ++i)
1215     SvRV_set (AvARRAY (av)[i], SvREFCNT_inc_NN (SvRV (sv)));
1216    
1217 root 1.80 // weaken all recursive references
1218     if (dec->cbor.flags & F_ALLOW_WEAK_CYCLES)
1219     for (i = 0; i <= AvFILLp (av); ++i)
1220     sv_rvweaken (AvARRAY (av)[i]);
1221    
1222 root 1.77 // now replace the AV by a reference to the completed value
1223 root 1.78 SvREFCNT_dec_NN ((SV *)av);
1224 root 1.77 AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv);
1225 root 1.37 }
1226     else
1227     {
1228     av_push (dec->shareable, &PL_sv_undef);
1229     int idx = AvFILLp (dec->shareable);
1230     sv = decode_sv (dec);
1231 root 1.78 AvARRAY (dec->shareable)[idx] = SvREFCNT_inc_NN (sv);
1232 root 1.37 }
1233 root 1.19 }
1234 root 1.20 break;
1235 root 1.18
1236     case CBOR_TAG_VALUE_SHAREDREF:
1237 root 1.17 {
1238 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1239 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1240 root 1.18
1241 root 1.19 UV idx = decode_uint (dec);
1242    
1243 root 1.64 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1244 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1245    
1246 root 1.77 sv = AvARRAY (dec->shareable)[idx];
1247 root 1.37
1248 root 1.77 // reference to cycle, we create a new \undef and use that, and also
1249     // registerr it in the AV for later fixing
1250 root 1.80 if (ecb_expect_false (SvTYPE (sv) == SVt_PVAV))
1251 root 1.77 {
1252     AV *av = (AV *)sv;
1253     sv = newRV_noinc (&PL_sv_undef);
1254 root 1.79 av_push (av, SvREFCNT_inc_NN (sv));
1255 root 1.77 }
1256 root 1.80 else if (ecb_expect_false (sv == &PL_sv_undef)) // not yet decoded, but cycles not allowed
1257 root 1.37 ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1258 root 1.77 else // we decoded the object earlier, no cycle
1259     sv = newSVsv (sv);
1260 root 1.17 }
1261 root 1.20 break;
1262 root 1.17
1263 root 1.18 case CBOR_TAG_PERL_OBJECT:
1264     {
1265 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1266     goto filter;
1267    
1268 root 1.19 sv = decode_sv (dec);
1269    
1270 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1271     ERR ("corrupted CBOR data (non-array perl object)");
1272    
1273     AV *av = (AV *)SvRV (sv);
1274     int len = av_len (av) + 1;
1275     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1276    
1277     if (!stash)
1278     ERR ("cannot decode perl-object (package does not exist)");
1279    
1280     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1281    
1282     if (!method)
1283     ERR ("cannot decode perl-object (package does not have a THAW method)");
1284    
1285     dSP;
1286    
1287 root 1.50 ENTER; SAVETMPS;
1288     PUSHMARK (SP);
1289 root 1.18 EXTEND (SP, len + 1);
1290     // we re-bless the reference to get overload and other niceties right
1291     PUSHs (*av_fetch (av, 0, 1));
1292     PUSHs (sv_cbor);
1293    
1294     int i;
1295    
1296     for (i = 1; i < len; ++i)
1297     PUSHs (*av_fetch (av, i, 1));
1298    
1299     PUTBACK;
1300     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1301     SPAGAIN;
1302    
1303     if (SvTRUE (ERRSV))
1304     {
1305     FREETMPS; LEAVE;
1306 root 1.59 ERR_ERRSV;
1307 root 1.18 }
1308    
1309 root 1.63 SvREFCNT_dec_NN (sv);
1310 root 1.18 sv = SvREFCNT_inc (POPs);
1311 root 1.11
1312 root 1.18 PUTBACK;
1313 root 1.11
1314 root 1.18 FREETMPS; LEAVE;
1315     }
1316 root 1.20 break;
1317 root 1.9
1318 root 1.18 default:
1319 root 1.60 filter:
1320 root 1.18 {
1321 root 1.58 SV *tag_sv = newSVuv (tag);
1322    
1323 root 1.19 sv = decode_sv (dec);
1324    
1325 root 1.27 dSP;
1326 root 1.50 ENTER; SAVETMPS;
1327     PUSHMARK (SP);
1328 root 1.27 EXTEND (SP, 2);
1329 root 1.58 PUSHs (tag_sv);
1330 root 1.27 PUSHs (sv);
1331    
1332     PUTBACK;
1333     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1334     SPAGAIN;
1335    
1336     if (SvTRUE (ERRSV))
1337     {
1338 root 1.63 SvREFCNT_dec_NN (tag_sv);
1339 root 1.27 FREETMPS; LEAVE;
1340 root 1.59 ERR_ERRSV;
1341 root 1.27 }
1342    
1343     if (count)
1344     {
1345 root 1.63 SvREFCNT_dec_NN (tag_sv);
1346     SvREFCNT_dec_NN (sv);
1347 root 1.65 sv = SvREFCNT_inc_NN (TOPs);
1348     SP -= count;
1349 root 1.27 }
1350     else
1351     {
1352     AV *av = newAV ();
1353 root 1.58 av_push (av, tag_sv);
1354 root 1.27 av_push (av, sv);
1355    
1356     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1357     ? cbor_tagged_stash
1358     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1359     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1360     }
1361 root 1.7
1362 root 1.27 PUTBACK;
1363    
1364     FREETMPS; LEAVE;
1365 root 1.18 }
1366 root 1.20 break;
1367 root 1.11 }
1368 root 1.9
1369 root 1.20 return sv;
1370    
1371 root 1.9 fail:
1372     SvREFCNT_dec (sv);
1373     return &PL_sv_undef;
1374 root 1.3 }
1375    
1376     static SV *
1377 root 1.1 decode_sv (dec_t *dec)
1378     {
1379     WANT (1);
1380    
1381 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1382 root 1.1 {
1383 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1384     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1385     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1386     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1387     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1388     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1389     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1390    
1391     case MAJOR_MISC >> MAJOR_SHIFT:
1392     switch (*dec->cur++ & MINOR_MASK)
1393 root 1.1 {
1394 root 1.35 case SIMPLE_FALSE:
1395 root 1.1 #if CBOR_SLOW
1396 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1397 root 1.1 #endif
1398 root 1.10 return newSVsv (types_false);
1399 root 1.35 case SIMPLE_TRUE:
1400 root 1.1 #if CBOR_SLOW
1401 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1402 root 1.1 #endif
1403 root 1.10 return newSVsv (types_true);
1404 root 1.35 case SIMPLE_NULL:
1405 root 1.1 return newSVsv (&PL_sv_undef);
1406 root 1.35 case SIMPLE_UNDEF:
1407 root 1.10 #if CBOR_SLOW
1408     types_error = get_bool ("Types::Serialiser::error");
1409     #endif
1410     return newSVsv (types_error);
1411 root 1.1
1412 root 1.35 case MISC_FLOAT16:
1413 root 1.2 {
1414     WANT (2);
1415    
1416     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1417     dec->cur += 2;
1418    
1419     return newSVnv (ecb_binary16_to_float (fp));
1420     }
1421 root 1.1
1422 root 1.35 case MISC_FLOAT32:
1423 root 1.1 {
1424     uint32_t fp;
1425     WANT (4);
1426     memcpy (&fp, dec->cur, 4);
1427     dec->cur += 4;
1428    
1429     if (!ecb_big_endian ())
1430     fp = ecb_bswap32 (fp);
1431    
1432     return newSVnv (ecb_binary32_to_float (fp));
1433     }
1434    
1435 root 1.35 case MISC_FLOAT64:
1436 root 1.1 {
1437     uint64_t fp;
1438     WANT (8);
1439     memcpy (&fp, dec->cur, 8);
1440     dec->cur += 8;
1441    
1442     if (!ecb_big_endian ())
1443     fp = ecb_bswap64 (fp);
1444    
1445     return newSVnv (ecb_binary64_to_double (fp));
1446     }
1447    
1448 root 1.35 // 0..19 unassigned simple
1449 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1450     // 28-30 unassigned misc
1451     // 31 break code
1452 root 1.1 default:
1453 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1454 root 1.1 }
1455    
1456     break;
1457     }
1458    
1459     fail:
1460     return &PL_sv_undef;
1461     }
1462    
1463     static SV *
1464     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1465     {
1466 root 1.48 dec_t dec = { 0 };
1467 root 1.1 SV *sv;
1468 root 1.16 STRLEN len;
1469     char *data = SvPVbyte (string, len);
1470 root 1.1
1471 root 1.16 if (len > cbor->max_size && cbor->max_size)
1472     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1473     (unsigned long)len, (unsigned long)cbor->max_size);
1474 root 1.1
1475     dec.cbor = *cbor;
1476 root 1.16 dec.cur = (U8 *)data;
1477     dec.end = (U8 *)data + len;
1478 root 1.1
1479     sv = decode_sv (&dec);
1480    
1481     if (offset_return)
1482     *offset_return = dec.cur;
1483    
1484     if (!(offset_return || !sv))
1485 root 1.2 if (dec.cur != dec.end && !dec.err)
1486     dec.err = "garbage after CBOR object";
1487    
1488     if (dec.err)
1489 root 1.1 {
1490 root 1.39 if (dec.shareable)
1491     {
1492 root 1.58 // need to break cyclic links, which would all be in shareable
1493 root 1.39 int i;
1494     SV **svp;
1495    
1496     for (i = av_len (dec.shareable) + 1; i--; )
1497     if ((svp = av_fetch (dec.shareable, i, 0)))
1498     sv_setsv (*svp, &PL_sv_undef);
1499     }
1500    
1501 root 1.63 SvREFCNT_dec_NN (sv);
1502 root 1.59
1503     if (dec.err_sv)
1504     sv_2mortal (dec.err_sv);
1505    
1506 root 1.74 croak ("%s, at offset %ld (octet 0x%02x)", dec.err, (long)(dec.cur - (U8 *)data), (int)(uint8_t)*dec.cur);
1507 root 1.1 }
1508    
1509     sv = sv_2mortal (sv);
1510    
1511     return sv;
1512     }
1513    
1514     /////////////////////////////////////////////////////////////////////////////
1515 root 1.40 // incremental parser
1516    
1517     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1518    
1519     // returns 0 for notyet, 1 for success or error
1520     static int
1521     incr_parse (CBOR *self, SV *cborstr)
1522     {
1523     STRLEN cur;
1524     SvPV (cborstr, cur);
1525    
1526     while (ecb_expect_true (self->incr_need <= cur))
1527     {
1528     // table of integer count bytes
1529     static I8 incr_len[MINOR_MASK + 1] = {
1530     0, 0, 0, 0, 0, 0, 0, 0,
1531     0, 0, 0, 0, 0, 0, 0, 0,
1532     0, 0, 0, 0, 0, 0, 0, 0,
1533     1, 2, 4, 8,-1,-1,-1,-2
1534     };
1535    
1536     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1537     U8 m = *p & MINOR_MASK;
1538     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1539     I8 ilen = incr_len[m];
1540    
1541     self->incr_need = self->incr_pos + 1;
1542    
1543     if (ecb_expect_false (ilen < 0))
1544     {
1545     if (m != MINOR_INDEF)
1546     return 1; // error
1547    
1548     if (*p == (MAJOR_MISC | MINOR_INDEF))
1549     {
1550     if (count >= 0)
1551     return 1; // error
1552    
1553     count = 1;
1554     }
1555     else
1556     {
1557     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1558     count = -1;
1559     }
1560     }
1561     else
1562     {
1563     self->incr_need += ilen;
1564     if (ecb_expect_false (self->incr_need > cur))
1565     return 0;
1566    
1567     int major = *p >> MAJOR_SHIFT;
1568    
1569     switch (major)
1570     {
1571 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1572     ++count; // tags merely prefix another value
1573     break;
1574    
1575 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1576     case MAJOR_TEXT >> MAJOR_SHIFT:
1577     case MAJOR_ARRAY >> MAJOR_SHIFT:
1578     case MAJOR_MAP >> MAJOR_SHIFT:
1579     {
1580     UV len;
1581    
1582     if (ecb_expect_false (ilen))
1583     {
1584     len = 0;
1585    
1586     do {
1587     len = (len << 8) | *++p;
1588     } while (--ilen);
1589     }
1590     else
1591     len = m;
1592    
1593     switch (major)
1594     {
1595     case MAJOR_BYTES >> MAJOR_SHIFT:
1596     case MAJOR_TEXT >> MAJOR_SHIFT:
1597     self->incr_need += len;
1598     if (ecb_expect_false (self->incr_need > cur))
1599     return 0;
1600    
1601     break;
1602    
1603     case MAJOR_MAP >> MAJOR_SHIFT:
1604     len <<= 1;
1605 root 1.71 /* FALLTHROUGH */
1606 root 1.40 case MAJOR_ARRAY >> MAJOR_SHIFT:
1607     if (len)
1608     {
1609     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1610     count = len + 1;
1611     }
1612     break;
1613     }
1614     }
1615     }
1616     }
1617    
1618     self->incr_pos = self->incr_need;
1619    
1620     if (count > 0)
1621     {
1622     while (!--count)
1623     {
1624     if (!AvFILLp (self->incr_count))
1625     return 1; // done
1626    
1627     SvREFCNT_dec_NN (av_pop (self->incr_count));
1628     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1629     }
1630    
1631     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1632     }
1633     }
1634    
1635     return 0;
1636     }
1637    
1638    
1639     /////////////////////////////////////////////////////////////////////////////
1640 root 1.1 // XS interface functions
1641    
1642     MODULE = CBOR::XS PACKAGE = CBOR::XS
1643    
1644     BOOT:
1645     {
1646     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1647 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1648 root 1.1
1649 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1650     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1651    
1652     types_true = get_bool ("Types::Serialiser::true" );
1653     types_false = get_bool ("Types::Serialiser::false");
1654     types_error = get_bool ("Types::Serialiser::error");
1655 root 1.11
1656 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1657    
1658 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1659     SvREADONLY_on (sv_cbor);
1660 root 1.56
1661     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1662 root 1.1 }
1663    
1664     PROTOTYPES: DISABLE
1665    
1666     void CLONE (...)
1667     CODE:
1668 root 1.10 cbor_stash = 0;
1669     cbor_tagged_stash = 0;
1670     types_error_stash = 0;
1671     types_boolean_stash = 0;
1672 root 1.1
1673     void new (char *klass)
1674     PPCODE:
1675     {
1676     SV *pv = NEWSV (0, sizeof (CBOR));
1677     SvPOK_only (pv);
1678     cbor_init ((CBOR *)SvPVX (pv));
1679     XPUSHs (sv_2mortal (sv_bless (
1680     newRV_noinc (pv),
1681     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1682     )));
1683     }
1684    
1685     void shrink (CBOR *self, int enable = 1)
1686     ALIAS:
1687 root 1.80 shrink = F_SHRINK
1688     allow_unknown = F_ALLOW_UNKNOWN
1689     allow_sharing = F_ALLOW_SHARING
1690     allow_cycles = F_ALLOW_CYCLES
1691     allow_weak_cycles = F_ALLOW_WEAK_CYCLES
1692     forbid_objects = F_FORBID_OBJECTS
1693     pack_strings = F_PACK_STRINGS
1694     text_keys = F_TEXT_KEYS
1695     text_strings = F_TEXT_STRINGS
1696     validate_utf8 = F_VALIDATE_UTF8
1697 root 1.1 PPCODE:
1698     {
1699     if (enable)
1700     self->flags |= ix;
1701     else
1702     self->flags &= ~ix;
1703    
1704     XPUSHs (ST (0));
1705     }
1706    
1707     void get_shrink (CBOR *self)
1708     ALIAS:
1709 root 1.80 get_shrink = F_SHRINK
1710     get_allow_unknown = F_ALLOW_UNKNOWN
1711     get_allow_sharing = F_ALLOW_SHARING
1712     get_allow_cycles = F_ALLOW_CYCLES
1713     get_allow_weak_cycles = F_ALLOW_WEAK_CYCLES
1714     get_forbid_objects = F_FORBID_OBJECTS
1715     get_pack_strings = F_PACK_STRINGS
1716     get_text_keys = F_TEXT_KEYS
1717     get_text_strings = F_TEXT_STRINGS
1718     get_validate_utf8 = F_VALIDATE_UTF8
1719 root 1.1 PPCODE:
1720     XPUSHs (boolSV (self->flags & ix));
1721    
1722     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1723     PPCODE:
1724     self->max_depth = max_depth;
1725     XPUSHs (ST (0));
1726    
1727     U32 get_max_depth (CBOR *self)
1728     CODE:
1729     RETVAL = self->max_depth;
1730     OUTPUT:
1731     RETVAL
1732    
1733     void max_size (CBOR *self, U32 max_size = 0)
1734     PPCODE:
1735     self->max_size = max_size;
1736     XPUSHs (ST (0));
1737    
1738     int get_max_size (CBOR *self)
1739     CODE:
1740     RETVAL = self->max_size;
1741     OUTPUT:
1742     RETVAL
1743    
1744 root 1.27 void filter (CBOR *self, SV *filter = 0)
1745     PPCODE:
1746     SvREFCNT_dec (self->filter);
1747     self->filter = filter ? newSVsv (filter) : filter;
1748     XPUSHs (ST (0));
1749    
1750     SV *get_filter (CBOR *self)
1751     CODE:
1752     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1753     OUTPUT:
1754     RETVAL
1755    
1756 root 1.1 void encode (CBOR *self, SV *scalar)
1757     PPCODE:
1758     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1759     XPUSHs (scalar);
1760    
1761     void decode (CBOR *self, SV *cborstr)
1762     PPCODE:
1763     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1764     XPUSHs (cborstr);
1765    
1766     void decode_prefix (CBOR *self, SV *cborstr)
1767     PPCODE:
1768     {
1769     SV *sv;
1770     char *offset;
1771     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1772     EXTEND (SP, 2);
1773     PUSHs (sv);
1774     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1775     }
1776    
1777 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1778 root 1.42 ALIAS:
1779     incr_parse_multiple = 1
1780 root 1.40 PPCODE:
1781     {
1782     if (SvUTF8 (cborstr))
1783     sv_utf8_downgrade (cborstr, 0);
1784    
1785     if (!self->incr_count)
1786     {
1787     self->incr_count = newAV ();
1788     self->incr_pos = 0;
1789     self->incr_need = 1;
1790    
1791     av_push (self->incr_count, newSViv (1));
1792     }
1793    
1794 root 1.41 do
1795 root 1.40 {
1796     if (!incr_parse (self, cborstr))
1797     {
1798     if (self->incr_need > self->max_size && self->max_size)
1799     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1800     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1801    
1802     break;
1803     }
1804    
1805 root 1.41 SV *sv;
1806     char *offset;
1807 root 1.40
1808 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1809     XPUSHs (sv);
1810 root 1.40
1811 root 1.41 sv_chop (cborstr, offset);
1812 root 1.40
1813 root 1.41 av_clear (self->incr_count);
1814     av_push (self->incr_count, newSViv (1));
1815 root 1.40
1816 root 1.41 self->incr_pos = 0;
1817     self->incr_need = self->incr_pos + 1;
1818 root 1.40 }
1819 root 1.42 while (ix);
1820 root 1.40 }
1821    
1822     void incr_reset (CBOR *self)
1823     CODE:
1824     {
1825     SvREFCNT_dec (self->incr_count);
1826     self->incr_count = 0;
1827     }
1828    
1829 root 1.27 void DESTROY (CBOR *self)
1830     PPCODE:
1831     cbor_free (self);
1832    
1833 root 1.1 PROTOTYPES: ENABLE
1834    
1835     void encode_cbor (SV *scalar)
1836 root 1.36 ALIAS:
1837     encode_cbor = 0
1838     encode_cbor_sharing = F_ALLOW_SHARING
1839 root 1.1 PPCODE:
1840     {
1841     CBOR cbor;
1842     cbor_init (&cbor);
1843 root 1.36 cbor.flags |= ix;
1844 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1845     XPUSHs (scalar);
1846     }
1847    
1848     void decode_cbor (SV *cborstr)
1849     PPCODE:
1850     {
1851     CBOR cbor;
1852     cbor_init (&cbor);
1853     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1854     XPUSHs (cborstr);
1855     }
1856    
1857 root 1.64 #ifdef __AFL_COMPILER
1858    
1859     void
1860     afl_init ()
1861     CODE:
1862     __AFL_INIT ();
1863    
1864     int
1865     afl_loop (unsigned int count = 10000)
1866     CODE:
1867     RETVAL = __AFL_LOOP (count);
1868     OUTPUT:
1869     RETVAL
1870    
1871     #endif
1872