ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.71
Committed: Fri Mar 19 17:30:27 2021 UTC (3 years, 1 month ago) by root
Branch: MAIN
Changes since 1.70: +3 -2 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.35 // known major and minor types
34     enum cbor_type
35     {
36     MAJOR_SHIFT = 5,
37     MINOR_MASK = 0x1f,
38    
39     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
40     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
41     MAJOR_BYTES = 2 << MAJOR_SHIFT,
42     MAJOR_TEXT = 3 << MAJOR_SHIFT,
43     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
44     MAJOR_MAP = 5 << MAJOR_SHIFT,
45     MAJOR_TAG = 6 << MAJOR_SHIFT,
46     MAJOR_MISC = 7 << MAJOR_SHIFT,
47    
48     // INT/STRING/ARRAY/MAP subtypes
49     LENGTH_EXT1 = 24,
50     LENGTH_EXT2 = 25,
51     LENGTH_EXT4 = 26,
52     LENGTH_EXT8 = 27,
53    
54     // SIMPLE types (effectively MISC subtypes)
55     SIMPLE_FALSE = 20,
56     SIMPLE_TRUE = 21,
57     SIMPLE_NULL = 22,
58     SIMPLE_UNDEF = 23,
59    
60     // MISC subtype (unused)
61     MISC_EXT1 = 24,
62     MISC_FLOAT16 = 25,
63     MISC_FLOAT32 = 26,
64     MISC_FLOAT64 = 27,
65    
66     // BYTES/TEXT/ARRAY/MAP
67     MINOR_INDEF = 31,
68     };
69    
70 root 1.9 // known tags
71 root 1.8 enum cbor_tag
72     {
73 root 1.35 // extensions
74     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
75     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
76     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
77     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
78     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
79     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
80     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
81    
82     // rfc7049
83     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
84     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
85     CBOR_TAG_POS_BIGNUM = 2, // byte string
86     CBOR_TAG_NEG_BIGNUM = 3, // byte string
87     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
88     CBOR_TAG_BIGFLOAT = 5, // array
89    
90     CBOR_TAG_CONV_B64U = 21, // base64url, any
91     CBOR_TAG_CONV_B64 = 22, // base64, any
92     CBOR_TAG_CONV_HEX = 23, // base16, any
93     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
94    
95     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
96     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
97     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
98     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
99     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
100 root 1.8
101 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
102 root 1.8 };
103    
104 root 1.66 // known forced types, also hardcoded in CBOR.pm
105     enum
106     {
107     AS_CBOR = 0,
108 root 1.68 AS_INT = 1,
109     AS_BYTES = 2,
110     AS_TEXT = 3,
111 root 1.67 AS_FLOAT16 = 4,
112     AS_FLOAT32 = 5,
113     AS_FLOAT64 = 6,
114 root 1.70 AS_MAP = 7,
115 root 1.67 // possibly future enhancements: (generic) float, (generic) string
116 root 1.66 };
117    
118 root 1.24 #define F_SHRINK 0x00000001UL
119     #define F_ALLOW_UNKNOWN 0x00000002UL
120 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
121 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
122 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
123     #define F_PACK_STRINGS 0x00000020UL
124     #define F_TEXT_KEYS 0x00000040UL
125     #define F_TEXT_STRINGS 0x00000080UL
126     #define F_VALIDATE_UTF8 0x00000100UL
127 root 1.1
128     #define INIT_SIZE 32 // initial scalar size to be allocated
129    
130     #define SB do {
131     #define SE } while (0)
132    
133     #define IN_RANGE_INC(type,val,beg,end) \
134     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
135     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
136    
137     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
138    
139     #ifdef USE_ITHREADS
140     # define CBOR_SLOW 1
141     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
142     #else
143     # define CBOR_SLOW 0
144     # define CBOR_STASH cbor_stash
145     #endif
146    
147 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
148 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
149 root 1.1
150     typedef struct {
151     U32 flags;
152     U32 max_depth;
153     STRLEN max_size;
154 root 1.27 SV *filter;
155 root 1.40
156     // for the incremental parser
157     STRLEN incr_pos; // the current offset into the text
158     STRLEN incr_need; // minimum bytes needed to decode
159     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
160 root 1.1 } CBOR;
161    
162 root 1.5 ecb_inline void
163 root 1.1 cbor_init (CBOR *cbor)
164     {
165     Zero (cbor, 1, CBOR);
166     cbor->max_depth = 512;
167     }
168    
169 root 1.27 ecb_inline void
170     cbor_free (CBOR *cbor)
171     {
172     SvREFCNT_dec (cbor->filter);
173 root 1.40 SvREFCNT_dec (cbor->incr_count);
174 root 1.27 }
175    
176 root 1.1 /////////////////////////////////////////////////////////////////////////////
177     // utility functions
178    
179 root 1.5 ecb_inline SV *
180 root 1.1 get_bool (const char *name)
181     {
182     SV *sv = get_sv (name, 1);
183    
184     SvREADONLY_on (sv);
185     SvREADONLY_on (SvRV (sv));
186    
187     return sv;
188     }
189    
190 root 1.5 ecb_inline void
191 root 1.1 shrink (SV *sv)
192     {
193     sv_utf8_downgrade (sv, 1);
194    
195     if (SvLEN (sv) > SvCUR (sv) + 1)
196     {
197     #ifdef SvPV_shrink_to_cur
198     SvPV_shrink_to_cur (sv);
199     #elif defined (SvPV_renew)
200     SvPV_renew (sv, SvCUR (sv) + 1);
201     #endif
202     }
203     }
204    
205 root 1.21 // minimum length of a string to be registered for stringref
206 root 1.71 ecb_inline STRLEN
207 root 1.21 minimum_string_length (UV idx)
208     {
209 root 1.61 return idx <= 23 ? 3
210     : idx <= 0xffU ? 4
211     : idx <= 0xffffU ? 5
212     : idx <= 0xffffffffU ? 7
213     : 11;
214 root 1.21 }
215    
216 root 1.1 /////////////////////////////////////////////////////////////////////////////
217     // encoder
218    
219     // structure used for encoding CBOR
220     typedef struct
221     {
222     char *cur; // SvPVX (sv) + current output position
223     char *end; // SvEND (sv)
224     SV *sv; // result scalar
225     CBOR cbor;
226     U32 depth; // recursion level
227 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
228     UV stringref_idx;
229 root 1.19 HV *shareable; // ptr => index, or 0
230     UV shareable_idx;
231 root 1.1 } enc_t;
232    
233 root 1.5 ecb_inline void
234 root 1.1 need (enc_t *enc, STRLEN len)
235     {
236 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
237 root 1.1 {
238     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
239     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
240     enc->cur = SvPVX (enc->sv) + cur;
241     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
242     }
243     }
244    
245 root 1.70 static void encode_sv (enc_t *enc, SV *sv);
246    
247 root 1.5 ecb_inline void
248 root 1.1 encode_ch (enc_t *enc, char ch)
249     {
250     need (enc, 1);
251     *enc->cur++ = ch;
252     }
253    
254 root 1.66 // used for tags, intregers, element counts and so on
255 root 1.1 static void
256     encode_uint (enc_t *enc, int major, UV len)
257     {
258     need (enc, 9);
259    
260 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
261 root 1.1 *enc->cur++ = major | len;
262 root 1.36 else if (ecb_expect_true (len <= 0xffU))
263 root 1.1 {
264 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
265 root 1.1 *enc->cur++ = len;
266     }
267 root 1.36 else if (len <= 0xffffU)
268 root 1.1 {
269 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
270 root 1.1 *enc->cur++ = len >> 8;
271     *enc->cur++ = len;
272     }
273 root 1.36 else if (len <= 0xffffffffU)
274 root 1.1 {
275 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
276 root 1.1 *enc->cur++ = len >> 24;
277     *enc->cur++ = len >> 16;
278     *enc->cur++ = len >> 8;
279     *enc->cur++ = len;
280     }
281 root 1.4 else
282 root 1.1 {
283 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
284 root 1.1 *enc->cur++ = len >> 56;
285     *enc->cur++ = len >> 48;
286     *enc->cur++ = len >> 40;
287     *enc->cur++ = len >> 32;
288     *enc->cur++ = len >> 24;
289     *enc->cur++ = len >> 16;
290     *enc->cur++ = len >> 8;
291     *enc->cur++ = len;
292     }
293     }
294    
295 root 1.66 // encodes a perl value into a CBOR integer
296 root 1.67 ecb_inline void
297 root 1.66 encode_int (enc_t *enc, SV *sv)
298     {
299     if (SvIsUV (sv))
300     encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
301     else if (SvIVX (sv) >= 0)
302     encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
303     else
304     encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
305     }
306    
307 root 1.21 ecb_inline void
308     encode_tag (enc_t *enc, UV tag)
309     {
310 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
311 root 1.21 }
312    
313 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
314     ecb_noinline static void
315     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
316     {
317     STRLEN ulen = len;
318     U8 *p, *pend = (U8 *)str + len;
319    
320     for (p = (U8 *)str; p < pend; ++p)
321     ulen += *p >> 7; // count set high bits
322    
323     encode_uint (enc, MAJOR_TEXT, ulen);
324    
325     need (enc, ulen);
326     for (p = (U8 *)str; p < pend; ++p)
327     if (*p < 0x80)
328     *enc->cur++ = *p;
329     else
330     {
331     *enc->cur++ = 0xc0 + (*p >> 6);
332     *enc->cur++ = 0x80 + (*p & 63);
333     }
334     }
335    
336 root 1.30 ecb_inline void
337 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
338 root 1.30 {
339 root 1.53 if (ecb_expect_false (upgrade_utf8))
340 root 1.51 if (!utf8)
341     {
342 root 1.53 encode_str_utf8 (enc, utf8, str, len);
343 root 1.51 return;
344     }
345    
346 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
347 root 1.30 need (enc, len);
348     memcpy (enc->cur, str, len);
349     enc->cur += len;
350     }
351    
352 root 1.54 ecb_inline void
353 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
354 root 1.1 {
355 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
356 root 1.21 {
357 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
358 root 1.21
359     if (SvOK (*svp))
360     {
361     // already registered, use stringref
362     encode_tag (enc, CBOR_TAG_STRINGREF);
363 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
364 root 1.21 return;
365     }
366     else if (len >= minimum_string_length (enc->stringref_idx))
367     {
368     // register only
369     sv_setuv (*svp, enc->stringref_idx);
370     ++enc->stringref_idx;
371     }
372     }
373    
374 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
375 root 1.1 }
376    
377 root 1.66 ecb_inline void
378     encode_float16 (enc_t *enc, NV nv)
379     {
380     need (enc, 1+2);
381    
382     *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
383    
384     uint16_t fp = ecb_float_to_binary16 (nv);
385    
386     if (!ecb_big_endian ())
387     fp = ecb_bswap16 (fp);
388    
389     memcpy (enc->cur, &fp, 2);
390     enc->cur += 2;
391     }
392    
393     ecb_inline void
394     encode_float32 (enc_t *enc, NV nv)
395     {
396     need (enc, 1+4);
397    
398     *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
399    
400     uint32_t fp = ecb_float_to_binary32 (nv);
401    
402     if (!ecb_big_endian ())
403     fp = ecb_bswap32 (fp);
404    
405     memcpy (enc->cur, &fp, 4);
406     enc->cur += 4;
407     }
408    
409     ecb_inline void
410     encode_float64 (enc_t *enc, NV nv)
411     {
412     need (enc, 1+8);
413    
414     *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
415    
416     uint64_t fp = ecb_double_to_binary64 (nv);
417    
418     if (!ecb_big_endian ())
419     fp = ecb_bswap64 (fp);
420    
421     memcpy (enc->cur, &fp, 8);
422     enc->cur += 8;
423     }
424    
425     ecb_inline void
426 root 1.69 encode_bool (enc_t *enc, int istrue)
427     {
428     encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
429     }
430    
431 root 1.70 // encodes an arrayref containing key-value pairs as CBOR map
432     ecb_inline void
433     encode_array_as_map (enc_t *enc, SV *sv)
434     {
435     if (enc->depth >= enc->cbor.max_depth)
436     croak (ERR_NESTING_EXCEEDED);
437    
438     ++enc->depth;
439    
440     // as_map does error checking for us, but we re-check in case
441     // things have changed.
442    
443     if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
444     croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)");
445    
446     AV *av = (AV *)SvRV (sv);
447     int i, len = av_len (av);
448    
449     if (!(len & 1))
450     croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)");
451    
452     encode_uint (enc, MAJOR_MAP, (len + 1) >> 1);
453    
454     for (i = 0; i <= len; ++i)
455     {
456     SV **svp = av_fetch (av, i, 0);
457     encode_sv (enc, svp ? *svp : &PL_sv_undef);
458     }
459    
460     --enc->depth;
461     }
462    
463 root 1.69 ecb_inline void
464 root 1.66 encode_forced (enc_t *enc, UV type, SV *sv)
465     {
466     switch (type)
467     {
468     case AS_CBOR:
469     {
470     STRLEN len;
471     char *str = SvPVbyte (sv, len);
472    
473     need (enc, len);
474     memcpy (enc->cur, str, len);
475     enc->cur += len;
476     }
477     break;
478    
479     case AS_BYTES:
480     {
481     STRLEN len;
482     char *str = SvPVbyte (sv, len);
483     encode_strref (enc, 0, 0, str, len);
484     }
485     break;
486    
487     case AS_TEXT:
488     {
489     STRLEN len;
490     char *str = SvPVutf8 (sv, len);
491     encode_strref (enc, 1, 1, str, len);
492     }
493     break;
494    
495 root 1.69 case AS_INT: encode_int (enc, sv); break;
496    
497 root 1.66 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
498     case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
499     case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
500    
501 root 1.70 case AS_MAP: encode_array_as_map (enc, sv); break;
502    
503 root 1.66 default:
504     croak ("encountered malformed CBOR::XS::Tagged object");
505     }
506     }
507    
508 root 1.1 static void
509     encode_av (enc_t *enc, AV *av)
510     {
511     int i, len = av_len (av);
512    
513     if (enc->depth >= enc->cbor.max_depth)
514     croak (ERR_NESTING_EXCEEDED);
515    
516     ++enc->depth;
517    
518 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
519 root 1.1
520 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
521 root 1.45 for (i = 0; i <= len; ++i)
522     {
523     SV **svp = av_fetch (av, i, 0);
524     encode_sv (enc, svp ? *svp : &PL_sv_undef);
525     }
526     else
527     for (i = 0; i <= len; ++i)
528 root 1.46 {
529     SV *sv = AvARRAY (av)[i];
530     encode_sv (enc, sv ? sv : &PL_sv_undef);
531     }
532 root 1.1
533     --enc->depth;
534     }
535    
536     static void
537     encode_hv (enc_t *enc, HV *hv)
538     {
539     HE *he;
540    
541     if (enc->depth >= enc->cbor.max_depth)
542     croak (ERR_NESTING_EXCEEDED);
543    
544     ++enc->depth;
545    
546     int pairs = hv_iterinit (hv);
547     int mg = SvMAGICAL (hv);
548    
549 root 1.63 if (ecb_expect_false (mg))
550 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
551 root 1.1 else
552 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
553 root 1.1
554     while ((he = hv_iternext (hv)))
555     {
556 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
557     encode_sv (enc, HeSVKEY (he));
558 root 1.1 else
559 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
560 root 1.1
561 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
562 root 1.1 }
563    
564 root 1.63 if (ecb_expect_false (mg))
565 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
566 root 1.1
567     --enc->depth;
568     }
569    
570     // encode objects, arrays and special \0=false and \1=true values.
571     static void
572     encode_rv (enc_t *enc, SV *sv)
573     {
574 root 1.19 SvGETMAGIC (sv);
575 root 1.1
576 root 1.19 svtype svt = SvTYPE (sv);
577 root 1.18
578 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
579 root 1.1 {
580 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
581     ? types_boolean_stash
582     : gv_stashpv ("Types::Serialiser::Boolean", 1);
583     HV *error_stash = !CBOR_SLOW || types_error_stash
584     ? types_error_stash
585     : gv_stashpv ("Types::Serialiser::Error", 1);
586 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
587     ? cbor_tagged_stash
588     : gv_stashpv ("CBOR::XS::Tagged" , 1);
589 root 1.1
590 root 1.11 HV *stash = SvSTASH (sv);
591    
592     if (stash == boolean_stash)
593 root 1.34 {
594 root 1.69 encode_bool (enc, SvIV (sv));
595 root 1.34 return;
596     }
597 root 1.11 else if (stash == error_stash)
598 root 1.34 {
599 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
600 root 1.34 return;
601     }
602 root 1.11 else if (stash == tagged_stash)
603 root 1.6 {
604     if (svt != SVt_PVAV)
605     croak ("encountered CBOR::XS::Tagged object that isn't an array");
606    
607 root 1.66 switch (av_len ((AV *)sv))
608     {
609     case 2-1:
610     // actually a tagged value
611     encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
612     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
613     break;
614    
615     case 3-1:
616     // a forced type [value, type, undef]
617     encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
618     break;
619    
620     default:
621     croak ("encountered malformed CBOR::XS::Tagged object");
622     }
623 root 1.34
624     return;
625 root 1.6 }
626 root 1.34 }
627    
628     if (ecb_expect_false (SvREFCNT (sv) > 1)
629     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
630     {
631 root 1.63 if (ecb_expect_false (!enc->shareable))
632 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
633    
634     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
635    
636     if (SvOK (*svp))
637     {
638     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
639 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
640 root 1.34 return;
641     }
642     else
643     {
644     sv_setuv (*svp, enc->shareable_idx);
645     ++enc->shareable_idx;
646     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
647     }
648     }
649    
650     if (ecb_expect_false (SvOBJECT (sv)))
651     {
652     HV *stash = SvSTASH (sv);
653     GV *method;
654    
655 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
656     croak ("encountered object '%s', but forbid_objects is enabled",
657     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
658     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
659 root 1.1 {
660 root 1.11 dSP;
661    
662 root 1.50 ENTER; SAVETMPS;
663     PUSHMARK (SP);
664 root 1.6 // we re-bless the reference to get overload and other niceties right
665 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
666    
667     PUTBACK;
668     // G_SCALAR ensures that return value is 1
669     call_sv ((SV *)GvCV (method), G_SCALAR);
670     SPAGAIN;
671    
672     // catch this surprisingly common error
673     if (SvROK (TOPs) && SvRV (TOPs) == sv)
674     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
675    
676     encode_sv (enc, POPs);
677    
678     PUTBACK;
679    
680     FREETMPS; LEAVE;
681     }
682     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
683     {
684     dSP;
685 root 1.6
686 root 1.50 ENTER; SAVETMPS;
687     PUSHMARK (SP);
688 root 1.11 EXTEND (SP, 2);
689     // we re-bless the reference to get overload and other niceties right
690     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
691     PUSHs (sv_cbor);
692 root 1.1
693 root 1.11 PUTBACK;
694     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
695     SPAGAIN;
696 root 1.6
697 root 1.11 // catch this surprisingly common error
698     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
699     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
700 root 1.6
701 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
702 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
703 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
704 root 1.6
705 root 1.65 {
706     int i;
707    
708     for (i = 0; i < count; ++i)
709     encode_sv (enc, SP[i + 1 - count]);
710    
711     SP -= count;
712     }
713 root 1.6
714 root 1.11 PUTBACK;
715 root 1.6
716 root 1.11 FREETMPS; LEAVE;
717 root 1.1 }
718 root 1.11 else
719     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
720     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
721 root 1.1 }
722     else if (svt == SVt_PVHV)
723     encode_hv (enc, (HV *)sv);
724     else if (svt == SVt_PVAV)
725     encode_av (enc, (AV *)sv);
726 root 1.18 else
727 root 1.1 {
728 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
729     encode_sv (enc, sv);
730 root 1.1 }
731     }
732    
733     static void
734     encode_nv (enc_t *enc, SV *sv)
735     {
736     double nv = SvNVX (sv);
737    
738     need (enc, 9);
739    
740 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
741     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
742 root 1.1 //TODO: maybe I32?
743 root 1.5 else if (ecb_expect_false (nv == (float)nv))
744 root 1.66 encode_float32 (enc, nv);
745 root 1.1 else
746 root 1.66 encode_float64 (enc, nv);
747 root 1.1 }
748    
749     static void
750     encode_sv (enc_t *enc, SV *sv)
751     {
752     SvGETMAGIC (sv);
753    
754     if (SvPOKp (sv))
755     {
756     STRLEN len;
757     char *str = SvPV (sv, len);
758 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
759 root 1.1 }
760     else if (SvNOKp (sv))
761     encode_nv (enc, sv);
762     else if (SvIOKp (sv))
763 root 1.67 encode_int (enc, sv);
764 root 1.1 else if (SvROK (sv))
765     encode_rv (enc, SvRV (sv));
766     else if (!SvOK (sv))
767 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
768 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
769 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
770 root 1.1 else
771     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
772     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
773     }
774    
775     static SV *
776     encode_cbor (SV *scalar, CBOR *cbor)
777     {
778 root 1.48 enc_t enc = { 0 };
779 root 1.1
780 root 1.63 enc.cbor = *cbor;
781     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
782     enc.cur = SvPVX (enc.sv);
783     enc.end = SvEND (enc.sv);
784 root 1.1
785     SvPOK_only (enc.sv);
786 root 1.20
787 root 1.32 if (cbor->flags & F_PACK_STRINGS)
788 root 1.20 {
789     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
790     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
791     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
792     }
793    
794 root 1.1 encode_sv (&enc, scalar);
795    
796     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
797     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
798    
799     if (enc.cbor.flags & F_SHRINK)
800     shrink (enc.sv);
801    
802     return enc.sv;
803     }
804    
805     /////////////////////////////////////////////////////////////////////////////
806     // decoder
807    
808     // structure used for decoding CBOR
809     typedef struct
810     {
811     U8 *cur; // current parser pointer
812     U8 *end; // end of input string
813     const char *err; // parse error, if != 0
814     CBOR cbor;
815     U32 depth; // recursion depth
816     U32 maxdepth; // recursion depth limit
817 root 1.19 AV *shareable;
818 root 1.20 AV *stringref;
819 root 1.27 SV *decode_tagged;
820 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
821 root 1.1 } dec_t;
822    
823 root 1.59 // set dec->err to ERRSV
824     ecb_cold static void
825     err_errsv (dec_t *dec)
826     {
827     if (!dec->err)
828     {
829     dec->err_sv = newSVsv (ERRSV);
830    
831     // chop off the trailing \n
832     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
833     *SvEND (dec->err_sv) = 0;
834    
835     dec->err = SvPVutf8_nolen (dec->err_sv);
836     }
837     }
838 root 1.1
839 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
840     ecb_cold static void
841     err_set (dec_t *dec, const char *reason)
842     {
843     if (!dec->err)
844     dec->err = reason;
845     }
846    
847     ecb_cold static void
848     err_unexpected_end (dec_t *dec)
849     {
850     err_set (dec, "unexpected end of CBOR data");
851     }
852    
853     #define ERR_DO(do) SB do; goto fail; SE
854     #define ERR(reason) ERR_DO (err_set (dec, reason))
855     #define ERR_ERRSV ERR_DO (err_errsv (dec))
856    
857 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
858 root 1.1
859 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
860 root 1.1 #define DEC_DEC_DEPTH --dec->depth
861    
862     static UV
863     decode_uint (dec_t *dec)
864     {
865 root 1.35 U8 m = *dec->cur & MINOR_MASK;
866     ++dec->cur;
867    
868     if (ecb_expect_true (m < LENGTH_EXT1))
869     return m;
870 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
871     {
872     WANT (1);
873     dec->cur += 1;
874     return dec->cur[-1];
875     }
876     else if (ecb_expect_true (m == LENGTH_EXT2))
877     {
878     WANT (2);
879     dec->cur += 2;
880     return (((UV)dec->cur[-2]) << 8)
881     | ((UV)dec->cur[-1]);
882     }
883     else if (ecb_expect_true (m == LENGTH_EXT4))
884     {
885     WANT (4);
886     dec->cur += 4;
887     return (((UV)dec->cur[-4]) << 24)
888     | (((UV)dec->cur[-3]) << 16)
889     | (((UV)dec->cur[-2]) << 8)
890     | ((UV)dec->cur[-1]);
891     }
892     else if (ecb_expect_true (m == LENGTH_EXT8))
893 root 1.1 {
894 root 1.36 WANT (8);
895     dec->cur += 8;
896 root 1.34
897 root 1.36 return
898 root 1.34 #if UVSIZE < 8
899 root 1.36 0
900 root 1.34 #else
901 root 1.36 (((UV)dec->cur[-8]) << 56)
902     | (((UV)dec->cur[-7]) << 48)
903     | (((UV)dec->cur[-6]) << 40)
904     | (((UV)dec->cur[-5]) << 32)
905 root 1.34 #endif
906 root 1.36 | (((UV)dec->cur[-4]) << 24)
907     | (((UV)dec->cur[-3]) << 16)
908     | (((UV)dec->cur[-2]) << 8)
909     | ((UV)dec->cur[-1]);
910 root 1.1 }
911 root 1.36 else
912     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
913 root 1.1
914     fail:
915     return 0;
916     }
917    
918     static SV *decode_sv (dec_t *dec);
919    
920     static SV *
921     decode_av (dec_t *dec)
922     {
923     AV *av = newAV ();
924    
925     DEC_INC_DEPTH;
926    
927 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
928 root 1.1 {
929     ++dec->cur;
930    
931     for (;;)
932     {
933     WANT (1);
934    
935 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
936 root 1.1 {
937     ++dec->cur;
938     break;
939     }
940    
941     av_push (av, decode_sv (dec));
942     }
943     }
944     else
945     {
946 root 1.57 UV i, len = decode_uint (dec);
947 root 1.1
948 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
949 root 1.1 av_fill (av, len - 1);
950    
951     for (i = 0; i < len; ++i)
952     AvARRAY (av)[i] = decode_sv (dec);
953     }
954    
955     DEC_DEC_DEPTH;
956     return newRV_noinc ((SV *)av);
957    
958     fail:
959 root 1.63 SvREFCNT_dec_NN (av);
960 root 1.1 DEC_DEC_DEPTH;
961     return &PL_sv_undef;
962     }
963    
964 root 1.16 static void
965     decode_he (dec_t *dec, HV *hv)
966     {
967     // for speed reasons, we specialcase single-string
968 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
969    
970 root 1.23 if (ecb_expect_true (!dec->stringref))
971 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
972 root 1.21 {
973 root 1.56 STRLEN len = decode_uint (dec);
974 root 1.21 char *key = (char *)dec->cur;
975 root 1.16
976 root 1.49 WANT (len);
977 root 1.21 dec->cur += len;
978    
979 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
980 root 1.16
981 root 1.21 return;
982     }
983 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
984 root 1.21 {
985 root 1.56 STRLEN len = decode_uint (dec);
986 root 1.21 char *key = (char *)dec->cur;
987 root 1.16
988 root 1.49 WANT (len);
989 root 1.21 dec->cur += len;
990 root 1.20
991 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
992 root 1.71 if (!is_utf8_string ((U8 *)key, len))
993 root 1.38 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
994 root 1.16
995 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
996 root 1.16
997 root 1.21 return;
998     }
999 root 1.20
1000 root 1.21 SV *k = decode_sv (dec);
1001     SV *v = decode_sv (dec);
1002 root 1.16
1003 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
1004     // methods, and this is hopefully the only place where it can happen,
1005     // so if there is a chance of an exception, take the very slow path.
1006     // since catching exceptions is "undocumented/internal/forbidden" by
1007     // the new p5p powers, we need to call out to a perl function :/
1008     if (ecb_expect_false (SvAMAGIC (k)))
1009     {
1010     dSP;
1011    
1012     ENTER; SAVETMPS;
1013     PUSHMARK (SP);
1014     EXTEND (SP, 3);
1015     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
1016     PUSHs (sv_2mortal (k));
1017     PUSHs (sv_2mortal (v));
1018    
1019     PUTBACK;
1020     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
1021     SPAGAIN;
1022    
1023     FREETMPS; LEAVE;
1024    
1025     if (SvTRUE (ERRSV))
1026     ERR_ERRSV;
1027    
1028     return;
1029     }
1030    
1031 root 1.21 hv_store_ent (hv, k, v, 0);
1032 root 1.63 SvREFCNT_dec_NN (k);
1033 root 1.38
1034     fail:
1035     ;
1036 root 1.16 }
1037    
1038 root 1.1 static SV *
1039     decode_hv (dec_t *dec)
1040     {
1041     HV *hv = newHV ();
1042    
1043     DEC_INC_DEPTH;
1044    
1045 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
1046 root 1.1 {
1047     ++dec->cur;
1048    
1049     for (;;)
1050     {
1051     WANT (1);
1052    
1053 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
1054 root 1.1 {
1055     ++dec->cur;
1056     break;
1057     }
1058    
1059 root 1.16 decode_he (dec, hv);
1060 root 1.1 }
1061     }
1062     else
1063     {
1064 root 1.57 UV pairs = decode_uint (dec);
1065    
1066     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
1067 root 1.1
1068 root 1.16 while (pairs--)
1069     decode_he (dec, hv);
1070 root 1.1 }
1071    
1072     DEC_DEC_DEPTH;
1073     return newRV_noinc ((SV *)hv);
1074    
1075     fail:
1076 root 1.63 SvREFCNT_dec_NN (hv);
1077 root 1.1 DEC_DEC_DEPTH;
1078     return &PL_sv_undef;
1079     }
1080    
1081     static SV *
1082     decode_str (dec_t *dec, int utf8)
1083     {
1084 root 1.6 SV *sv = 0;
1085 root 1.1
1086 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
1087 root 1.1 {
1088 root 1.33 // indefinite length strings
1089 root 1.1 ++dec->cur;
1090    
1091 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
1092 root 1.33
1093 root 1.1 sv = newSVpvn ("", 0);
1094    
1095     for (;;)
1096     {
1097     WANT (1);
1098    
1099 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
1100     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
1101 root 1.33 {
1102     ++dec->cur;
1103     break;
1104     }
1105     else
1106     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
1107    
1108     STRLEN len = decode_uint (dec);
1109 root 1.1
1110 root 1.33 WANT (len);
1111     sv_catpvn (sv, dec->cur, len);
1112     dec->cur += len;
1113 root 1.1 }
1114     }
1115     else
1116     {
1117     STRLEN len = decode_uint (dec);
1118    
1119     WANT (len);
1120     sv = newSVpvn (dec->cur, len);
1121     dec->cur += len;
1122 root 1.25
1123     if (ecb_expect_false (dec->stringref)
1124     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
1125     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
1126 root 1.1 }
1127    
1128     if (utf8)
1129 root 1.38 {
1130     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
1131     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
1132     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
1133    
1134     SvUTF8_on (sv);
1135     }
1136 root 1.1
1137     return sv;
1138    
1139     fail:
1140 root 1.6 SvREFCNT_dec (sv);
1141 root 1.1 return &PL_sv_undef;
1142     }
1143    
1144     static SV *
1145 root 1.3 decode_tagged (dec_t *dec)
1146     {
1147 root 1.19 SV *sv = 0;
1148 root 1.3 UV tag = decode_uint (dec);
1149 root 1.19
1150     WANT (1);
1151 root 1.3
1152 root 1.18 switch (tag)
1153     {
1154     case CBOR_TAG_MAGIC:
1155 root 1.20 sv = decode_sv (dec);
1156     break;
1157 root 1.18
1158     case CBOR_TAG_INDIRECTION:
1159 root 1.20 sv = newRV_noinc (decode_sv (dec));
1160     break;
1161    
1162     case CBOR_TAG_STRINGREF_NAMESPACE:
1163     {
1164 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1165 root 1.55 // erase mortalised caches, e.g. "shareable"
1166     ENTER;
1167 root 1.20
1168     SAVESPTR (dec->stringref);
1169     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1170    
1171     sv = decode_sv (dec);
1172    
1173 root 1.55 LEAVE;
1174 root 1.20 }
1175     break;
1176    
1177     case CBOR_TAG_STRINGREF:
1178     {
1179 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1180 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1181    
1182     UV idx = decode_uint (dec);
1183    
1184 root 1.64 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1185 root 1.20 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1186    
1187     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1188     }
1189     break;
1190 root 1.11
1191 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1192     {
1193     if (ecb_expect_false (!dec->shareable))
1194     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1195    
1196 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1197     {
1198     sv = newSV (0);
1199     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1200 root 1.18
1201 root 1.37 SV *osv = decode_sv (dec);
1202     sv_setsv (sv, osv);
1203     SvREFCNT_dec_NN (osv);
1204     }
1205     else
1206     {
1207     av_push (dec->shareable, &PL_sv_undef);
1208     int idx = AvFILLp (dec->shareable);
1209     sv = decode_sv (dec);
1210     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1211     }
1212 root 1.19 }
1213 root 1.20 break;
1214 root 1.18
1215     case CBOR_TAG_VALUE_SHAREDREF:
1216 root 1.17 {
1217 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1218 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1219 root 1.18
1220 root 1.19 UV idx = decode_uint (dec);
1221    
1222 root 1.64 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1223 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1224    
1225 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1226 root 1.37
1227     if (sv == &PL_sv_undef)
1228     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1229 root 1.17 }
1230 root 1.20 break;
1231 root 1.17
1232 root 1.18 case CBOR_TAG_PERL_OBJECT:
1233     {
1234 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1235     goto filter;
1236    
1237 root 1.19 sv = decode_sv (dec);
1238    
1239 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1240     ERR ("corrupted CBOR data (non-array perl object)");
1241    
1242     AV *av = (AV *)SvRV (sv);
1243     int len = av_len (av) + 1;
1244     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1245    
1246     if (!stash)
1247     ERR ("cannot decode perl-object (package does not exist)");
1248    
1249     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1250    
1251     if (!method)
1252     ERR ("cannot decode perl-object (package does not have a THAW method)");
1253    
1254     dSP;
1255    
1256 root 1.50 ENTER; SAVETMPS;
1257     PUSHMARK (SP);
1258 root 1.18 EXTEND (SP, len + 1);
1259     // we re-bless the reference to get overload and other niceties right
1260     PUSHs (*av_fetch (av, 0, 1));
1261     PUSHs (sv_cbor);
1262    
1263     int i;
1264    
1265     for (i = 1; i < len; ++i)
1266     PUSHs (*av_fetch (av, i, 1));
1267    
1268     PUTBACK;
1269     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1270     SPAGAIN;
1271    
1272     if (SvTRUE (ERRSV))
1273     {
1274     FREETMPS; LEAVE;
1275 root 1.59 ERR_ERRSV;
1276 root 1.18 }
1277    
1278 root 1.63 SvREFCNT_dec_NN (sv);
1279 root 1.18 sv = SvREFCNT_inc (POPs);
1280 root 1.11
1281 root 1.18 PUTBACK;
1282 root 1.11
1283 root 1.18 FREETMPS; LEAVE;
1284     }
1285 root 1.20 break;
1286 root 1.9
1287 root 1.18 default:
1288 root 1.60 filter:
1289 root 1.18 {
1290 root 1.58 SV *tag_sv = newSVuv (tag);
1291    
1292 root 1.19 sv = decode_sv (dec);
1293    
1294 root 1.27 dSP;
1295 root 1.50 ENTER; SAVETMPS;
1296     PUSHMARK (SP);
1297 root 1.27 EXTEND (SP, 2);
1298 root 1.58 PUSHs (tag_sv);
1299 root 1.27 PUSHs (sv);
1300    
1301     PUTBACK;
1302     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1303     SPAGAIN;
1304    
1305     if (SvTRUE (ERRSV))
1306     {
1307 root 1.63 SvREFCNT_dec_NN (tag_sv);
1308 root 1.27 FREETMPS; LEAVE;
1309 root 1.59 ERR_ERRSV;
1310 root 1.27 }
1311    
1312     if (count)
1313     {
1314 root 1.63 SvREFCNT_dec_NN (tag_sv);
1315     SvREFCNT_dec_NN (sv);
1316 root 1.65 sv = SvREFCNT_inc_NN (TOPs);
1317     SP -= count;
1318 root 1.27 }
1319     else
1320     {
1321     AV *av = newAV ();
1322 root 1.58 av_push (av, tag_sv);
1323 root 1.27 av_push (av, sv);
1324    
1325     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1326     ? cbor_tagged_stash
1327     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1328     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1329     }
1330 root 1.7
1331 root 1.27 PUTBACK;
1332    
1333     FREETMPS; LEAVE;
1334 root 1.18 }
1335 root 1.20 break;
1336 root 1.11 }
1337 root 1.9
1338 root 1.20 return sv;
1339    
1340 root 1.9 fail:
1341     SvREFCNT_dec (sv);
1342     return &PL_sv_undef;
1343 root 1.3 }
1344    
1345     static SV *
1346 root 1.1 decode_sv (dec_t *dec)
1347     {
1348     WANT (1);
1349    
1350 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1351 root 1.1 {
1352 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1353     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1354     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1355     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1356     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1357     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1358     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1359    
1360     case MAJOR_MISC >> MAJOR_SHIFT:
1361     switch (*dec->cur++ & MINOR_MASK)
1362 root 1.1 {
1363 root 1.35 case SIMPLE_FALSE:
1364 root 1.1 #if CBOR_SLOW
1365 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1366 root 1.1 #endif
1367 root 1.10 return newSVsv (types_false);
1368 root 1.35 case SIMPLE_TRUE:
1369 root 1.1 #if CBOR_SLOW
1370 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1371 root 1.1 #endif
1372 root 1.10 return newSVsv (types_true);
1373 root 1.35 case SIMPLE_NULL:
1374 root 1.1 return newSVsv (&PL_sv_undef);
1375 root 1.35 case SIMPLE_UNDEF:
1376 root 1.10 #if CBOR_SLOW
1377     types_error = get_bool ("Types::Serialiser::error");
1378     #endif
1379     return newSVsv (types_error);
1380 root 1.1
1381 root 1.35 case MISC_FLOAT16:
1382 root 1.2 {
1383     WANT (2);
1384    
1385     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1386     dec->cur += 2;
1387    
1388     return newSVnv (ecb_binary16_to_float (fp));
1389     }
1390 root 1.1
1391 root 1.35 case MISC_FLOAT32:
1392 root 1.1 {
1393     uint32_t fp;
1394     WANT (4);
1395     memcpy (&fp, dec->cur, 4);
1396     dec->cur += 4;
1397    
1398     if (!ecb_big_endian ())
1399     fp = ecb_bswap32 (fp);
1400    
1401     return newSVnv (ecb_binary32_to_float (fp));
1402     }
1403    
1404 root 1.35 case MISC_FLOAT64:
1405 root 1.1 {
1406     uint64_t fp;
1407     WANT (8);
1408     memcpy (&fp, dec->cur, 8);
1409     dec->cur += 8;
1410    
1411     if (!ecb_big_endian ())
1412     fp = ecb_bswap64 (fp);
1413    
1414     return newSVnv (ecb_binary64_to_double (fp));
1415     }
1416    
1417 root 1.35 // 0..19 unassigned simple
1418 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1419     // 28-30 unassigned misc
1420     // 31 break code
1421 root 1.1 default:
1422 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1423 root 1.1 }
1424    
1425     break;
1426     }
1427    
1428     fail:
1429     return &PL_sv_undef;
1430     }
1431    
1432     static SV *
1433     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1434     {
1435 root 1.48 dec_t dec = { 0 };
1436 root 1.1 SV *sv;
1437 root 1.16 STRLEN len;
1438     char *data = SvPVbyte (string, len);
1439 root 1.1
1440 root 1.16 if (len > cbor->max_size && cbor->max_size)
1441     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1442     (unsigned long)len, (unsigned long)cbor->max_size);
1443 root 1.1
1444     dec.cbor = *cbor;
1445 root 1.16 dec.cur = (U8 *)data;
1446     dec.end = (U8 *)data + len;
1447 root 1.1
1448     sv = decode_sv (&dec);
1449    
1450     if (offset_return)
1451     *offset_return = dec.cur;
1452    
1453     if (!(offset_return || !sv))
1454 root 1.2 if (dec.cur != dec.end && !dec.err)
1455     dec.err = "garbage after CBOR object";
1456    
1457     if (dec.err)
1458 root 1.1 {
1459 root 1.39 if (dec.shareable)
1460     {
1461 root 1.58 // need to break cyclic links, which would all be in shareable
1462 root 1.39 int i;
1463     SV **svp;
1464    
1465     for (i = av_len (dec.shareable) + 1; i--; )
1466     if ((svp = av_fetch (dec.shareable, i, 0)))
1467     sv_setsv (*svp, &PL_sv_undef);
1468     }
1469    
1470 root 1.63 SvREFCNT_dec_NN (sv);
1471 root 1.59
1472     if (dec.err_sv)
1473     sv_2mortal (dec.err_sv);
1474    
1475 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1476 root 1.1 }
1477    
1478     sv = sv_2mortal (sv);
1479    
1480     return sv;
1481     }
1482    
1483     /////////////////////////////////////////////////////////////////////////////
1484 root 1.40 // incremental parser
1485    
1486     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1487    
1488     // returns 0 for notyet, 1 for success or error
1489     static int
1490     incr_parse (CBOR *self, SV *cborstr)
1491     {
1492     STRLEN cur;
1493     SvPV (cborstr, cur);
1494    
1495     while (ecb_expect_true (self->incr_need <= cur))
1496     {
1497     // table of integer count bytes
1498     static I8 incr_len[MINOR_MASK + 1] = {
1499     0, 0, 0, 0, 0, 0, 0, 0,
1500     0, 0, 0, 0, 0, 0, 0, 0,
1501     0, 0, 0, 0, 0, 0, 0, 0,
1502     1, 2, 4, 8,-1,-1,-1,-2
1503     };
1504    
1505     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1506     U8 m = *p & MINOR_MASK;
1507     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1508     I8 ilen = incr_len[m];
1509    
1510     self->incr_need = self->incr_pos + 1;
1511    
1512     if (ecb_expect_false (ilen < 0))
1513     {
1514     if (m != MINOR_INDEF)
1515     return 1; // error
1516    
1517     if (*p == (MAJOR_MISC | MINOR_INDEF))
1518     {
1519     if (count >= 0)
1520     return 1; // error
1521    
1522     count = 1;
1523     }
1524     else
1525     {
1526     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1527     count = -1;
1528     }
1529     }
1530     else
1531     {
1532     self->incr_need += ilen;
1533     if (ecb_expect_false (self->incr_need > cur))
1534     return 0;
1535    
1536     int major = *p >> MAJOR_SHIFT;
1537    
1538     switch (major)
1539     {
1540 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1541     ++count; // tags merely prefix another value
1542     break;
1543    
1544 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1545     case MAJOR_TEXT >> MAJOR_SHIFT:
1546     case MAJOR_ARRAY >> MAJOR_SHIFT:
1547     case MAJOR_MAP >> MAJOR_SHIFT:
1548     {
1549     UV len;
1550    
1551     if (ecb_expect_false (ilen))
1552     {
1553     len = 0;
1554    
1555     do {
1556     len = (len << 8) | *++p;
1557     } while (--ilen);
1558     }
1559     else
1560     len = m;
1561    
1562     switch (major)
1563     {
1564     case MAJOR_BYTES >> MAJOR_SHIFT:
1565     case MAJOR_TEXT >> MAJOR_SHIFT:
1566     self->incr_need += len;
1567     if (ecb_expect_false (self->incr_need > cur))
1568     return 0;
1569    
1570     break;
1571    
1572     case MAJOR_MAP >> MAJOR_SHIFT:
1573     len <<= 1;
1574 root 1.71 /* FALLTHROUGH */
1575 root 1.40 case MAJOR_ARRAY >> MAJOR_SHIFT:
1576     if (len)
1577     {
1578     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1579     count = len + 1;
1580     }
1581     break;
1582     }
1583     }
1584     }
1585     }
1586    
1587     self->incr_pos = self->incr_need;
1588    
1589     if (count > 0)
1590     {
1591     while (!--count)
1592     {
1593     if (!AvFILLp (self->incr_count))
1594     return 1; // done
1595    
1596     SvREFCNT_dec_NN (av_pop (self->incr_count));
1597     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1598     }
1599    
1600     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1601     }
1602     }
1603    
1604     return 0;
1605     }
1606    
1607    
1608     /////////////////////////////////////////////////////////////////////////////
1609 root 1.1 // XS interface functions
1610    
1611     MODULE = CBOR::XS PACKAGE = CBOR::XS
1612    
1613     BOOT:
1614     {
1615     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1616 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1617 root 1.1
1618 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1619     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1620    
1621     types_true = get_bool ("Types::Serialiser::true" );
1622     types_false = get_bool ("Types::Serialiser::false");
1623     types_error = get_bool ("Types::Serialiser::error");
1624 root 1.11
1625 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1626    
1627 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1628     SvREADONLY_on (sv_cbor);
1629 root 1.56
1630     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1631 root 1.1 }
1632    
1633     PROTOTYPES: DISABLE
1634    
1635     void CLONE (...)
1636     CODE:
1637 root 1.10 cbor_stash = 0;
1638     cbor_tagged_stash = 0;
1639     types_error_stash = 0;
1640     types_boolean_stash = 0;
1641 root 1.1
1642     void new (char *klass)
1643     PPCODE:
1644     {
1645     SV *pv = NEWSV (0, sizeof (CBOR));
1646     SvPOK_only (pv);
1647     cbor_init ((CBOR *)SvPVX (pv));
1648     XPUSHs (sv_2mortal (sv_bless (
1649     newRV_noinc (pv),
1650     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1651     )));
1652     }
1653    
1654     void shrink (CBOR *self, int enable = 1)
1655     ALIAS:
1656     shrink = F_SHRINK
1657     allow_unknown = F_ALLOW_UNKNOWN
1658 root 1.18 allow_sharing = F_ALLOW_SHARING
1659 root 1.37 allow_cycles = F_ALLOW_CYCLES
1660 root 1.60 forbid_objects = F_FORBID_OBJECTS
1661 root 1.32 pack_strings = F_PACK_STRINGS
1662 root 1.54 text_keys = F_TEXT_KEYS
1663     text_strings = F_TEXT_STRINGS
1664 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1665 root 1.1 PPCODE:
1666     {
1667     if (enable)
1668     self->flags |= ix;
1669     else
1670     self->flags &= ~ix;
1671    
1672     XPUSHs (ST (0));
1673     }
1674    
1675     void get_shrink (CBOR *self)
1676     ALIAS:
1677     get_shrink = F_SHRINK
1678     get_allow_unknown = F_ALLOW_UNKNOWN
1679 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1680 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1681 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1682 root 1.32 get_pack_strings = F_PACK_STRINGS
1683 root 1.54 get_text_keys = F_TEXT_KEYS
1684     get_text_strings = F_TEXT_STRINGS
1685 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1686 root 1.1 PPCODE:
1687     XPUSHs (boolSV (self->flags & ix));
1688    
1689     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1690     PPCODE:
1691     self->max_depth = max_depth;
1692     XPUSHs (ST (0));
1693    
1694     U32 get_max_depth (CBOR *self)
1695     CODE:
1696     RETVAL = self->max_depth;
1697     OUTPUT:
1698     RETVAL
1699    
1700     void max_size (CBOR *self, U32 max_size = 0)
1701     PPCODE:
1702     self->max_size = max_size;
1703     XPUSHs (ST (0));
1704    
1705     int get_max_size (CBOR *self)
1706     CODE:
1707     RETVAL = self->max_size;
1708     OUTPUT:
1709     RETVAL
1710    
1711 root 1.27 void filter (CBOR *self, SV *filter = 0)
1712     PPCODE:
1713     SvREFCNT_dec (self->filter);
1714     self->filter = filter ? newSVsv (filter) : filter;
1715     XPUSHs (ST (0));
1716    
1717     SV *get_filter (CBOR *self)
1718     CODE:
1719     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1720     OUTPUT:
1721     RETVAL
1722    
1723 root 1.1 void encode (CBOR *self, SV *scalar)
1724     PPCODE:
1725     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1726     XPUSHs (scalar);
1727    
1728     void decode (CBOR *self, SV *cborstr)
1729     PPCODE:
1730     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1731     XPUSHs (cborstr);
1732    
1733     void decode_prefix (CBOR *self, SV *cborstr)
1734     PPCODE:
1735     {
1736     SV *sv;
1737     char *offset;
1738     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1739     EXTEND (SP, 2);
1740     PUSHs (sv);
1741     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1742     }
1743    
1744 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1745 root 1.42 ALIAS:
1746     incr_parse_multiple = 1
1747 root 1.40 PPCODE:
1748     {
1749     if (SvUTF8 (cborstr))
1750     sv_utf8_downgrade (cborstr, 0);
1751    
1752     if (!self->incr_count)
1753     {
1754     self->incr_count = newAV ();
1755     self->incr_pos = 0;
1756     self->incr_need = 1;
1757    
1758     av_push (self->incr_count, newSViv (1));
1759     }
1760    
1761 root 1.41 do
1762 root 1.40 {
1763     if (!incr_parse (self, cborstr))
1764     {
1765     if (self->incr_need > self->max_size && self->max_size)
1766     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1767     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1768    
1769     break;
1770     }
1771    
1772 root 1.41 SV *sv;
1773     char *offset;
1774 root 1.40
1775 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1776     XPUSHs (sv);
1777 root 1.40
1778 root 1.41 sv_chop (cborstr, offset);
1779 root 1.40
1780 root 1.41 av_clear (self->incr_count);
1781     av_push (self->incr_count, newSViv (1));
1782 root 1.40
1783 root 1.41 self->incr_pos = 0;
1784     self->incr_need = self->incr_pos + 1;
1785 root 1.40 }
1786 root 1.42 while (ix);
1787 root 1.40 }
1788    
1789     void incr_reset (CBOR *self)
1790     CODE:
1791     {
1792     SvREFCNT_dec (self->incr_count);
1793     self->incr_count = 0;
1794     }
1795    
1796 root 1.27 void DESTROY (CBOR *self)
1797     PPCODE:
1798     cbor_free (self);
1799    
1800 root 1.1 PROTOTYPES: ENABLE
1801    
1802     void encode_cbor (SV *scalar)
1803 root 1.36 ALIAS:
1804     encode_cbor = 0
1805     encode_cbor_sharing = F_ALLOW_SHARING
1806 root 1.1 PPCODE:
1807     {
1808     CBOR cbor;
1809     cbor_init (&cbor);
1810 root 1.36 cbor.flags |= ix;
1811 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1812     XPUSHs (scalar);
1813     }
1814    
1815     void decode_cbor (SV *cborstr)
1816     PPCODE:
1817     {
1818     CBOR cbor;
1819     cbor_init (&cbor);
1820     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1821     XPUSHs (cborstr);
1822     }
1823    
1824 root 1.64 #ifdef __AFL_COMPILER
1825    
1826     void
1827     afl_init ()
1828     CODE:
1829     __AFL_INIT ();
1830    
1831     int
1832     afl_loop (unsigned int count = 10000)
1833     CODE:
1834     RETVAL = __AFL_LOOP (count);
1835     OUTPUT:
1836     RETVAL
1837    
1838     #endif
1839