ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.72
Committed: Thu Oct 21 01:14:58 2021 UTC (2 years, 6 months ago) by root
Branch: MAIN
CVS Tags: rel-1_84
Changes since 1.71: +6 -2 lines
Log Message:
1.84

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.72 // perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0
34     #define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len)))
35    
36 root 1.35 // known major and minor types
37     enum cbor_type
38     {
39     MAJOR_SHIFT = 5,
40     MINOR_MASK = 0x1f,
41    
42     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
43     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
44     MAJOR_BYTES = 2 << MAJOR_SHIFT,
45     MAJOR_TEXT = 3 << MAJOR_SHIFT,
46     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
47     MAJOR_MAP = 5 << MAJOR_SHIFT,
48     MAJOR_TAG = 6 << MAJOR_SHIFT,
49     MAJOR_MISC = 7 << MAJOR_SHIFT,
50    
51     // INT/STRING/ARRAY/MAP subtypes
52     LENGTH_EXT1 = 24,
53     LENGTH_EXT2 = 25,
54     LENGTH_EXT4 = 26,
55     LENGTH_EXT8 = 27,
56    
57     // SIMPLE types (effectively MISC subtypes)
58     SIMPLE_FALSE = 20,
59     SIMPLE_TRUE = 21,
60     SIMPLE_NULL = 22,
61     SIMPLE_UNDEF = 23,
62    
63     // MISC subtype (unused)
64     MISC_EXT1 = 24,
65     MISC_FLOAT16 = 25,
66     MISC_FLOAT32 = 26,
67     MISC_FLOAT64 = 27,
68    
69     // BYTES/TEXT/ARRAY/MAP
70     MINOR_INDEF = 31,
71     };
72    
73 root 1.9 // known tags
74 root 1.8 enum cbor_tag
75     {
76 root 1.35 // extensions
77     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
78     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
79     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
80     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
81     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
82     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
83     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
84    
85     // rfc7049
86     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
87     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
88     CBOR_TAG_POS_BIGNUM = 2, // byte string
89     CBOR_TAG_NEG_BIGNUM = 3, // byte string
90     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
91     CBOR_TAG_BIGFLOAT = 5, // array
92    
93     CBOR_TAG_CONV_B64U = 21, // base64url, any
94     CBOR_TAG_CONV_B64 = 22, // base64, any
95     CBOR_TAG_CONV_HEX = 23, // base16, any
96     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
97    
98     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
99     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
100     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
101     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
102     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
103 root 1.8
104 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
105 root 1.8 };
106    
107 root 1.66 // known forced types, also hardcoded in CBOR.pm
108     enum
109     {
110     AS_CBOR = 0,
111 root 1.68 AS_INT = 1,
112     AS_BYTES = 2,
113     AS_TEXT = 3,
114 root 1.67 AS_FLOAT16 = 4,
115     AS_FLOAT32 = 5,
116     AS_FLOAT64 = 6,
117 root 1.70 AS_MAP = 7,
118 root 1.67 // possibly future enhancements: (generic) float, (generic) string
119 root 1.66 };
120    
121 root 1.24 #define F_SHRINK 0x00000001UL
122     #define F_ALLOW_UNKNOWN 0x00000002UL
123 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
124 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
125 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
126     #define F_PACK_STRINGS 0x00000020UL
127     #define F_TEXT_KEYS 0x00000040UL
128     #define F_TEXT_STRINGS 0x00000080UL
129     #define F_VALIDATE_UTF8 0x00000100UL
130 root 1.1
131     #define INIT_SIZE 32 // initial scalar size to be allocated
132    
133     #define SB do {
134     #define SE } while (0)
135    
136     #define IN_RANGE_INC(type,val,beg,end) \
137     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
138     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
139    
140     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
141    
142     #ifdef USE_ITHREADS
143     # define CBOR_SLOW 1
144     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
145     #else
146     # define CBOR_SLOW 0
147     # define CBOR_STASH cbor_stash
148     #endif
149    
150 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
151 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
152 root 1.1
153     typedef struct {
154     U32 flags;
155     U32 max_depth;
156     STRLEN max_size;
157 root 1.27 SV *filter;
158 root 1.40
159     // for the incremental parser
160     STRLEN incr_pos; // the current offset into the text
161     STRLEN incr_need; // minimum bytes needed to decode
162     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
163 root 1.1 } CBOR;
164    
165 root 1.5 ecb_inline void
166 root 1.1 cbor_init (CBOR *cbor)
167     {
168     Zero (cbor, 1, CBOR);
169     cbor->max_depth = 512;
170     }
171    
172 root 1.27 ecb_inline void
173     cbor_free (CBOR *cbor)
174     {
175     SvREFCNT_dec (cbor->filter);
176 root 1.40 SvREFCNT_dec (cbor->incr_count);
177 root 1.27 }
178    
179 root 1.1 /////////////////////////////////////////////////////////////////////////////
180     // utility functions
181    
182 root 1.5 ecb_inline SV *
183 root 1.1 get_bool (const char *name)
184     {
185     SV *sv = get_sv (name, 1);
186    
187     SvREADONLY_on (sv);
188     SvREADONLY_on (SvRV (sv));
189    
190     return sv;
191     }
192    
193 root 1.5 ecb_inline void
194 root 1.1 shrink (SV *sv)
195     {
196     sv_utf8_downgrade (sv, 1);
197    
198     if (SvLEN (sv) > SvCUR (sv) + 1)
199     {
200     #ifdef SvPV_shrink_to_cur
201     SvPV_shrink_to_cur (sv);
202     #elif defined (SvPV_renew)
203     SvPV_renew (sv, SvCUR (sv) + 1);
204     #endif
205     }
206     }
207    
208 root 1.21 // minimum length of a string to be registered for stringref
209 root 1.71 ecb_inline STRLEN
210 root 1.21 minimum_string_length (UV idx)
211     {
212 root 1.61 return idx <= 23 ? 3
213     : idx <= 0xffU ? 4
214     : idx <= 0xffffU ? 5
215     : idx <= 0xffffffffU ? 7
216     : 11;
217 root 1.21 }
218    
219 root 1.1 /////////////////////////////////////////////////////////////////////////////
220     // encoder
221    
222     // structure used for encoding CBOR
223     typedef struct
224     {
225     char *cur; // SvPVX (sv) + current output position
226     char *end; // SvEND (sv)
227     SV *sv; // result scalar
228     CBOR cbor;
229     U32 depth; // recursion level
230 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
231     UV stringref_idx;
232 root 1.19 HV *shareable; // ptr => index, or 0
233     UV shareable_idx;
234 root 1.1 } enc_t;
235    
236 root 1.5 ecb_inline void
237 root 1.1 need (enc_t *enc, STRLEN len)
238     {
239 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
240 root 1.1 {
241     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
242     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
243     enc->cur = SvPVX (enc->sv) + cur;
244     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
245     }
246     }
247    
248 root 1.70 static void encode_sv (enc_t *enc, SV *sv);
249    
250 root 1.5 ecb_inline void
251 root 1.1 encode_ch (enc_t *enc, char ch)
252     {
253     need (enc, 1);
254     *enc->cur++ = ch;
255     }
256    
257 root 1.66 // used for tags, intregers, element counts and so on
258 root 1.1 static void
259     encode_uint (enc_t *enc, int major, UV len)
260     {
261     need (enc, 9);
262    
263 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
264 root 1.1 *enc->cur++ = major | len;
265 root 1.36 else if (ecb_expect_true (len <= 0xffU))
266 root 1.1 {
267 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
268 root 1.1 *enc->cur++ = len;
269     }
270 root 1.36 else if (len <= 0xffffU)
271 root 1.1 {
272 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
273 root 1.1 *enc->cur++ = len >> 8;
274     *enc->cur++ = len;
275     }
276 root 1.36 else if (len <= 0xffffffffU)
277 root 1.1 {
278 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
279 root 1.1 *enc->cur++ = len >> 24;
280     *enc->cur++ = len >> 16;
281     *enc->cur++ = len >> 8;
282     *enc->cur++ = len;
283     }
284 root 1.4 else
285 root 1.1 {
286 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
287 root 1.1 *enc->cur++ = len >> 56;
288     *enc->cur++ = len >> 48;
289     *enc->cur++ = len >> 40;
290     *enc->cur++ = len >> 32;
291     *enc->cur++ = len >> 24;
292     *enc->cur++ = len >> 16;
293     *enc->cur++ = len >> 8;
294     *enc->cur++ = len;
295     }
296     }
297    
298 root 1.66 // encodes a perl value into a CBOR integer
299 root 1.67 ecb_inline void
300 root 1.66 encode_int (enc_t *enc, SV *sv)
301     {
302     if (SvIsUV (sv))
303     encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
304     else if (SvIVX (sv) >= 0)
305     encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
306     else
307     encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
308     }
309    
310 root 1.21 ecb_inline void
311     encode_tag (enc_t *enc, UV tag)
312     {
313 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
314 root 1.21 }
315    
316 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
317     ecb_noinline static void
318     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
319     {
320     STRLEN ulen = len;
321     U8 *p, *pend = (U8 *)str + len;
322    
323     for (p = (U8 *)str; p < pend; ++p)
324     ulen += *p >> 7; // count set high bits
325    
326     encode_uint (enc, MAJOR_TEXT, ulen);
327    
328     need (enc, ulen);
329     for (p = (U8 *)str; p < pend; ++p)
330     if (*p < 0x80)
331     *enc->cur++ = *p;
332     else
333     {
334     *enc->cur++ = 0xc0 + (*p >> 6);
335     *enc->cur++ = 0x80 + (*p & 63);
336     }
337     }
338    
339 root 1.30 ecb_inline void
340 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
341 root 1.30 {
342 root 1.53 if (ecb_expect_false (upgrade_utf8))
343 root 1.51 if (!utf8)
344     {
345 root 1.53 encode_str_utf8 (enc, utf8, str, len);
346 root 1.51 return;
347     }
348    
349 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
350 root 1.30 need (enc, len);
351     memcpy (enc->cur, str, len);
352     enc->cur += len;
353     }
354    
355 root 1.54 ecb_inline void
356 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
357 root 1.1 {
358 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
359 root 1.21 {
360 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
361 root 1.21
362     if (SvOK (*svp))
363     {
364     // already registered, use stringref
365     encode_tag (enc, CBOR_TAG_STRINGREF);
366 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
367 root 1.21 return;
368     }
369     else if (len >= minimum_string_length (enc->stringref_idx))
370     {
371     // register only
372     sv_setuv (*svp, enc->stringref_idx);
373     ++enc->stringref_idx;
374     }
375     }
376    
377 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
378 root 1.1 }
379    
380 root 1.66 ecb_inline void
381     encode_float16 (enc_t *enc, NV nv)
382     {
383     need (enc, 1+2);
384    
385     *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
386    
387     uint16_t fp = ecb_float_to_binary16 (nv);
388    
389     if (!ecb_big_endian ())
390     fp = ecb_bswap16 (fp);
391    
392     memcpy (enc->cur, &fp, 2);
393     enc->cur += 2;
394     }
395    
396     ecb_inline void
397     encode_float32 (enc_t *enc, NV nv)
398     {
399     need (enc, 1+4);
400    
401     *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
402    
403     uint32_t fp = ecb_float_to_binary32 (nv);
404    
405     if (!ecb_big_endian ())
406     fp = ecb_bswap32 (fp);
407    
408     memcpy (enc->cur, &fp, 4);
409     enc->cur += 4;
410     }
411    
412     ecb_inline void
413     encode_float64 (enc_t *enc, NV nv)
414     {
415     need (enc, 1+8);
416    
417     *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
418    
419     uint64_t fp = ecb_double_to_binary64 (nv);
420    
421     if (!ecb_big_endian ())
422     fp = ecb_bswap64 (fp);
423    
424     memcpy (enc->cur, &fp, 8);
425     enc->cur += 8;
426     }
427    
428     ecb_inline void
429 root 1.69 encode_bool (enc_t *enc, int istrue)
430     {
431     encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
432     }
433    
434 root 1.70 // encodes an arrayref containing key-value pairs as CBOR map
435     ecb_inline void
436     encode_array_as_map (enc_t *enc, SV *sv)
437     {
438     if (enc->depth >= enc->cbor.max_depth)
439     croak (ERR_NESTING_EXCEEDED);
440    
441     ++enc->depth;
442    
443     // as_map does error checking for us, but we re-check in case
444     // things have changed.
445    
446     if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
447     croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)");
448    
449     AV *av = (AV *)SvRV (sv);
450     int i, len = av_len (av);
451    
452     if (!(len & 1))
453     croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)");
454    
455     encode_uint (enc, MAJOR_MAP, (len + 1) >> 1);
456    
457     for (i = 0; i <= len; ++i)
458     {
459     SV **svp = av_fetch (av, i, 0);
460     encode_sv (enc, svp ? *svp : &PL_sv_undef);
461     }
462    
463     --enc->depth;
464     }
465    
466 root 1.69 ecb_inline void
467 root 1.66 encode_forced (enc_t *enc, UV type, SV *sv)
468     {
469     switch (type)
470     {
471     case AS_CBOR:
472     {
473     STRLEN len;
474     char *str = SvPVbyte (sv, len);
475    
476     need (enc, len);
477     memcpy (enc->cur, str, len);
478     enc->cur += len;
479     }
480     break;
481    
482     case AS_BYTES:
483     {
484     STRLEN len;
485     char *str = SvPVbyte (sv, len);
486     encode_strref (enc, 0, 0, str, len);
487     }
488     break;
489    
490     case AS_TEXT:
491     {
492     STRLEN len;
493     char *str = SvPVutf8 (sv, len);
494     encode_strref (enc, 1, 1, str, len);
495     }
496     break;
497    
498 root 1.69 case AS_INT: encode_int (enc, sv); break;
499    
500 root 1.66 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
501     case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
502     case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
503    
504 root 1.70 case AS_MAP: encode_array_as_map (enc, sv); break;
505    
506 root 1.66 default:
507     croak ("encountered malformed CBOR::XS::Tagged object");
508     }
509     }
510    
511 root 1.1 static void
512     encode_av (enc_t *enc, AV *av)
513     {
514     int i, len = av_len (av);
515    
516     if (enc->depth >= enc->cbor.max_depth)
517     croak (ERR_NESTING_EXCEEDED);
518    
519     ++enc->depth;
520    
521 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
522 root 1.1
523 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
524 root 1.45 for (i = 0; i <= len; ++i)
525     {
526     SV **svp = av_fetch (av, i, 0);
527     encode_sv (enc, svp ? *svp : &PL_sv_undef);
528     }
529     else
530     for (i = 0; i <= len; ++i)
531 root 1.46 {
532     SV *sv = AvARRAY (av)[i];
533     encode_sv (enc, sv ? sv : &PL_sv_undef);
534     }
535 root 1.1
536     --enc->depth;
537     }
538    
539     static void
540     encode_hv (enc_t *enc, HV *hv)
541     {
542     HE *he;
543    
544     if (enc->depth >= enc->cbor.max_depth)
545     croak (ERR_NESTING_EXCEEDED);
546    
547     ++enc->depth;
548    
549     int pairs = hv_iterinit (hv);
550     int mg = SvMAGICAL (hv);
551    
552 root 1.63 if (ecb_expect_false (mg))
553 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
554 root 1.1 else
555 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
556 root 1.1
557     while ((he = hv_iternext (hv)))
558     {
559 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
560     encode_sv (enc, HeSVKEY (he));
561 root 1.1 else
562 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
563 root 1.1
564 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
565 root 1.1 }
566    
567 root 1.63 if (ecb_expect_false (mg))
568 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
569 root 1.1
570     --enc->depth;
571     }
572    
573     // encode objects, arrays and special \0=false and \1=true values.
574     static void
575     encode_rv (enc_t *enc, SV *sv)
576     {
577 root 1.19 SvGETMAGIC (sv);
578 root 1.1
579 root 1.19 svtype svt = SvTYPE (sv);
580 root 1.18
581 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
582 root 1.1 {
583 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
584     ? types_boolean_stash
585     : gv_stashpv ("Types::Serialiser::Boolean", 1);
586     HV *error_stash = !CBOR_SLOW || types_error_stash
587     ? types_error_stash
588     : gv_stashpv ("Types::Serialiser::Error", 1);
589 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
590     ? cbor_tagged_stash
591     : gv_stashpv ("CBOR::XS::Tagged" , 1);
592 root 1.1
593 root 1.11 HV *stash = SvSTASH (sv);
594    
595     if (stash == boolean_stash)
596 root 1.34 {
597 root 1.69 encode_bool (enc, SvIV (sv));
598 root 1.34 return;
599     }
600 root 1.11 else if (stash == error_stash)
601 root 1.34 {
602 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
603 root 1.34 return;
604     }
605 root 1.11 else if (stash == tagged_stash)
606 root 1.6 {
607     if (svt != SVt_PVAV)
608     croak ("encountered CBOR::XS::Tagged object that isn't an array");
609    
610 root 1.66 switch (av_len ((AV *)sv))
611     {
612     case 2-1:
613     // actually a tagged value
614     encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
615     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
616     break;
617    
618     case 3-1:
619     // a forced type [value, type, undef]
620     encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
621     break;
622    
623     default:
624     croak ("encountered malformed CBOR::XS::Tagged object");
625     }
626 root 1.34
627     return;
628 root 1.6 }
629 root 1.34 }
630    
631     if (ecb_expect_false (SvREFCNT (sv) > 1)
632     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
633     {
634 root 1.63 if (ecb_expect_false (!enc->shareable))
635 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
636    
637     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
638    
639     if (SvOK (*svp))
640     {
641     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
642 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
643 root 1.34 return;
644     }
645     else
646     {
647     sv_setuv (*svp, enc->shareable_idx);
648     ++enc->shareable_idx;
649     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
650     }
651     }
652    
653     if (ecb_expect_false (SvOBJECT (sv)))
654     {
655     HV *stash = SvSTASH (sv);
656     GV *method;
657    
658 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
659     croak ("encountered object '%s', but forbid_objects is enabled",
660     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
661     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
662 root 1.1 {
663 root 1.11 dSP;
664    
665 root 1.50 ENTER; SAVETMPS;
666     PUSHMARK (SP);
667 root 1.6 // we re-bless the reference to get overload and other niceties right
668 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
669    
670     PUTBACK;
671     // G_SCALAR ensures that return value is 1
672     call_sv ((SV *)GvCV (method), G_SCALAR);
673     SPAGAIN;
674    
675     // catch this surprisingly common error
676     if (SvROK (TOPs) && SvRV (TOPs) == sv)
677     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
678    
679     encode_sv (enc, POPs);
680    
681     PUTBACK;
682    
683     FREETMPS; LEAVE;
684     }
685     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
686     {
687     dSP;
688 root 1.6
689 root 1.50 ENTER; SAVETMPS;
690     PUSHMARK (SP);
691 root 1.11 EXTEND (SP, 2);
692     // we re-bless the reference to get overload and other niceties right
693     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
694     PUSHs (sv_cbor);
695 root 1.1
696 root 1.11 PUTBACK;
697     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
698     SPAGAIN;
699 root 1.6
700 root 1.11 // catch this surprisingly common error
701     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
702     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
703 root 1.6
704 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
705 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
706 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
707 root 1.6
708 root 1.65 {
709     int i;
710    
711     for (i = 0; i < count; ++i)
712     encode_sv (enc, SP[i + 1 - count]);
713    
714     SP -= count;
715     }
716 root 1.6
717 root 1.11 PUTBACK;
718 root 1.6
719 root 1.11 FREETMPS; LEAVE;
720 root 1.1 }
721 root 1.11 else
722     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
723     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
724 root 1.1 }
725     else if (svt == SVt_PVHV)
726     encode_hv (enc, (HV *)sv);
727     else if (svt == SVt_PVAV)
728     encode_av (enc, (AV *)sv);
729 root 1.18 else
730 root 1.1 {
731 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
732     encode_sv (enc, sv);
733 root 1.1 }
734     }
735    
736     static void
737     encode_nv (enc_t *enc, SV *sv)
738     {
739     double nv = SvNVX (sv);
740    
741     need (enc, 9);
742    
743 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
744     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
745 root 1.1 //TODO: maybe I32?
746 root 1.5 else if (ecb_expect_false (nv == (float)nv))
747 root 1.66 encode_float32 (enc, nv);
748 root 1.1 else
749 root 1.66 encode_float64 (enc, nv);
750 root 1.1 }
751    
752     static void
753     encode_sv (enc_t *enc, SV *sv)
754     {
755     SvGETMAGIC (sv);
756    
757     if (SvPOKp (sv))
758     {
759     STRLEN len;
760     char *str = SvPV (sv, len);
761 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
762 root 1.1 }
763     else if (SvNOKp (sv))
764     encode_nv (enc, sv);
765     else if (SvIOKp (sv))
766 root 1.67 encode_int (enc, sv);
767 root 1.1 else if (SvROK (sv))
768     encode_rv (enc, SvRV (sv));
769     else if (!SvOK (sv))
770 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
771 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
772 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
773 root 1.1 else
774     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
775     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
776     }
777    
778     static SV *
779     encode_cbor (SV *scalar, CBOR *cbor)
780     {
781 root 1.48 enc_t enc = { 0 };
782 root 1.1
783 root 1.63 enc.cbor = *cbor;
784     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
785     enc.cur = SvPVX (enc.sv);
786     enc.end = SvEND (enc.sv);
787 root 1.1
788     SvPOK_only (enc.sv);
789 root 1.20
790 root 1.32 if (cbor->flags & F_PACK_STRINGS)
791 root 1.20 {
792     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
793     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
794     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
795     }
796    
797 root 1.1 encode_sv (&enc, scalar);
798    
799     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
800     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
801    
802     if (enc.cbor.flags & F_SHRINK)
803     shrink (enc.sv);
804    
805     return enc.sv;
806     }
807    
808     /////////////////////////////////////////////////////////////////////////////
809     // decoder
810    
811     // structure used for decoding CBOR
812     typedef struct
813     {
814     U8 *cur; // current parser pointer
815     U8 *end; // end of input string
816     const char *err; // parse error, if != 0
817     CBOR cbor;
818     U32 depth; // recursion depth
819     U32 maxdepth; // recursion depth limit
820 root 1.19 AV *shareable;
821 root 1.20 AV *stringref;
822 root 1.27 SV *decode_tagged;
823 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
824 root 1.1 } dec_t;
825    
826 root 1.59 // set dec->err to ERRSV
827     ecb_cold static void
828     err_errsv (dec_t *dec)
829     {
830     if (!dec->err)
831     {
832     dec->err_sv = newSVsv (ERRSV);
833    
834     // chop off the trailing \n
835     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
836     *SvEND (dec->err_sv) = 0;
837    
838     dec->err = SvPVutf8_nolen (dec->err_sv);
839     }
840     }
841 root 1.1
842 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
843     ecb_cold static void
844     err_set (dec_t *dec, const char *reason)
845     {
846     if (!dec->err)
847     dec->err = reason;
848     }
849    
850     ecb_cold static void
851     err_unexpected_end (dec_t *dec)
852     {
853     err_set (dec, "unexpected end of CBOR data");
854     }
855    
856     #define ERR_DO(do) SB do; goto fail; SE
857     #define ERR(reason) ERR_DO (err_set (dec, reason))
858     #define ERR_ERRSV ERR_DO (err_errsv (dec))
859    
860 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
861 root 1.1
862 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
863 root 1.1 #define DEC_DEC_DEPTH --dec->depth
864    
865     static UV
866     decode_uint (dec_t *dec)
867     {
868 root 1.35 U8 m = *dec->cur & MINOR_MASK;
869     ++dec->cur;
870    
871     if (ecb_expect_true (m < LENGTH_EXT1))
872     return m;
873 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
874     {
875     WANT (1);
876     dec->cur += 1;
877     return dec->cur[-1];
878     }
879     else if (ecb_expect_true (m == LENGTH_EXT2))
880     {
881     WANT (2);
882     dec->cur += 2;
883     return (((UV)dec->cur[-2]) << 8)
884     | ((UV)dec->cur[-1]);
885     }
886     else if (ecb_expect_true (m == LENGTH_EXT4))
887     {
888     WANT (4);
889     dec->cur += 4;
890     return (((UV)dec->cur[-4]) << 24)
891     | (((UV)dec->cur[-3]) << 16)
892     | (((UV)dec->cur[-2]) << 8)
893     | ((UV)dec->cur[-1]);
894     }
895     else if (ecb_expect_true (m == LENGTH_EXT8))
896 root 1.1 {
897 root 1.36 WANT (8);
898     dec->cur += 8;
899 root 1.34
900 root 1.36 return
901 root 1.34 #if UVSIZE < 8
902 root 1.36 0
903 root 1.34 #else
904 root 1.36 (((UV)dec->cur[-8]) << 56)
905     | (((UV)dec->cur[-7]) << 48)
906     | (((UV)dec->cur[-6]) << 40)
907     | (((UV)dec->cur[-5]) << 32)
908 root 1.34 #endif
909 root 1.36 | (((UV)dec->cur[-4]) << 24)
910     | (((UV)dec->cur[-3]) << 16)
911     | (((UV)dec->cur[-2]) << 8)
912     | ((UV)dec->cur[-1]);
913 root 1.1 }
914 root 1.36 else
915     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
916 root 1.1
917     fail:
918     return 0;
919     }
920    
921     static SV *decode_sv (dec_t *dec);
922    
923     static SV *
924     decode_av (dec_t *dec)
925     {
926     AV *av = newAV ();
927    
928     DEC_INC_DEPTH;
929    
930 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
931 root 1.1 {
932     ++dec->cur;
933    
934     for (;;)
935     {
936     WANT (1);
937    
938 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
939 root 1.1 {
940     ++dec->cur;
941     break;
942     }
943    
944     av_push (av, decode_sv (dec));
945     }
946     }
947     else
948     {
949 root 1.57 UV i, len = decode_uint (dec);
950 root 1.1
951 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
952 root 1.1 av_fill (av, len - 1);
953    
954     for (i = 0; i < len; ++i)
955     AvARRAY (av)[i] = decode_sv (dec);
956     }
957    
958     DEC_DEC_DEPTH;
959     return newRV_noinc ((SV *)av);
960    
961     fail:
962 root 1.63 SvREFCNT_dec_NN (av);
963 root 1.1 DEC_DEC_DEPTH;
964     return &PL_sv_undef;
965     }
966    
967 root 1.16 static void
968     decode_he (dec_t *dec, HV *hv)
969     {
970     // for speed reasons, we specialcase single-string
971 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
972    
973 root 1.23 if (ecb_expect_true (!dec->stringref))
974 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
975 root 1.21 {
976 root 1.56 STRLEN len = decode_uint (dec);
977 root 1.21 char *key = (char *)dec->cur;
978 root 1.16
979 root 1.49 WANT (len);
980 root 1.21 dec->cur += len;
981    
982 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
983 root 1.16
984 root 1.21 return;
985     }
986 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
987 root 1.21 {
988 root 1.56 STRLEN len = decode_uint (dec);
989 root 1.21 char *key = (char *)dec->cur;
990 root 1.72 printf ("len %d\n", len);//D
991 root 1.16
992 root 1.49 WANT (len);
993 root 1.21 dec->cur += len;
994 root 1.20
995 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
996 root 1.72 if (!cbor_is_utf8_string ((U8 *)key, len))
997 root 1.38 ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
998 root 1.16
999 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
1000 root 1.16
1001 root 1.21 return;
1002     }
1003 root 1.20
1004 root 1.21 SV *k = decode_sv (dec);
1005     SV *v = decode_sv (dec);
1006 root 1.16
1007 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
1008     // methods, and this is hopefully the only place where it can happen,
1009     // so if there is a chance of an exception, take the very slow path.
1010     // since catching exceptions is "undocumented/internal/forbidden" by
1011     // the new p5p powers, we need to call out to a perl function :/
1012     if (ecb_expect_false (SvAMAGIC (k)))
1013     {
1014     dSP;
1015    
1016     ENTER; SAVETMPS;
1017     PUSHMARK (SP);
1018     EXTEND (SP, 3);
1019     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
1020     PUSHs (sv_2mortal (k));
1021     PUSHs (sv_2mortal (v));
1022    
1023     PUTBACK;
1024     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
1025     SPAGAIN;
1026    
1027     FREETMPS; LEAVE;
1028    
1029     if (SvTRUE (ERRSV))
1030     ERR_ERRSV;
1031    
1032     return;
1033     }
1034    
1035 root 1.21 hv_store_ent (hv, k, v, 0);
1036 root 1.63 SvREFCNT_dec_NN (k);
1037 root 1.38
1038     fail:
1039     ;
1040 root 1.16 }
1041    
1042 root 1.1 static SV *
1043     decode_hv (dec_t *dec)
1044     {
1045     HV *hv = newHV ();
1046    
1047     DEC_INC_DEPTH;
1048    
1049 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
1050 root 1.1 {
1051     ++dec->cur;
1052    
1053     for (;;)
1054     {
1055     WANT (1);
1056    
1057 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
1058 root 1.1 {
1059     ++dec->cur;
1060     break;
1061     }
1062    
1063 root 1.16 decode_he (dec, hv);
1064 root 1.1 }
1065     }
1066     else
1067     {
1068 root 1.57 UV pairs = decode_uint (dec);
1069    
1070     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
1071 root 1.1
1072 root 1.16 while (pairs--)
1073     decode_he (dec, hv);
1074 root 1.1 }
1075    
1076     DEC_DEC_DEPTH;
1077     return newRV_noinc ((SV *)hv);
1078    
1079     fail:
1080 root 1.63 SvREFCNT_dec_NN (hv);
1081 root 1.1 DEC_DEC_DEPTH;
1082     return &PL_sv_undef;
1083     }
1084    
1085     static SV *
1086     decode_str (dec_t *dec, int utf8)
1087     {
1088 root 1.6 SV *sv = 0;
1089 root 1.1
1090 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
1091 root 1.1 {
1092 root 1.33 // indefinite length strings
1093 root 1.1 ++dec->cur;
1094    
1095 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
1096 root 1.33
1097 root 1.1 sv = newSVpvn ("", 0);
1098    
1099     for (;;)
1100     {
1101     WANT (1);
1102    
1103 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
1104     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
1105 root 1.33 {
1106     ++dec->cur;
1107     break;
1108     }
1109     else
1110     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
1111    
1112     STRLEN len = decode_uint (dec);
1113 root 1.1
1114 root 1.33 WANT (len);
1115     sv_catpvn (sv, dec->cur, len);
1116     dec->cur += len;
1117 root 1.1 }
1118     }
1119     else
1120     {
1121     STRLEN len = decode_uint (dec);
1122    
1123     WANT (len);
1124     sv = newSVpvn (dec->cur, len);
1125     dec->cur += len;
1126 root 1.25
1127     if (ecb_expect_false (dec->stringref)
1128     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
1129     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
1130 root 1.1 }
1131    
1132     if (utf8)
1133 root 1.38 {
1134     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
1135 root 1.72 if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv)))
1136 root 1.38 ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
1137    
1138     SvUTF8_on (sv);
1139     }
1140 root 1.1
1141     return sv;
1142    
1143     fail:
1144 root 1.6 SvREFCNT_dec (sv);
1145 root 1.1 return &PL_sv_undef;
1146     }
1147    
1148     static SV *
1149 root 1.3 decode_tagged (dec_t *dec)
1150     {
1151 root 1.19 SV *sv = 0;
1152 root 1.3 UV tag = decode_uint (dec);
1153 root 1.19
1154     WANT (1);
1155 root 1.3
1156 root 1.18 switch (tag)
1157     {
1158     case CBOR_TAG_MAGIC:
1159 root 1.20 sv = decode_sv (dec);
1160     break;
1161 root 1.18
1162     case CBOR_TAG_INDIRECTION:
1163 root 1.20 sv = newRV_noinc (decode_sv (dec));
1164     break;
1165    
1166     case CBOR_TAG_STRINGREF_NAMESPACE:
1167     {
1168 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1169 root 1.55 // erase mortalised caches, e.g. "shareable"
1170     ENTER;
1171 root 1.20
1172     SAVESPTR (dec->stringref);
1173     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1174    
1175     sv = decode_sv (dec);
1176    
1177 root 1.55 LEAVE;
1178 root 1.20 }
1179     break;
1180    
1181     case CBOR_TAG_STRINGREF:
1182     {
1183 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1184 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1185    
1186     UV idx = decode_uint (dec);
1187    
1188 root 1.64 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1189 root 1.20 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1190    
1191     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1192     }
1193     break;
1194 root 1.11
1195 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1196     {
1197     if (ecb_expect_false (!dec->shareable))
1198     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1199    
1200 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1201     {
1202     sv = newSV (0);
1203     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1204 root 1.18
1205 root 1.37 SV *osv = decode_sv (dec);
1206     sv_setsv (sv, osv);
1207     SvREFCNT_dec_NN (osv);
1208     }
1209     else
1210     {
1211     av_push (dec->shareable, &PL_sv_undef);
1212     int idx = AvFILLp (dec->shareable);
1213     sv = decode_sv (dec);
1214     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1215     }
1216 root 1.19 }
1217 root 1.20 break;
1218 root 1.18
1219     case CBOR_TAG_VALUE_SHAREDREF:
1220 root 1.17 {
1221 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1222 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1223 root 1.18
1224 root 1.19 UV idx = decode_uint (dec);
1225    
1226 root 1.64 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1227 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1228    
1229 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1230 root 1.37
1231     if (sv == &PL_sv_undef)
1232     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1233 root 1.17 }
1234 root 1.20 break;
1235 root 1.17
1236 root 1.18 case CBOR_TAG_PERL_OBJECT:
1237     {
1238 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1239     goto filter;
1240    
1241 root 1.19 sv = decode_sv (dec);
1242    
1243 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1244     ERR ("corrupted CBOR data (non-array perl object)");
1245    
1246     AV *av = (AV *)SvRV (sv);
1247     int len = av_len (av) + 1;
1248     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1249    
1250     if (!stash)
1251     ERR ("cannot decode perl-object (package does not exist)");
1252    
1253     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1254    
1255     if (!method)
1256     ERR ("cannot decode perl-object (package does not have a THAW method)");
1257    
1258     dSP;
1259    
1260 root 1.50 ENTER; SAVETMPS;
1261     PUSHMARK (SP);
1262 root 1.18 EXTEND (SP, len + 1);
1263     // we re-bless the reference to get overload and other niceties right
1264     PUSHs (*av_fetch (av, 0, 1));
1265     PUSHs (sv_cbor);
1266    
1267     int i;
1268    
1269     for (i = 1; i < len; ++i)
1270     PUSHs (*av_fetch (av, i, 1));
1271    
1272     PUTBACK;
1273     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1274     SPAGAIN;
1275    
1276     if (SvTRUE (ERRSV))
1277     {
1278     FREETMPS; LEAVE;
1279 root 1.59 ERR_ERRSV;
1280 root 1.18 }
1281    
1282 root 1.63 SvREFCNT_dec_NN (sv);
1283 root 1.18 sv = SvREFCNT_inc (POPs);
1284 root 1.11
1285 root 1.18 PUTBACK;
1286 root 1.11
1287 root 1.18 FREETMPS; LEAVE;
1288     }
1289 root 1.20 break;
1290 root 1.9
1291 root 1.18 default:
1292 root 1.60 filter:
1293 root 1.18 {
1294 root 1.58 SV *tag_sv = newSVuv (tag);
1295    
1296 root 1.19 sv = decode_sv (dec);
1297    
1298 root 1.27 dSP;
1299 root 1.50 ENTER; SAVETMPS;
1300     PUSHMARK (SP);
1301 root 1.27 EXTEND (SP, 2);
1302 root 1.58 PUSHs (tag_sv);
1303 root 1.27 PUSHs (sv);
1304    
1305     PUTBACK;
1306     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1307     SPAGAIN;
1308    
1309     if (SvTRUE (ERRSV))
1310     {
1311 root 1.63 SvREFCNT_dec_NN (tag_sv);
1312 root 1.27 FREETMPS; LEAVE;
1313 root 1.59 ERR_ERRSV;
1314 root 1.27 }
1315    
1316     if (count)
1317     {
1318 root 1.63 SvREFCNT_dec_NN (tag_sv);
1319     SvREFCNT_dec_NN (sv);
1320 root 1.65 sv = SvREFCNT_inc_NN (TOPs);
1321     SP -= count;
1322 root 1.27 }
1323     else
1324     {
1325     AV *av = newAV ();
1326 root 1.58 av_push (av, tag_sv);
1327 root 1.27 av_push (av, sv);
1328    
1329     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1330     ? cbor_tagged_stash
1331     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1332     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1333     }
1334 root 1.7
1335 root 1.27 PUTBACK;
1336    
1337     FREETMPS; LEAVE;
1338 root 1.18 }
1339 root 1.20 break;
1340 root 1.11 }
1341 root 1.9
1342 root 1.20 return sv;
1343    
1344 root 1.9 fail:
1345     SvREFCNT_dec (sv);
1346     return &PL_sv_undef;
1347 root 1.3 }
1348    
1349     static SV *
1350 root 1.1 decode_sv (dec_t *dec)
1351     {
1352     WANT (1);
1353    
1354 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1355 root 1.1 {
1356 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1357     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1358     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1359     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1360     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1361     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1362     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1363    
1364     case MAJOR_MISC >> MAJOR_SHIFT:
1365     switch (*dec->cur++ & MINOR_MASK)
1366 root 1.1 {
1367 root 1.35 case SIMPLE_FALSE:
1368 root 1.1 #if CBOR_SLOW
1369 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1370 root 1.1 #endif
1371 root 1.10 return newSVsv (types_false);
1372 root 1.35 case SIMPLE_TRUE:
1373 root 1.1 #if CBOR_SLOW
1374 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1375 root 1.1 #endif
1376 root 1.10 return newSVsv (types_true);
1377 root 1.35 case SIMPLE_NULL:
1378 root 1.1 return newSVsv (&PL_sv_undef);
1379 root 1.35 case SIMPLE_UNDEF:
1380 root 1.10 #if CBOR_SLOW
1381     types_error = get_bool ("Types::Serialiser::error");
1382     #endif
1383     return newSVsv (types_error);
1384 root 1.1
1385 root 1.35 case MISC_FLOAT16:
1386 root 1.2 {
1387     WANT (2);
1388    
1389     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1390     dec->cur += 2;
1391    
1392     return newSVnv (ecb_binary16_to_float (fp));
1393     }
1394 root 1.1
1395 root 1.35 case MISC_FLOAT32:
1396 root 1.1 {
1397     uint32_t fp;
1398     WANT (4);
1399     memcpy (&fp, dec->cur, 4);
1400     dec->cur += 4;
1401    
1402     if (!ecb_big_endian ())
1403     fp = ecb_bswap32 (fp);
1404    
1405     return newSVnv (ecb_binary32_to_float (fp));
1406     }
1407    
1408 root 1.35 case MISC_FLOAT64:
1409 root 1.1 {
1410     uint64_t fp;
1411     WANT (8);
1412     memcpy (&fp, dec->cur, 8);
1413     dec->cur += 8;
1414    
1415     if (!ecb_big_endian ())
1416     fp = ecb_bswap64 (fp);
1417    
1418     return newSVnv (ecb_binary64_to_double (fp));
1419     }
1420    
1421 root 1.35 // 0..19 unassigned simple
1422 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1423     // 28-30 unassigned misc
1424     // 31 break code
1425 root 1.1 default:
1426 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1427 root 1.1 }
1428    
1429     break;
1430     }
1431    
1432     fail:
1433     return &PL_sv_undef;
1434     }
1435    
1436     static SV *
1437     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1438     {
1439 root 1.48 dec_t dec = { 0 };
1440 root 1.1 SV *sv;
1441 root 1.16 STRLEN len;
1442     char *data = SvPVbyte (string, len);
1443 root 1.1
1444 root 1.16 if (len > cbor->max_size && cbor->max_size)
1445     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1446     (unsigned long)len, (unsigned long)cbor->max_size);
1447 root 1.1
1448     dec.cbor = *cbor;
1449 root 1.16 dec.cur = (U8 *)data;
1450     dec.end = (U8 *)data + len;
1451 root 1.1
1452     sv = decode_sv (&dec);
1453    
1454     if (offset_return)
1455     *offset_return = dec.cur;
1456    
1457     if (!(offset_return || !sv))
1458 root 1.2 if (dec.cur != dec.end && !dec.err)
1459     dec.err = "garbage after CBOR object";
1460    
1461     if (dec.err)
1462 root 1.1 {
1463 root 1.39 if (dec.shareable)
1464     {
1465 root 1.58 // need to break cyclic links, which would all be in shareable
1466 root 1.39 int i;
1467     SV **svp;
1468    
1469     for (i = av_len (dec.shareable) + 1; i--; )
1470     if ((svp = av_fetch (dec.shareable, i, 0)))
1471     sv_setsv (*svp, &PL_sv_undef);
1472     }
1473    
1474 root 1.63 SvREFCNT_dec_NN (sv);
1475 root 1.59
1476     if (dec.err_sv)
1477     sv_2mortal (dec.err_sv);
1478    
1479 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1480 root 1.1 }
1481    
1482     sv = sv_2mortal (sv);
1483    
1484     return sv;
1485     }
1486    
1487     /////////////////////////////////////////////////////////////////////////////
1488 root 1.40 // incremental parser
1489    
1490     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1491    
1492     // returns 0 for notyet, 1 for success or error
1493     static int
1494     incr_parse (CBOR *self, SV *cborstr)
1495     {
1496     STRLEN cur;
1497     SvPV (cborstr, cur);
1498    
1499     while (ecb_expect_true (self->incr_need <= cur))
1500     {
1501     // table of integer count bytes
1502     static I8 incr_len[MINOR_MASK + 1] = {
1503     0, 0, 0, 0, 0, 0, 0, 0,
1504     0, 0, 0, 0, 0, 0, 0, 0,
1505     0, 0, 0, 0, 0, 0, 0, 0,
1506     1, 2, 4, 8,-1,-1,-1,-2
1507     };
1508    
1509     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1510     U8 m = *p & MINOR_MASK;
1511     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1512     I8 ilen = incr_len[m];
1513    
1514     self->incr_need = self->incr_pos + 1;
1515    
1516     if (ecb_expect_false (ilen < 0))
1517     {
1518     if (m != MINOR_INDEF)
1519     return 1; // error
1520    
1521     if (*p == (MAJOR_MISC | MINOR_INDEF))
1522     {
1523     if (count >= 0)
1524     return 1; // error
1525    
1526     count = 1;
1527     }
1528     else
1529     {
1530     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1531     count = -1;
1532     }
1533     }
1534     else
1535     {
1536     self->incr_need += ilen;
1537     if (ecb_expect_false (self->incr_need > cur))
1538     return 0;
1539    
1540     int major = *p >> MAJOR_SHIFT;
1541    
1542     switch (major)
1543     {
1544 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1545     ++count; // tags merely prefix another value
1546     break;
1547    
1548 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1549     case MAJOR_TEXT >> MAJOR_SHIFT:
1550     case MAJOR_ARRAY >> MAJOR_SHIFT:
1551     case MAJOR_MAP >> MAJOR_SHIFT:
1552     {
1553     UV len;
1554    
1555     if (ecb_expect_false (ilen))
1556     {
1557     len = 0;
1558    
1559     do {
1560     len = (len << 8) | *++p;
1561     } while (--ilen);
1562     }
1563     else
1564     len = m;
1565    
1566     switch (major)
1567     {
1568     case MAJOR_BYTES >> MAJOR_SHIFT:
1569     case MAJOR_TEXT >> MAJOR_SHIFT:
1570     self->incr_need += len;
1571     if (ecb_expect_false (self->incr_need > cur))
1572     return 0;
1573    
1574     break;
1575    
1576     case MAJOR_MAP >> MAJOR_SHIFT:
1577     len <<= 1;
1578 root 1.71 /* FALLTHROUGH */
1579 root 1.40 case MAJOR_ARRAY >> MAJOR_SHIFT:
1580     if (len)
1581     {
1582     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1583     count = len + 1;
1584     }
1585     break;
1586     }
1587     }
1588     }
1589     }
1590    
1591     self->incr_pos = self->incr_need;
1592    
1593     if (count > 0)
1594     {
1595     while (!--count)
1596     {
1597     if (!AvFILLp (self->incr_count))
1598     return 1; // done
1599    
1600     SvREFCNT_dec_NN (av_pop (self->incr_count));
1601     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1602     }
1603    
1604     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1605     }
1606     }
1607    
1608     return 0;
1609     }
1610    
1611    
1612     /////////////////////////////////////////////////////////////////////////////
1613 root 1.1 // XS interface functions
1614    
1615     MODULE = CBOR::XS PACKAGE = CBOR::XS
1616    
1617     BOOT:
1618     {
1619     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1620 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1621 root 1.1
1622 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1623     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1624    
1625     types_true = get_bool ("Types::Serialiser::true" );
1626     types_false = get_bool ("Types::Serialiser::false");
1627     types_error = get_bool ("Types::Serialiser::error");
1628 root 1.11
1629 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1630    
1631 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1632     SvREADONLY_on (sv_cbor);
1633 root 1.56
1634     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1635 root 1.1 }
1636    
1637     PROTOTYPES: DISABLE
1638    
1639     void CLONE (...)
1640     CODE:
1641 root 1.10 cbor_stash = 0;
1642     cbor_tagged_stash = 0;
1643     types_error_stash = 0;
1644     types_boolean_stash = 0;
1645 root 1.1
1646     void new (char *klass)
1647     PPCODE:
1648     {
1649     SV *pv = NEWSV (0, sizeof (CBOR));
1650     SvPOK_only (pv);
1651     cbor_init ((CBOR *)SvPVX (pv));
1652     XPUSHs (sv_2mortal (sv_bless (
1653     newRV_noinc (pv),
1654     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1655     )));
1656     }
1657    
1658     void shrink (CBOR *self, int enable = 1)
1659     ALIAS:
1660     shrink = F_SHRINK
1661     allow_unknown = F_ALLOW_UNKNOWN
1662 root 1.18 allow_sharing = F_ALLOW_SHARING
1663 root 1.37 allow_cycles = F_ALLOW_CYCLES
1664 root 1.60 forbid_objects = F_FORBID_OBJECTS
1665 root 1.32 pack_strings = F_PACK_STRINGS
1666 root 1.54 text_keys = F_TEXT_KEYS
1667     text_strings = F_TEXT_STRINGS
1668 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1669 root 1.1 PPCODE:
1670     {
1671     if (enable)
1672     self->flags |= ix;
1673     else
1674     self->flags &= ~ix;
1675    
1676     XPUSHs (ST (0));
1677     }
1678    
1679     void get_shrink (CBOR *self)
1680     ALIAS:
1681     get_shrink = F_SHRINK
1682     get_allow_unknown = F_ALLOW_UNKNOWN
1683 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1684 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1685 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1686 root 1.32 get_pack_strings = F_PACK_STRINGS
1687 root 1.54 get_text_keys = F_TEXT_KEYS
1688     get_text_strings = F_TEXT_STRINGS
1689 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1690 root 1.1 PPCODE:
1691     XPUSHs (boolSV (self->flags & ix));
1692    
1693     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1694     PPCODE:
1695     self->max_depth = max_depth;
1696     XPUSHs (ST (0));
1697    
1698     U32 get_max_depth (CBOR *self)
1699     CODE:
1700     RETVAL = self->max_depth;
1701     OUTPUT:
1702     RETVAL
1703    
1704     void max_size (CBOR *self, U32 max_size = 0)
1705     PPCODE:
1706     self->max_size = max_size;
1707     XPUSHs (ST (0));
1708    
1709     int get_max_size (CBOR *self)
1710     CODE:
1711     RETVAL = self->max_size;
1712     OUTPUT:
1713     RETVAL
1714    
1715 root 1.27 void filter (CBOR *self, SV *filter = 0)
1716     PPCODE:
1717     SvREFCNT_dec (self->filter);
1718     self->filter = filter ? newSVsv (filter) : filter;
1719     XPUSHs (ST (0));
1720    
1721     SV *get_filter (CBOR *self)
1722     CODE:
1723     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1724     OUTPUT:
1725     RETVAL
1726    
1727 root 1.1 void encode (CBOR *self, SV *scalar)
1728     PPCODE:
1729     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1730     XPUSHs (scalar);
1731    
1732     void decode (CBOR *self, SV *cborstr)
1733     PPCODE:
1734     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1735     XPUSHs (cborstr);
1736    
1737     void decode_prefix (CBOR *self, SV *cborstr)
1738     PPCODE:
1739     {
1740     SV *sv;
1741     char *offset;
1742     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1743     EXTEND (SP, 2);
1744     PUSHs (sv);
1745     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1746     }
1747    
1748 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1749 root 1.42 ALIAS:
1750     incr_parse_multiple = 1
1751 root 1.40 PPCODE:
1752     {
1753     if (SvUTF8 (cborstr))
1754     sv_utf8_downgrade (cborstr, 0);
1755    
1756     if (!self->incr_count)
1757     {
1758     self->incr_count = newAV ();
1759     self->incr_pos = 0;
1760     self->incr_need = 1;
1761    
1762     av_push (self->incr_count, newSViv (1));
1763     }
1764    
1765 root 1.41 do
1766 root 1.40 {
1767     if (!incr_parse (self, cborstr))
1768     {
1769     if (self->incr_need > self->max_size && self->max_size)
1770     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1771     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1772    
1773     break;
1774     }
1775    
1776 root 1.41 SV *sv;
1777     char *offset;
1778 root 1.40
1779 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1780     XPUSHs (sv);
1781 root 1.40
1782 root 1.41 sv_chop (cborstr, offset);
1783 root 1.40
1784 root 1.41 av_clear (self->incr_count);
1785     av_push (self->incr_count, newSViv (1));
1786 root 1.40
1787 root 1.41 self->incr_pos = 0;
1788     self->incr_need = self->incr_pos + 1;
1789 root 1.40 }
1790 root 1.42 while (ix);
1791 root 1.40 }
1792    
1793     void incr_reset (CBOR *self)
1794     CODE:
1795     {
1796     SvREFCNT_dec (self->incr_count);
1797     self->incr_count = 0;
1798     }
1799    
1800 root 1.27 void DESTROY (CBOR *self)
1801     PPCODE:
1802     cbor_free (self);
1803    
1804 root 1.1 PROTOTYPES: ENABLE
1805    
1806     void encode_cbor (SV *scalar)
1807 root 1.36 ALIAS:
1808     encode_cbor = 0
1809     encode_cbor_sharing = F_ALLOW_SHARING
1810 root 1.1 PPCODE:
1811     {
1812     CBOR cbor;
1813     cbor_init (&cbor);
1814 root 1.36 cbor.flags |= ix;
1815 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1816     XPUSHs (scalar);
1817     }
1818    
1819     void decode_cbor (SV *cborstr)
1820     PPCODE:
1821     {
1822     CBOR cbor;
1823     cbor_init (&cbor);
1824     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1825     XPUSHs (cborstr);
1826     }
1827    
1828 root 1.64 #ifdef __AFL_COMPILER
1829    
1830     void
1831     afl_init ()
1832     CODE:
1833     __AFL_INIT ();
1834    
1835     int
1836     afl_loop (unsigned int count = 10000)
1837     CODE:
1838     RETVAL = __AFL_LOOP (count);
1839     OUTPUT:
1840     RETVAL
1841    
1842     #endif
1843