ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.68
Committed: Mon Nov 30 18:30:29 2020 UTC (3 years, 5 months ago) by root
Branch: MAIN
CVS Tags: rel-1_81
Changes since 1.67: +3 -3 lines
Log Message:
1.81

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.35 // known major and minor types
34     enum cbor_type
35     {
36     MAJOR_SHIFT = 5,
37     MINOR_MASK = 0x1f,
38    
39     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
40     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
41     MAJOR_BYTES = 2 << MAJOR_SHIFT,
42     MAJOR_TEXT = 3 << MAJOR_SHIFT,
43     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
44     MAJOR_MAP = 5 << MAJOR_SHIFT,
45     MAJOR_TAG = 6 << MAJOR_SHIFT,
46     MAJOR_MISC = 7 << MAJOR_SHIFT,
47    
48     // INT/STRING/ARRAY/MAP subtypes
49     LENGTH_EXT1 = 24,
50     LENGTH_EXT2 = 25,
51     LENGTH_EXT4 = 26,
52     LENGTH_EXT8 = 27,
53    
54     // SIMPLE types (effectively MISC subtypes)
55     SIMPLE_FALSE = 20,
56     SIMPLE_TRUE = 21,
57     SIMPLE_NULL = 22,
58     SIMPLE_UNDEF = 23,
59    
60     // MISC subtype (unused)
61     MISC_EXT1 = 24,
62     MISC_FLOAT16 = 25,
63     MISC_FLOAT32 = 26,
64     MISC_FLOAT64 = 27,
65    
66     // BYTES/TEXT/ARRAY/MAP
67     MINOR_INDEF = 31,
68     };
69    
70 root 1.9 // known tags
71 root 1.8 enum cbor_tag
72     {
73 root 1.35 // extensions
74     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
75     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
76     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
77     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
78     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
79     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
80     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
81    
82     // rfc7049
83     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
84     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
85     CBOR_TAG_POS_BIGNUM = 2, // byte string
86     CBOR_TAG_NEG_BIGNUM = 3, // byte string
87     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
88     CBOR_TAG_BIGFLOAT = 5, // array
89    
90     CBOR_TAG_CONV_B64U = 21, // base64url, any
91     CBOR_TAG_CONV_B64 = 22, // base64, any
92     CBOR_TAG_CONV_HEX = 23, // base16, any
93     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
94    
95     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
96     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
97     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
98     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
99     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
100 root 1.8
101 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
102 root 1.8 };
103    
104 root 1.66 // known forced types, also hardcoded in CBOR.pm
105     enum
106     {
107     AS_CBOR = 0,
108 root 1.68 AS_INT = 1,
109     AS_BYTES = 2,
110     AS_TEXT = 3,
111 root 1.67 AS_FLOAT16 = 4,
112     AS_FLOAT32 = 5,
113     AS_FLOAT64 = 6,
114     // possibly future enhancements: (generic) float, (generic) string
115 root 1.66 };
116    
117 root 1.24 #define F_SHRINK 0x00000001UL
118     #define F_ALLOW_UNKNOWN 0x00000002UL
119 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
120 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
121 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
122     #define F_PACK_STRINGS 0x00000020UL
123     #define F_TEXT_KEYS 0x00000040UL
124     #define F_TEXT_STRINGS 0x00000080UL
125     #define F_VALIDATE_UTF8 0x00000100UL
126 root 1.1
127     #define INIT_SIZE 32 // initial scalar size to be allocated
128    
129     #define SB do {
130     #define SE } while (0)
131    
132     #define IN_RANGE_INC(type,val,beg,end) \
133     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
134     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
135    
136     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
137    
138     #ifdef USE_ITHREADS
139     # define CBOR_SLOW 1
140     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
141     #else
142     # define CBOR_SLOW 0
143     # define CBOR_STASH cbor_stash
144     #endif
145    
146 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
147 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
148 root 1.1
149     typedef struct {
150     U32 flags;
151     U32 max_depth;
152     STRLEN max_size;
153 root 1.27 SV *filter;
154 root 1.40
155     // for the incremental parser
156     STRLEN incr_pos; // the current offset into the text
157     STRLEN incr_need; // minimum bytes needed to decode
158     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
159 root 1.1 } CBOR;
160    
161 root 1.5 ecb_inline void
162 root 1.1 cbor_init (CBOR *cbor)
163     {
164     Zero (cbor, 1, CBOR);
165     cbor->max_depth = 512;
166     }
167    
168 root 1.27 ecb_inline void
169     cbor_free (CBOR *cbor)
170     {
171     SvREFCNT_dec (cbor->filter);
172 root 1.40 SvREFCNT_dec (cbor->incr_count);
173 root 1.27 }
174    
175 root 1.1 /////////////////////////////////////////////////////////////////////////////
176     // utility functions
177    
178 root 1.5 ecb_inline SV *
179 root 1.1 get_bool (const char *name)
180     {
181     SV *sv = get_sv (name, 1);
182    
183     SvREADONLY_on (sv);
184     SvREADONLY_on (SvRV (sv));
185    
186     return sv;
187     }
188    
189 root 1.5 ecb_inline void
190 root 1.1 shrink (SV *sv)
191     {
192     sv_utf8_downgrade (sv, 1);
193    
194     if (SvLEN (sv) > SvCUR (sv) + 1)
195     {
196     #ifdef SvPV_shrink_to_cur
197     SvPV_shrink_to_cur (sv);
198     #elif defined (SvPV_renew)
199     SvPV_renew (sv, SvCUR (sv) + 1);
200     #endif
201     }
202     }
203    
204 root 1.21 // minimum length of a string to be registered for stringref
205     ecb_inline int
206     minimum_string_length (UV idx)
207     {
208 root 1.61 return idx <= 23 ? 3
209     : idx <= 0xffU ? 4
210     : idx <= 0xffffU ? 5
211     : idx <= 0xffffffffU ? 7
212     : 11;
213 root 1.21 }
214    
215 root 1.1 /////////////////////////////////////////////////////////////////////////////
216     // encoder
217    
218     // structure used for encoding CBOR
219     typedef struct
220     {
221     char *cur; // SvPVX (sv) + current output position
222     char *end; // SvEND (sv)
223     SV *sv; // result scalar
224     CBOR cbor;
225     U32 depth; // recursion level
226 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
227     UV stringref_idx;
228 root 1.19 HV *shareable; // ptr => index, or 0
229     UV shareable_idx;
230 root 1.1 } enc_t;
231    
232 root 1.5 ecb_inline void
233 root 1.1 need (enc_t *enc, STRLEN len)
234     {
235 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
236 root 1.1 {
237     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
238     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
239     enc->cur = SvPVX (enc->sv) + cur;
240     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
241     }
242     }
243    
244 root 1.5 ecb_inline void
245 root 1.1 encode_ch (enc_t *enc, char ch)
246     {
247     need (enc, 1);
248     *enc->cur++ = ch;
249     }
250    
251 root 1.66 // used for tags, intregers, element counts and so on
252 root 1.1 static void
253     encode_uint (enc_t *enc, int major, UV len)
254     {
255     need (enc, 9);
256    
257 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
258 root 1.1 *enc->cur++ = major | len;
259 root 1.36 else if (ecb_expect_true (len <= 0xffU))
260 root 1.1 {
261 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
262 root 1.1 *enc->cur++ = len;
263     }
264 root 1.36 else if (len <= 0xffffU)
265 root 1.1 {
266 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
267 root 1.1 *enc->cur++ = len >> 8;
268     *enc->cur++ = len;
269     }
270 root 1.36 else if (len <= 0xffffffffU)
271 root 1.1 {
272 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
273 root 1.1 *enc->cur++ = len >> 24;
274     *enc->cur++ = len >> 16;
275     *enc->cur++ = len >> 8;
276     *enc->cur++ = len;
277     }
278 root 1.4 else
279 root 1.1 {
280 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
281 root 1.1 *enc->cur++ = len >> 56;
282     *enc->cur++ = len >> 48;
283     *enc->cur++ = len >> 40;
284     *enc->cur++ = len >> 32;
285     *enc->cur++ = len >> 24;
286     *enc->cur++ = len >> 16;
287     *enc->cur++ = len >> 8;
288     *enc->cur++ = len;
289     }
290     }
291    
292 root 1.66 // encodes a perl value into a CBOR integer
293 root 1.67 ecb_inline void
294 root 1.66 encode_int (enc_t *enc, SV *sv)
295     {
296     if (SvIsUV (sv))
297     encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
298     else if (SvIVX (sv) >= 0)
299     encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
300     else
301     encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
302     }
303    
304 root 1.21 ecb_inline void
305     encode_tag (enc_t *enc, UV tag)
306     {
307 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
308 root 1.21 }
309    
310 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
311     ecb_noinline static void
312     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
313     {
314     STRLEN ulen = len;
315     U8 *p, *pend = (U8 *)str + len;
316    
317     for (p = (U8 *)str; p < pend; ++p)
318     ulen += *p >> 7; // count set high bits
319    
320     encode_uint (enc, MAJOR_TEXT, ulen);
321    
322     need (enc, ulen);
323     for (p = (U8 *)str; p < pend; ++p)
324     if (*p < 0x80)
325     *enc->cur++ = *p;
326     else
327     {
328     *enc->cur++ = 0xc0 + (*p >> 6);
329     *enc->cur++ = 0x80 + (*p & 63);
330     }
331     }
332    
333 root 1.30 ecb_inline void
334 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
335 root 1.30 {
336 root 1.53 if (ecb_expect_false (upgrade_utf8))
337 root 1.51 if (!utf8)
338     {
339 root 1.53 encode_str_utf8 (enc, utf8, str, len);
340 root 1.51 return;
341     }
342    
343 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
344 root 1.30 need (enc, len);
345     memcpy (enc->cur, str, len);
346     enc->cur += len;
347     }
348    
349 root 1.54 ecb_inline void
350 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
351 root 1.1 {
352 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
353 root 1.21 {
354 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
355 root 1.21
356     if (SvOK (*svp))
357     {
358     // already registered, use stringref
359     encode_tag (enc, CBOR_TAG_STRINGREF);
360 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
361 root 1.21 return;
362     }
363     else if (len >= minimum_string_length (enc->stringref_idx))
364     {
365     // register only
366     sv_setuv (*svp, enc->stringref_idx);
367     ++enc->stringref_idx;
368     }
369     }
370    
371 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
372 root 1.1 }
373    
374 root 1.66 ecb_inline void
375     encode_float16 (enc_t *enc, NV nv)
376     {
377     need (enc, 1+2);
378    
379     *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
380    
381     uint16_t fp = ecb_float_to_binary16 (nv);
382    
383     if (!ecb_big_endian ())
384     fp = ecb_bswap16 (fp);
385    
386     memcpy (enc->cur, &fp, 2);
387     enc->cur += 2;
388     }
389    
390     ecb_inline void
391     encode_float32 (enc_t *enc, NV nv)
392     {
393     need (enc, 1+4);
394    
395     *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
396    
397     uint32_t fp = ecb_float_to_binary32 (nv);
398    
399     if (!ecb_big_endian ())
400     fp = ecb_bswap32 (fp);
401    
402     memcpy (enc->cur, &fp, 4);
403     enc->cur += 4;
404     }
405    
406     ecb_inline void
407     encode_float64 (enc_t *enc, NV nv)
408     {
409     need (enc, 1+8);
410    
411     *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
412    
413     uint64_t fp = ecb_double_to_binary64 (nv);
414    
415     if (!ecb_big_endian ())
416     fp = ecb_bswap64 (fp);
417    
418     memcpy (enc->cur, &fp, 8);
419     enc->cur += 8;
420     }
421    
422     ecb_inline void
423     encode_forced (enc_t *enc, UV type, SV *sv)
424     {
425     switch (type)
426     {
427     case AS_CBOR:
428     {
429     STRLEN len;
430     char *str = SvPVbyte (sv, len);
431    
432     need (enc, len);
433     memcpy (enc->cur, str, len);
434     enc->cur += len;
435     }
436     break;
437    
438     case AS_BYTES:
439     {
440     STRLEN len;
441     char *str = SvPVbyte (sv, len);
442     encode_strref (enc, 0, 0, str, len);
443     }
444     break;
445    
446     case AS_TEXT:
447     {
448     STRLEN len;
449     char *str = SvPVutf8 (sv, len);
450     encode_strref (enc, 1, 1, str, len);
451     }
452     break;
453    
454 root 1.67 case AS_INT: encode_int (enc, sv); break;
455 root 1.66 case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
456     case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
457     case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
458    
459     default:
460     croak ("encountered malformed CBOR::XS::Tagged object");
461     }
462     }
463    
464 root 1.1 static void encode_sv (enc_t *enc, SV *sv);
465    
466     static void
467     encode_av (enc_t *enc, AV *av)
468     {
469     int i, len = av_len (av);
470    
471     if (enc->depth >= enc->cbor.max_depth)
472     croak (ERR_NESTING_EXCEEDED);
473    
474     ++enc->depth;
475    
476 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
477 root 1.1
478 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
479 root 1.45 for (i = 0; i <= len; ++i)
480     {
481     SV **svp = av_fetch (av, i, 0);
482     encode_sv (enc, svp ? *svp : &PL_sv_undef);
483     }
484     else
485     for (i = 0; i <= len; ++i)
486 root 1.46 {
487     SV *sv = AvARRAY (av)[i];
488     encode_sv (enc, sv ? sv : &PL_sv_undef);
489     }
490 root 1.1
491     --enc->depth;
492     }
493    
494     static void
495     encode_hv (enc_t *enc, HV *hv)
496     {
497     HE *he;
498    
499     if (enc->depth >= enc->cbor.max_depth)
500     croak (ERR_NESTING_EXCEEDED);
501    
502     ++enc->depth;
503    
504     int pairs = hv_iterinit (hv);
505     int mg = SvMAGICAL (hv);
506    
507 root 1.63 if (ecb_expect_false (mg))
508 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
509 root 1.1 else
510 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
511 root 1.1
512     while ((he = hv_iternext (hv)))
513     {
514 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
515     encode_sv (enc, HeSVKEY (he));
516 root 1.1 else
517 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
518 root 1.1
519 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
520 root 1.1 }
521    
522 root 1.63 if (ecb_expect_false (mg))
523 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
524 root 1.1
525     --enc->depth;
526     }
527    
528     // encode objects, arrays and special \0=false and \1=true values.
529     static void
530     encode_rv (enc_t *enc, SV *sv)
531     {
532 root 1.19 SvGETMAGIC (sv);
533 root 1.1
534 root 1.19 svtype svt = SvTYPE (sv);
535 root 1.18
536 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
537 root 1.1 {
538 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
539     ? types_boolean_stash
540     : gv_stashpv ("Types::Serialiser::Boolean", 1);
541     HV *error_stash = !CBOR_SLOW || types_error_stash
542     ? types_error_stash
543     : gv_stashpv ("Types::Serialiser::Error", 1);
544 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
545     ? cbor_tagged_stash
546     : gv_stashpv ("CBOR::XS::Tagged" , 1);
547 root 1.1
548 root 1.11 HV *stash = SvSTASH (sv);
549    
550     if (stash == boolean_stash)
551 root 1.34 {
552 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
553 root 1.34 return;
554     }
555 root 1.11 else if (stash == error_stash)
556 root 1.34 {
557 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
558 root 1.34 return;
559     }
560 root 1.11 else if (stash == tagged_stash)
561 root 1.6 {
562     if (svt != SVt_PVAV)
563     croak ("encountered CBOR::XS::Tagged object that isn't an array");
564    
565 root 1.66 switch (av_len ((AV *)sv))
566     {
567     case 2-1:
568     // actually a tagged value
569     encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
570     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
571     break;
572    
573     case 3-1:
574     // a forced type [value, type, undef]
575     encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
576     break;
577    
578     default:
579     croak ("encountered malformed CBOR::XS::Tagged object");
580     }
581 root 1.34
582     return;
583 root 1.6 }
584 root 1.34 }
585    
586     if (ecb_expect_false (SvREFCNT (sv) > 1)
587     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
588     {
589 root 1.63 if (ecb_expect_false (!enc->shareable))
590 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
591    
592     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
593    
594     if (SvOK (*svp))
595     {
596     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
597 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
598 root 1.34 return;
599     }
600     else
601     {
602     sv_setuv (*svp, enc->shareable_idx);
603     ++enc->shareable_idx;
604     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
605     }
606     }
607    
608     if (ecb_expect_false (SvOBJECT (sv)))
609     {
610     HV *stash = SvSTASH (sv);
611     GV *method;
612    
613 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
614     croak ("encountered object '%s', but forbid_objects is enabled",
615     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
616     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
617 root 1.1 {
618 root 1.11 dSP;
619    
620 root 1.50 ENTER; SAVETMPS;
621     PUSHMARK (SP);
622 root 1.6 // we re-bless the reference to get overload and other niceties right
623 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
624    
625     PUTBACK;
626     // G_SCALAR ensures that return value is 1
627     call_sv ((SV *)GvCV (method), G_SCALAR);
628     SPAGAIN;
629    
630     // catch this surprisingly common error
631     if (SvROK (TOPs) && SvRV (TOPs) == sv)
632     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
633    
634     encode_sv (enc, POPs);
635    
636     PUTBACK;
637    
638     FREETMPS; LEAVE;
639     }
640     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
641     {
642     dSP;
643 root 1.6
644 root 1.50 ENTER; SAVETMPS;
645     PUSHMARK (SP);
646 root 1.11 EXTEND (SP, 2);
647     // we re-bless the reference to get overload and other niceties right
648     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
649     PUSHs (sv_cbor);
650 root 1.1
651 root 1.11 PUTBACK;
652     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
653     SPAGAIN;
654 root 1.6
655 root 1.11 // catch this surprisingly common error
656     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
657     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
658 root 1.6
659 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
660 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
661 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
662 root 1.6
663 root 1.65 {
664     int i;
665    
666     for (i = 0; i < count; ++i)
667     encode_sv (enc, SP[i + 1 - count]);
668    
669     SP -= count;
670     }
671 root 1.6
672 root 1.11 PUTBACK;
673 root 1.6
674 root 1.11 FREETMPS; LEAVE;
675 root 1.1 }
676 root 1.11 else
677     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
678     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
679 root 1.1 }
680     else if (svt == SVt_PVHV)
681     encode_hv (enc, (HV *)sv);
682     else if (svt == SVt_PVAV)
683     encode_av (enc, (AV *)sv);
684 root 1.18 else
685 root 1.1 {
686 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
687     encode_sv (enc, sv);
688 root 1.1 }
689     }
690    
691     static void
692     encode_nv (enc_t *enc, SV *sv)
693     {
694     double nv = SvNVX (sv);
695    
696     need (enc, 9);
697    
698 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
699     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
700 root 1.1 //TODO: maybe I32?
701 root 1.5 else if (ecb_expect_false (nv == (float)nv))
702 root 1.66 encode_float32 (enc, nv);
703 root 1.1 else
704 root 1.66 encode_float64 (enc, nv);
705 root 1.1 }
706    
707     static void
708     encode_sv (enc_t *enc, SV *sv)
709     {
710     SvGETMAGIC (sv);
711    
712     if (SvPOKp (sv))
713     {
714     STRLEN len;
715     char *str = SvPV (sv, len);
716 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
717 root 1.1 }
718     else if (SvNOKp (sv))
719     encode_nv (enc, sv);
720     else if (SvIOKp (sv))
721 root 1.67 encode_int (enc, sv);
722 root 1.1 else if (SvROK (sv))
723     encode_rv (enc, SvRV (sv));
724     else if (!SvOK (sv))
725 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
726 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
727 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
728 root 1.1 else
729     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
730     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
731     }
732    
733     static SV *
734     encode_cbor (SV *scalar, CBOR *cbor)
735     {
736 root 1.48 enc_t enc = { 0 };
737 root 1.1
738 root 1.63 enc.cbor = *cbor;
739     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
740     enc.cur = SvPVX (enc.sv);
741     enc.end = SvEND (enc.sv);
742 root 1.1
743     SvPOK_only (enc.sv);
744 root 1.20
745 root 1.32 if (cbor->flags & F_PACK_STRINGS)
746 root 1.20 {
747     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
748     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
749     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
750     }
751    
752 root 1.1 encode_sv (&enc, scalar);
753    
754     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
755     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
756    
757     if (enc.cbor.flags & F_SHRINK)
758     shrink (enc.sv);
759    
760     return enc.sv;
761     }
762    
763     /////////////////////////////////////////////////////////////////////////////
764     // decoder
765    
766     // structure used for decoding CBOR
767     typedef struct
768     {
769     U8 *cur; // current parser pointer
770     U8 *end; // end of input string
771     const char *err; // parse error, if != 0
772     CBOR cbor;
773     U32 depth; // recursion depth
774     U32 maxdepth; // recursion depth limit
775 root 1.19 AV *shareable;
776 root 1.20 AV *stringref;
777 root 1.27 SV *decode_tagged;
778 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
779 root 1.1 } dec_t;
780    
781 root 1.59 // set dec->err to ERRSV
782     ecb_cold static void
783     err_errsv (dec_t *dec)
784     {
785     if (!dec->err)
786     {
787     dec->err_sv = newSVsv (ERRSV);
788    
789     // chop off the trailing \n
790     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
791     *SvEND (dec->err_sv) = 0;
792    
793     dec->err = SvPVutf8_nolen (dec->err_sv);
794     }
795     }
796 root 1.1
797 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
798     ecb_cold static void
799     err_set (dec_t *dec, const char *reason)
800     {
801     if (!dec->err)
802     dec->err = reason;
803     }
804    
805     ecb_cold static void
806     err_unexpected_end (dec_t *dec)
807     {
808     err_set (dec, "unexpected end of CBOR data");
809     }
810    
811     #define ERR_DO(do) SB do; goto fail; SE
812     #define ERR(reason) ERR_DO (err_set (dec, reason))
813     #define ERR_ERRSV ERR_DO (err_errsv (dec))
814    
815 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
816 root 1.1
817 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
818 root 1.1 #define DEC_DEC_DEPTH --dec->depth
819    
820     static UV
821     decode_uint (dec_t *dec)
822     {
823 root 1.35 U8 m = *dec->cur & MINOR_MASK;
824     ++dec->cur;
825    
826     if (ecb_expect_true (m < LENGTH_EXT1))
827     return m;
828 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
829     {
830     WANT (1);
831     dec->cur += 1;
832     return dec->cur[-1];
833     }
834     else if (ecb_expect_true (m == LENGTH_EXT2))
835     {
836     WANT (2);
837     dec->cur += 2;
838     return (((UV)dec->cur[-2]) << 8)
839     | ((UV)dec->cur[-1]);
840     }
841     else if (ecb_expect_true (m == LENGTH_EXT4))
842     {
843     WANT (4);
844     dec->cur += 4;
845     return (((UV)dec->cur[-4]) << 24)
846     | (((UV)dec->cur[-3]) << 16)
847     | (((UV)dec->cur[-2]) << 8)
848     | ((UV)dec->cur[-1]);
849     }
850     else if (ecb_expect_true (m == LENGTH_EXT8))
851 root 1.1 {
852 root 1.36 WANT (8);
853     dec->cur += 8;
854 root 1.34
855 root 1.36 return
856 root 1.34 #if UVSIZE < 8
857 root 1.36 0
858 root 1.34 #else
859 root 1.36 (((UV)dec->cur[-8]) << 56)
860     | (((UV)dec->cur[-7]) << 48)
861     | (((UV)dec->cur[-6]) << 40)
862     | (((UV)dec->cur[-5]) << 32)
863 root 1.34 #endif
864 root 1.36 | (((UV)dec->cur[-4]) << 24)
865     | (((UV)dec->cur[-3]) << 16)
866     | (((UV)dec->cur[-2]) << 8)
867     | ((UV)dec->cur[-1]);
868 root 1.1 }
869 root 1.36 else
870     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
871 root 1.1
872     fail:
873     return 0;
874     }
875    
876     static SV *decode_sv (dec_t *dec);
877    
878     static SV *
879     decode_av (dec_t *dec)
880     {
881     AV *av = newAV ();
882    
883     DEC_INC_DEPTH;
884    
885 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
886 root 1.1 {
887     ++dec->cur;
888    
889     for (;;)
890     {
891     WANT (1);
892    
893 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
894 root 1.1 {
895     ++dec->cur;
896     break;
897     }
898    
899     av_push (av, decode_sv (dec));
900     }
901     }
902     else
903     {
904 root 1.57 UV i, len = decode_uint (dec);
905 root 1.1
906 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
907 root 1.1 av_fill (av, len - 1);
908    
909     for (i = 0; i < len; ++i)
910     AvARRAY (av)[i] = decode_sv (dec);
911     }
912    
913     DEC_DEC_DEPTH;
914     return newRV_noinc ((SV *)av);
915    
916     fail:
917 root 1.63 SvREFCNT_dec_NN (av);
918 root 1.1 DEC_DEC_DEPTH;
919     return &PL_sv_undef;
920     }
921    
922 root 1.16 static void
923     decode_he (dec_t *dec, HV *hv)
924     {
925     // for speed reasons, we specialcase single-string
926 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
927    
928 root 1.23 if (ecb_expect_true (!dec->stringref))
929 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
930 root 1.21 {
931 root 1.56 STRLEN len = decode_uint (dec);
932 root 1.21 char *key = (char *)dec->cur;
933 root 1.16
934 root 1.49 WANT (len);
935 root 1.21 dec->cur += len;
936    
937 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
938 root 1.16
939 root 1.21 return;
940     }
941 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
942 root 1.21 {
943 root 1.56 STRLEN len = decode_uint (dec);
944 root 1.21 char *key = (char *)dec->cur;
945 root 1.16
946 root 1.49 WANT (len);
947 root 1.21 dec->cur += len;
948 root 1.20
949 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
950     if (!is_utf8_string (key, len))
951     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
952 root 1.16
953 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
954 root 1.16
955 root 1.21 return;
956     }
957 root 1.20
958 root 1.21 SV *k = decode_sv (dec);
959     SV *v = decode_sv (dec);
960 root 1.16
961 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
962     // methods, and this is hopefully the only place where it can happen,
963     // so if there is a chance of an exception, take the very slow path.
964     // since catching exceptions is "undocumented/internal/forbidden" by
965     // the new p5p powers, we need to call out to a perl function :/
966     if (ecb_expect_false (SvAMAGIC (k)))
967     {
968     dSP;
969    
970     ENTER; SAVETMPS;
971     PUSHMARK (SP);
972     EXTEND (SP, 3);
973     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
974     PUSHs (sv_2mortal (k));
975     PUSHs (sv_2mortal (v));
976    
977     PUTBACK;
978     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
979     SPAGAIN;
980    
981     FREETMPS; LEAVE;
982    
983     if (SvTRUE (ERRSV))
984     ERR_ERRSV;
985    
986     return;
987     }
988    
989 root 1.21 hv_store_ent (hv, k, v, 0);
990 root 1.63 SvREFCNT_dec_NN (k);
991 root 1.38
992     fail:
993     ;
994 root 1.16 }
995    
996 root 1.1 static SV *
997     decode_hv (dec_t *dec)
998     {
999     HV *hv = newHV ();
1000    
1001     DEC_INC_DEPTH;
1002    
1003 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
1004 root 1.1 {
1005     ++dec->cur;
1006    
1007     for (;;)
1008     {
1009     WANT (1);
1010    
1011 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
1012 root 1.1 {
1013     ++dec->cur;
1014     break;
1015     }
1016    
1017 root 1.16 decode_he (dec, hv);
1018 root 1.1 }
1019     }
1020     else
1021     {
1022 root 1.57 UV pairs = decode_uint (dec);
1023    
1024     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
1025 root 1.1
1026 root 1.16 while (pairs--)
1027     decode_he (dec, hv);
1028 root 1.1 }
1029    
1030     DEC_DEC_DEPTH;
1031     return newRV_noinc ((SV *)hv);
1032    
1033     fail:
1034 root 1.63 SvREFCNT_dec_NN (hv);
1035 root 1.1 DEC_DEC_DEPTH;
1036     return &PL_sv_undef;
1037     }
1038    
1039     static SV *
1040     decode_str (dec_t *dec, int utf8)
1041     {
1042 root 1.6 SV *sv = 0;
1043 root 1.1
1044 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
1045 root 1.1 {
1046 root 1.33 // indefinite length strings
1047 root 1.1 ++dec->cur;
1048    
1049 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
1050 root 1.33
1051 root 1.1 sv = newSVpvn ("", 0);
1052    
1053     for (;;)
1054     {
1055     WANT (1);
1056    
1057 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
1058     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
1059 root 1.33 {
1060     ++dec->cur;
1061     break;
1062     }
1063     else
1064     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
1065    
1066     STRLEN len = decode_uint (dec);
1067 root 1.1
1068 root 1.33 WANT (len);
1069     sv_catpvn (sv, dec->cur, len);
1070     dec->cur += len;
1071 root 1.1 }
1072     }
1073     else
1074     {
1075     STRLEN len = decode_uint (dec);
1076    
1077     WANT (len);
1078     sv = newSVpvn (dec->cur, len);
1079     dec->cur += len;
1080 root 1.25
1081     if (ecb_expect_false (dec->stringref)
1082     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
1083     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
1084 root 1.1 }
1085    
1086     if (utf8)
1087 root 1.38 {
1088     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
1089     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
1090     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
1091    
1092     SvUTF8_on (sv);
1093     }
1094 root 1.1
1095     return sv;
1096    
1097     fail:
1098 root 1.6 SvREFCNT_dec (sv);
1099 root 1.1 return &PL_sv_undef;
1100     }
1101    
1102     static SV *
1103 root 1.3 decode_tagged (dec_t *dec)
1104     {
1105 root 1.19 SV *sv = 0;
1106 root 1.3 UV tag = decode_uint (dec);
1107 root 1.19
1108     WANT (1);
1109 root 1.3
1110 root 1.18 switch (tag)
1111     {
1112     case CBOR_TAG_MAGIC:
1113 root 1.20 sv = decode_sv (dec);
1114     break;
1115 root 1.18
1116     case CBOR_TAG_INDIRECTION:
1117 root 1.20 sv = newRV_noinc (decode_sv (dec));
1118     break;
1119    
1120     case CBOR_TAG_STRINGREF_NAMESPACE:
1121     {
1122 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1123 root 1.55 // erase mortalised caches, e.g. "shareable"
1124     ENTER;
1125 root 1.20
1126     SAVESPTR (dec->stringref);
1127     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1128    
1129     sv = decode_sv (dec);
1130    
1131 root 1.55 LEAVE;
1132 root 1.20 }
1133     break;
1134    
1135     case CBOR_TAG_STRINGREF:
1136     {
1137 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1138 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1139    
1140     UV idx = decode_uint (dec);
1141    
1142 root 1.64 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1143 root 1.20 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1144    
1145     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1146     }
1147     break;
1148 root 1.11
1149 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1150     {
1151     if (ecb_expect_false (!dec->shareable))
1152     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1153    
1154 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1155     {
1156     sv = newSV (0);
1157     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1158 root 1.18
1159 root 1.37 SV *osv = decode_sv (dec);
1160     sv_setsv (sv, osv);
1161     SvREFCNT_dec_NN (osv);
1162     }
1163     else
1164     {
1165     av_push (dec->shareable, &PL_sv_undef);
1166     int idx = AvFILLp (dec->shareable);
1167     sv = decode_sv (dec);
1168     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1169     }
1170 root 1.19 }
1171 root 1.20 break;
1172 root 1.18
1173     case CBOR_TAG_VALUE_SHAREDREF:
1174 root 1.17 {
1175 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1176 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1177 root 1.18
1178 root 1.19 UV idx = decode_uint (dec);
1179    
1180 root 1.64 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1181 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1182    
1183 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1184 root 1.37
1185     if (sv == &PL_sv_undef)
1186     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1187 root 1.17 }
1188 root 1.20 break;
1189 root 1.17
1190 root 1.18 case CBOR_TAG_PERL_OBJECT:
1191     {
1192 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1193     goto filter;
1194    
1195 root 1.19 sv = decode_sv (dec);
1196    
1197 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1198     ERR ("corrupted CBOR data (non-array perl object)");
1199    
1200     AV *av = (AV *)SvRV (sv);
1201     int len = av_len (av) + 1;
1202     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1203    
1204     if (!stash)
1205     ERR ("cannot decode perl-object (package does not exist)");
1206    
1207     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1208    
1209     if (!method)
1210     ERR ("cannot decode perl-object (package does not have a THAW method)");
1211    
1212     dSP;
1213    
1214 root 1.50 ENTER; SAVETMPS;
1215     PUSHMARK (SP);
1216 root 1.18 EXTEND (SP, len + 1);
1217     // we re-bless the reference to get overload and other niceties right
1218     PUSHs (*av_fetch (av, 0, 1));
1219     PUSHs (sv_cbor);
1220    
1221     int i;
1222    
1223     for (i = 1; i < len; ++i)
1224     PUSHs (*av_fetch (av, i, 1));
1225    
1226     PUTBACK;
1227     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1228     SPAGAIN;
1229    
1230     if (SvTRUE (ERRSV))
1231     {
1232     FREETMPS; LEAVE;
1233 root 1.59 ERR_ERRSV;
1234 root 1.18 }
1235    
1236 root 1.63 SvREFCNT_dec_NN (sv);
1237 root 1.18 sv = SvREFCNT_inc (POPs);
1238 root 1.11
1239 root 1.18 PUTBACK;
1240 root 1.11
1241 root 1.18 FREETMPS; LEAVE;
1242     }
1243 root 1.20 break;
1244 root 1.9
1245 root 1.18 default:
1246 root 1.60 filter:
1247 root 1.18 {
1248 root 1.58 SV *tag_sv = newSVuv (tag);
1249    
1250 root 1.19 sv = decode_sv (dec);
1251    
1252 root 1.27 dSP;
1253 root 1.50 ENTER; SAVETMPS;
1254     PUSHMARK (SP);
1255 root 1.27 EXTEND (SP, 2);
1256 root 1.58 PUSHs (tag_sv);
1257 root 1.27 PUSHs (sv);
1258    
1259     PUTBACK;
1260     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1261     SPAGAIN;
1262    
1263     if (SvTRUE (ERRSV))
1264     {
1265 root 1.63 SvREFCNT_dec_NN (tag_sv);
1266 root 1.27 FREETMPS; LEAVE;
1267 root 1.59 ERR_ERRSV;
1268 root 1.27 }
1269    
1270     if (count)
1271     {
1272 root 1.63 SvREFCNT_dec_NN (tag_sv);
1273     SvREFCNT_dec_NN (sv);
1274 root 1.65 sv = SvREFCNT_inc_NN (TOPs);
1275     SP -= count;
1276 root 1.27 }
1277     else
1278     {
1279     AV *av = newAV ();
1280 root 1.58 av_push (av, tag_sv);
1281 root 1.27 av_push (av, sv);
1282    
1283     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1284     ? cbor_tagged_stash
1285     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1286     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1287     }
1288 root 1.7
1289 root 1.27 PUTBACK;
1290    
1291     FREETMPS; LEAVE;
1292 root 1.18 }
1293 root 1.20 break;
1294 root 1.11 }
1295 root 1.9
1296 root 1.20 return sv;
1297    
1298 root 1.9 fail:
1299     SvREFCNT_dec (sv);
1300     return &PL_sv_undef;
1301 root 1.3 }
1302    
1303     static SV *
1304 root 1.1 decode_sv (dec_t *dec)
1305     {
1306     WANT (1);
1307    
1308 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1309 root 1.1 {
1310 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1311     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1312     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1313     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1314     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1315     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1316     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1317    
1318     case MAJOR_MISC >> MAJOR_SHIFT:
1319     switch (*dec->cur++ & MINOR_MASK)
1320 root 1.1 {
1321 root 1.35 case SIMPLE_FALSE:
1322 root 1.1 #if CBOR_SLOW
1323 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1324 root 1.1 #endif
1325 root 1.10 return newSVsv (types_false);
1326 root 1.35 case SIMPLE_TRUE:
1327 root 1.1 #if CBOR_SLOW
1328 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1329 root 1.1 #endif
1330 root 1.10 return newSVsv (types_true);
1331 root 1.35 case SIMPLE_NULL:
1332 root 1.1 return newSVsv (&PL_sv_undef);
1333 root 1.35 case SIMPLE_UNDEF:
1334 root 1.10 #if CBOR_SLOW
1335     types_error = get_bool ("Types::Serialiser::error");
1336     #endif
1337     return newSVsv (types_error);
1338 root 1.1
1339 root 1.35 case MISC_FLOAT16:
1340 root 1.2 {
1341     WANT (2);
1342    
1343     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1344     dec->cur += 2;
1345    
1346     return newSVnv (ecb_binary16_to_float (fp));
1347     }
1348 root 1.1
1349 root 1.35 case MISC_FLOAT32:
1350 root 1.1 {
1351     uint32_t fp;
1352     WANT (4);
1353     memcpy (&fp, dec->cur, 4);
1354     dec->cur += 4;
1355    
1356     if (!ecb_big_endian ())
1357     fp = ecb_bswap32 (fp);
1358    
1359     return newSVnv (ecb_binary32_to_float (fp));
1360     }
1361    
1362 root 1.35 case MISC_FLOAT64:
1363 root 1.1 {
1364     uint64_t fp;
1365     WANT (8);
1366     memcpy (&fp, dec->cur, 8);
1367     dec->cur += 8;
1368    
1369     if (!ecb_big_endian ())
1370     fp = ecb_bswap64 (fp);
1371    
1372     return newSVnv (ecb_binary64_to_double (fp));
1373     }
1374    
1375 root 1.35 // 0..19 unassigned simple
1376 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1377     // 28-30 unassigned misc
1378     // 31 break code
1379 root 1.1 default:
1380 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1381 root 1.1 }
1382    
1383     break;
1384     }
1385    
1386     fail:
1387     return &PL_sv_undef;
1388     }
1389    
1390     static SV *
1391     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1392     {
1393 root 1.48 dec_t dec = { 0 };
1394 root 1.1 SV *sv;
1395 root 1.16 STRLEN len;
1396     char *data = SvPVbyte (string, len);
1397 root 1.1
1398 root 1.16 if (len > cbor->max_size && cbor->max_size)
1399     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1400     (unsigned long)len, (unsigned long)cbor->max_size);
1401 root 1.1
1402     dec.cbor = *cbor;
1403 root 1.16 dec.cur = (U8 *)data;
1404     dec.end = (U8 *)data + len;
1405 root 1.1
1406     sv = decode_sv (&dec);
1407    
1408     if (offset_return)
1409     *offset_return = dec.cur;
1410    
1411     if (!(offset_return || !sv))
1412 root 1.2 if (dec.cur != dec.end && !dec.err)
1413     dec.err = "garbage after CBOR object";
1414    
1415     if (dec.err)
1416 root 1.1 {
1417 root 1.39 if (dec.shareable)
1418     {
1419 root 1.58 // need to break cyclic links, which would all be in shareable
1420 root 1.39 int i;
1421     SV **svp;
1422    
1423     for (i = av_len (dec.shareable) + 1; i--; )
1424     if ((svp = av_fetch (dec.shareable, i, 0)))
1425     sv_setsv (*svp, &PL_sv_undef);
1426     }
1427    
1428 root 1.63 SvREFCNT_dec_NN (sv);
1429 root 1.59
1430     if (dec.err_sv)
1431     sv_2mortal (dec.err_sv);
1432    
1433 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1434 root 1.1 }
1435    
1436     sv = sv_2mortal (sv);
1437    
1438     return sv;
1439     }
1440    
1441     /////////////////////////////////////////////////////////////////////////////
1442 root 1.40 // incremental parser
1443    
1444     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1445    
1446     // returns 0 for notyet, 1 for success or error
1447     static int
1448     incr_parse (CBOR *self, SV *cborstr)
1449     {
1450     STRLEN cur;
1451     SvPV (cborstr, cur);
1452    
1453     while (ecb_expect_true (self->incr_need <= cur))
1454     {
1455     // table of integer count bytes
1456     static I8 incr_len[MINOR_MASK + 1] = {
1457     0, 0, 0, 0, 0, 0, 0, 0,
1458     0, 0, 0, 0, 0, 0, 0, 0,
1459     0, 0, 0, 0, 0, 0, 0, 0,
1460     1, 2, 4, 8,-1,-1,-1,-2
1461     };
1462    
1463     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1464     U8 m = *p & MINOR_MASK;
1465     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1466     I8 ilen = incr_len[m];
1467    
1468     self->incr_need = self->incr_pos + 1;
1469    
1470     if (ecb_expect_false (ilen < 0))
1471     {
1472     if (m != MINOR_INDEF)
1473     return 1; // error
1474    
1475     if (*p == (MAJOR_MISC | MINOR_INDEF))
1476     {
1477     if (count >= 0)
1478     return 1; // error
1479    
1480     count = 1;
1481     }
1482     else
1483     {
1484     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1485     count = -1;
1486     }
1487     }
1488     else
1489     {
1490     self->incr_need += ilen;
1491     if (ecb_expect_false (self->incr_need > cur))
1492     return 0;
1493    
1494     int major = *p >> MAJOR_SHIFT;
1495    
1496     switch (major)
1497     {
1498 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1499     ++count; // tags merely prefix another value
1500     break;
1501    
1502 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1503     case MAJOR_TEXT >> MAJOR_SHIFT:
1504     case MAJOR_ARRAY >> MAJOR_SHIFT:
1505     case MAJOR_MAP >> MAJOR_SHIFT:
1506     {
1507     UV len;
1508    
1509     if (ecb_expect_false (ilen))
1510     {
1511     len = 0;
1512    
1513     do {
1514     len = (len << 8) | *++p;
1515     } while (--ilen);
1516     }
1517     else
1518     len = m;
1519    
1520     switch (major)
1521     {
1522     case MAJOR_BYTES >> MAJOR_SHIFT:
1523     case MAJOR_TEXT >> MAJOR_SHIFT:
1524     self->incr_need += len;
1525     if (ecb_expect_false (self->incr_need > cur))
1526     return 0;
1527    
1528     break;
1529    
1530     case MAJOR_MAP >> MAJOR_SHIFT:
1531     len <<= 1;
1532     case MAJOR_ARRAY >> MAJOR_SHIFT:
1533     if (len)
1534     {
1535     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1536     count = len + 1;
1537     }
1538     break;
1539     }
1540     }
1541     }
1542     }
1543    
1544     self->incr_pos = self->incr_need;
1545    
1546     if (count > 0)
1547     {
1548     while (!--count)
1549     {
1550     if (!AvFILLp (self->incr_count))
1551     return 1; // done
1552    
1553     SvREFCNT_dec_NN (av_pop (self->incr_count));
1554     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1555     }
1556    
1557     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1558     }
1559     }
1560    
1561     return 0;
1562     }
1563    
1564    
1565     /////////////////////////////////////////////////////////////////////////////
1566 root 1.1 // XS interface functions
1567    
1568     MODULE = CBOR::XS PACKAGE = CBOR::XS
1569    
1570     BOOT:
1571     {
1572     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1573 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1574 root 1.1
1575 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1576     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1577    
1578     types_true = get_bool ("Types::Serialiser::true" );
1579     types_false = get_bool ("Types::Serialiser::false");
1580     types_error = get_bool ("Types::Serialiser::error");
1581 root 1.11
1582 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1583    
1584 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1585     SvREADONLY_on (sv_cbor);
1586 root 1.56
1587     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1588 root 1.1 }
1589    
1590     PROTOTYPES: DISABLE
1591    
1592     void CLONE (...)
1593     CODE:
1594 root 1.10 cbor_stash = 0;
1595     cbor_tagged_stash = 0;
1596     types_error_stash = 0;
1597     types_boolean_stash = 0;
1598 root 1.1
1599     void new (char *klass)
1600     PPCODE:
1601     {
1602     SV *pv = NEWSV (0, sizeof (CBOR));
1603     SvPOK_only (pv);
1604     cbor_init ((CBOR *)SvPVX (pv));
1605     XPUSHs (sv_2mortal (sv_bless (
1606     newRV_noinc (pv),
1607     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1608     )));
1609     }
1610    
1611     void shrink (CBOR *self, int enable = 1)
1612     ALIAS:
1613     shrink = F_SHRINK
1614     allow_unknown = F_ALLOW_UNKNOWN
1615 root 1.18 allow_sharing = F_ALLOW_SHARING
1616 root 1.37 allow_cycles = F_ALLOW_CYCLES
1617 root 1.60 forbid_objects = F_FORBID_OBJECTS
1618 root 1.32 pack_strings = F_PACK_STRINGS
1619 root 1.54 text_keys = F_TEXT_KEYS
1620     text_strings = F_TEXT_STRINGS
1621 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1622 root 1.1 PPCODE:
1623     {
1624     if (enable)
1625     self->flags |= ix;
1626     else
1627     self->flags &= ~ix;
1628    
1629     XPUSHs (ST (0));
1630     }
1631    
1632     void get_shrink (CBOR *self)
1633     ALIAS:
1634     get_shrink = F_SHRINK
1635     get_allow_unknown = F_ALLOW_UNKNOWN
1636 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1637 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1638 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1639 root 1.32 get_pack_strings = F_PACK_STRINGS
1640 root 1.54 get_text_keys = F_TEXT_KEYS
1641     get_text_strings = F_TEXT_STRINGS
1642 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1643 root 1.1 PPCODE:
1644     XPUSHs (boolSV (self->flags & ix));
1645    
1646     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1647     PPCODE:
1648     self->max_depth = max_depth;
1649     XPUSHs (ST (0));
1650    
1651     U32 get_max_depth (CBOR *self)
1652     CODE:
1653     RETVAL = self->max_depth;
1654     OUTPUT:
1655     RETVAL
1656    
1657     void max_size (CBOR *self, U32 max_size = 0)
1658     PPCODE:
1659     self->max_size = max_size;
1660     XPUSHs (ST (0));
1661    
1662     int get_max_size (CBOR *self)
1663     CODE:
1664     RETVAL = self->max_size;
1665     OUTPUT:
1666     RETVAL
1667    
1668 root 1.27 void filter (CBOR *self, SV *filter = 0)
1669     PPCODE:
1670     SvREFCNT_dec (self->filter);
1671     self->filter = filter ? newSVsv (filter) : filter;
1672     XPUSHs (ST (0));
1673    
1674     SV *get_filter (CBOR *self)
1675     CODE:
1676     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1677     OUTPUT:
1678     RETVAL
1679    
1680 root 1.1 void encode (CBOR *self, SV *scalar)
1681     PPCODE:
1682     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1683     XPUSHs (scalar);
1684    
1685     void decode (CBOR *self, SV *cborstr)
1686     PPCODE:
1687     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1688     XPUSHs (cborstr);
1689    
1690     void decode_prefix (CBOR *self, SV *cborstr)
1691     PPCODE:
1692     {
1693     SV *sv;
1694     char *offset;
1695     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1696     EXTEND (SP, 2);
1697     PUSHs (sv);
1698     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1699     }
1700    
1701 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1702 root 1.42 ALIAS:
1703     incr_parse_multiple = 1
1704 root 1.40 PPCODE:
1705     {
1706     if (SvUTF8 (cborstr))
1707     sv_utf8_downgrade (cborstr, 0);
1708    
1709     if (!self->incr_count)
1710     {
1711     self->incr_count = newAV ();
1712     self->incr_pos = 0;
1713     self->incr_need = 1;
1714    
1715     av_push (self->incr_count, newSViv (1));
1716     }
1717    
1718 root 1.41 do
1719 root 1.40 {
1720     if (!incr_parse (self, cborstr))
1721     {
1722     if (self->incr_need > self->max_size && self->max_size)
1723     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1724     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1725    
1726     break;
1727     }
1728    
1729 root 1.41 SV *sv;
1730     char *offset;
1731 root 1.40
1732 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1733     XPUSHs (sv);
1734 root 1.40
1735 root 1.41 sv_chop (cborstr, offset);
1736 root 1.40
1737 root 1.41 av_clear (self->incr_count);
1738     av_push (self->incr_count, newSViv (1));
1739 root 1.40
1740 root 1.41 self->incr_pos = 0;
1741     self->incr_need = self->incr_pos + 1;
1742 root 1.40 }
1743 root 1.42 while (ix);
1744 root 1.40 }
1745    
1746     void incr_reset (CBOR *self)
1747     CODE:
1748     {
1749     SvREFCNT_dec (self->incr_count);
1750     self->incr_count = 0;
1751     }
1752    
1753 root 1.27 void DESTROY (CBOR *self)
1754     PPCODE:
1755     cbor_free (self);
1756    
1757 root 1.1 PROTOTYPES: ENABLE
1758    
1759     void encode_cbor (SV *scalar)
1760 root 1.36 ALIAS:
1761     encode_cbor = 0
1762     encode_cbor_sharing = F_ALLOW_SHARING
1763 root 1.1 PPCODE:
1764     {
1765     CBOR cbor;
1766     cbor_init (&cbor);
1767 root 1.36 cbor.flags |= ix;
1768 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1769     XPUSHs (scalar);
1770     }
1771    
1772     void decode_cbor (SV *cborstr)
1773     PPCODE:
1774     {
1775     CBOR cbor;
1776     cbor_init (&cbor);
1777     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1778     XPUSHs (cborstr);
1779     }
1780    
1781 root 1.64 #ifdef __AFL_COMPILER
1782    
1783     void
1784     afl_init ()
1785     CODE:
1786     __AFL_INIT ();
1787    
1788     int
1789     afl_loop (unsigned int count = 10000)
1790     CODE:
1791     RETVAL = __AFL_LOOP (count);
1792     OUTPUT:
1793     RETVAL
1794    
1795     #endif
1796