ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.66
Committed: Sun Nov 29 21:32:01 2020 UTC (3 years, 5 months ago) by root
Branch: MAIN
Changes since 1.65: +133 -32 lines
Log Message:
ecb.h

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.35 // known major and minor types
34     enum cbor_type
35     {
36     MAJOR_SHIFT = 5,
37     MINOR_MASK = 0x1f,
38    
39     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
40     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
41     MAJOR_BYTES = 2 << MAJOR_SHIFT,
42     MAJOR_TEXT = 3 << MAJOR_SHIFT,
43     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
44     MAJOR_MAP = 5 << MAJOR_SHIFT,
45     MAJOR_TAG = 6 << MAJOR_SHIFT,
46     MAJOR_MISC = 7 << MAJOR_SHIFT,
47    
48     // INT/STRING/ARRAY/MAP subtypes
49     LENGTH_EXT1 = 24,
50     LENGTH_EXT2 = 25,
51     LENGTH_EXT4 = 26,
52     LENGTH_EXT8 = 27,
53    
54     // SIMPLE types (effectively MISC subtypes)
55     SIMPLE_FALSE = 20,
56     SIMPLE_TRUE = 21,
57     SIMPLE_NULL = 22,
58     SIMPLE_UNDEF = 23,
59    
60     // MISC subtype (unused)
61     MISC_EXT1 = 24,
62     MISC_FLOAT16 = 25,
63     MISC_FLOAT32 = 26,
64     MISC_FLOAT64 = 27,
65    
66     // BYTES/TEXT/ARRAY/MAP
67     MINOR_INDEF = 31,
68     };
69    
70 root 1.9 // known tags
71 root 1.8 enum cbor_tag
72     {
73 root 1.35 // extensions
74     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
75     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
76     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
77     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
78     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
79     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
80     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
81    
82     // rfc7049
83     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
84     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
85     CBOR_TAG_POS_BIGNUM = 2, // byte string
86     CBOR_TAG_NEG_BIGNUM = 3, // byte string
87     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
88     CBOR_TAG_BIGFLOAT = 5, // array
89    
90     CBOR_TAG_CONV_B64U = 21, // base64url, any
91     CBOR_TAG_CONV_B64 = 22, // base64, any
92     CBOR_TAG_CONV_HEX = 23, // base16, any
93     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
94    
95     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
96     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
97     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
98     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
99     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
100 root 1.8
101 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
102 root 1.8 };
103    
104 root 1.66 // known forced types, also hardcoded in CBOR.pm
105     enum
106     {
107     AS_CBOR = 0,
108     AS_BYTES = 1,
109     AS_TEXT = 2,
110     AS_FLOAT16 = 3,
111     AS_FLOAT32 = 4,
112     AS_FLOAT64 = 5,
113     // possibly future enhancements: float, integer
114     };
115    
116 root 1.24 #define F_SHRINK 0x00000001UL
117     #define F_ALLOW_UNKNOWN 0x00000002UL
118 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
119 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
120 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
121     #define F_PACK_STRINGS 0x00000020UL
122     #define F_TEXT_KEYS 0x00000040UL
123     #define F_TEXT_STRINGS 0x00000080UL
124     #define F_VALIDATE_UTF8 0x00000100UL
125 root 1.1
126     #define INIT_SIZE 32 // initial scalar size to be allocated
127    
128     #define SB do {
129     #define SE } while (0)
130    
131     #define IN_RANGE_INC(type,val,beg,end) \
132     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
133     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
134    
135     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
136    
137     #ifdef USE_ITHREADS
138     # define CBOR_SLOW 1
139     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
140     #else
141     # define CBOR_SLOW 0
142     # define CBOR_STASH cbor_stash
143     #endif
144    
145 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
146 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
147 root 1.1
148     typedef struct {
149     U32 flags;
150     U32 max_depth;
151     STRLEN max_size;
152 root 1.27 SV *filter;
153 root 1.40
154     // for the incremental parser
155     STRLEN incr_pos; // the current offset into the text
156     STRLEN incr_need; // minimum bytes needed to decode
157     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
158 root 1.1 } CBOR;
159    
160 root 1.5 ecb_inline void
161 root 1.1 cbor_init (CBOR *cbor)
162     {
163     Zero (cbor, 1, CBOR);
164     cbor->max_depth = 512;
165     }
166    
167 root 1.27 ecb_inline void
168     cbor_free (CBOR *cbor)
169     {
170     SvREFCNT_dec (cbor->filter);
171 root 1.40 SvREFCNT_dec (cbor->incr_count);
172 root 1.27 }
173    
174 root 1.1 /////////////////////////////////////////////////////////////////////////////
175     // utility functions
176    
177 root 1.5 ecb_inline SV *
178 root 1.1 get_bool (const char *name)
179     {
180     SV *sv = get_sv (name, 1);
181    
182     SvREADONLY_on (sv);
183     SvREADONLY_on (SvRV (sv));
184    
185     return sv;
186     }
187    
188 root 1.5 ecb_inline void
189 root 1.1 shrink (SV *sv)
190     {
191     sv_utf8_downgrade (sv, 1);
192    
193     if (SvLEN (sv) > SvCUR (sv) + 1)
194     {
195     #ifdef SvPV_shrink_to_cur
196     SvPV_shrink_to_cur (sv);
197     #elif defined (SvPV_renew)
198     SvPV_renew (sv, SvCUR (sv) + 1);
199     #endif
200     }
201     }
202    
203 root 1.21 // minimum length of a string to be registered for stringref
204     ecb_inline int
205     minimum_string_length (UV idx)
206     {
207 root 1.61 return idx <= 23 ? 3
208     : idx <= 0xffU ? 4
209     : idx <= 0xffffU ? 5
210     : idx <= 0xffffffffU ? 7
211     : 11;
212 root 1.21 }
213    
214 root 1.1 /////////////////////////////////////////////////////////////////////////////
215     // encoder
216    
217     // structure used for encoding CBOR
218     typedef struct
219     {
220     char *cur; // SvPVX (sv) + current output position
221     char *end; // SvEND (sv)
222     SV *sv; // result scalar
223     CBOR cbor;
224     U32 depth; // recursion level
225 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
226     UV stringref_idx;
227 root 1.19 HV *shareable; // ptr => index, or 0
228     UV shareable_idx;
229 root 1.1 } enc_t;
230    
231 root 1.5 ecb_inline void
232 root 1.1 need (enc_t *enc, STRLEN len)
233     {
234 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
235 root 1.1 {
236     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
237     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
238     enc->cur = SvPVX (enc->sv) + cur;
239     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
240     }
241     }
242    
243 root 1.5 ecb_inline void
244 root 1.1 encode_ch (enc_t *enc, char ch)
245     {
246     need (enc, 1);
247     *enc->cur++ = ch;
248     }
249    
250 root 1.66 // used for tags, intregers, element counts and so on
251 root 1.1 static void
252     encode_uint (enc_t *enc, int major, UV len)
253     {
254     need (enc, 9);
255    
256 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
257 root 1.1 *enc->cur++ = major | len;
258 root 1.36 else if (ecb_expect_true (len <= 0xffU))
259 root 1.1 {
260 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
261 root 1.1 *enc->cur++ = len;
262     }
263 root 1.36 else if (len <= 0xffffU)
264 root 1.1 {
265 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
266 root 1.1 *enc->cur++ = len >> 8;
267     *enc->cur++ = len;
268     }
269 root 1.36 else if (len <= 0xffffffffU)
270 root 1.1 {
271 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
272 root 1.1 *enc->cur++ = len >> 24;
273     *enc->cur++ = len >> 16;
274     *enc->cur++ = len >> 8;
275     *enc->cur++ = len;
276     }
277 root 1.4 else
278 root 1.1 {
279 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
280 root 1.1 *enc->cur++ = len >> 56;
281     *enc->cur++ = len >> 48;
282     *enc->cur++ = len >> 40;
283     *enc->cur++ = len >> 32;
284     *enc->cur++ = len >> 24;
285     *enc->cur++ = len >> 16;
286     *enc->cur++ = len >> 8;
287     *enc->cur++ = len;
288     }
289     }
290    
291 root 1.66 // encodes a perl value into a CBOR integer
292     ecb_inline
293     encode_int (enc_t *enc, SV *sv)
294     {
295     if (SvIsUV (sv))
296     encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
297     else if (SvIVX (sv) >= 0)
298     encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
299     else
300     encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
301     }
302    
303 root 1.21 ecb_inline void
304     encode_tag (enc_t *enc, UV tag)
305     {
306 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
307 root 1.21 }
308    
309 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
310     ecb_noinline static void
311     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
312     {
313     STRLEN ulen = len;
314     U8 *p, *pend = (U8 *)str + len;
315    
316     for (p = (U8 *)str; p < pend; ++p)
317     ulen += *p >> 7; // count set high bits
318    
319     encode_uint (enc, MAJOR_TEXT, ulen);
320    
321     need (enc, ulen);
322     for (p = (U8 *)str; p < pend; ++p)
323     if (*p < 0x80)
324     *enc->cur++ = *p;
325     else
326     {
327     *enc->cur++ = 0xc0 + (*p >> 6);
328     *enc->cur++ = 0x80 + (*p & 63);
329     }
330     }
331    
332 root 1.30 ecb_inline void
333 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
334 root 1.30 {
335 root 1.53 if (ecb_expect_false (upgrade_utf8))
336 root 1.51 if (!utf8)
337     {
338 root 1.53 encode_str_utf8 (enc, utf8, str, len);
339 root 1.51 return;
340     }
341    
342 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
343 root 1.30 need (enc, len);
344     memcpy (enc->cur, str, len);
345     enc->cur += len;
346     }
347    
348 root 1.54 ecb_inline void
349 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
350 root 1.1 {
351 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
352 root 1.21 {
353 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
354 root 1.21
355     if (SvOK (*svp))
356     {
357     // already registered, use stringref
358     encode_tag (enc, CBOR_TAG_STRINGREF);
359 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
360 root 1.21 return;
361     }
362     else if (len >= minimum_string_length (enc->stringref_idx))
363     {
364     // register only
365     sv_setuv (*svp, enc->stringref_idx);
366     ++enc->stringref_idx;
367     }
368     }
369    
370 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
371 root 1.1 }
372    
373 root 1.66 ecb_inline void
374     encode_float16 (enc_t *enc, NV nv)
375     {
376     need (enc, 1+2);
377    
378     *enc->cur++ = MAJOR_MISC | MISC_FLOAT16;
379    
380     uint16_t fp = ecb_float_to_binary16 (nv);
381    
382     if (!ecb_big_endian ())
383     fp = ecb_bswap16 (fp);
384    
385     memcpy (enc->cur, &fp, 2);
386     enc->cur += 2;
387     }
388    
389     ecb_inline void
390     encode_float32 (enc_t *enc, NV nv)
391     {
392     need (enc, 1+4);
393    
394     *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
395    
396     uint32_t fp = ecb_float_to_binary32 (nv);
397    
398     if (!ecb_big_endian ())
399     fp = ecb_bswap32 (fp);
400    
401     memcpy (enc->cur, &fp, 4);
402     enc->cur += 4;
403     }
404    
405     ecb_inline void
406     encode_float64 (enc_t *enc, NV nv)
407     {
408     need (enc, 1+8);
409    
410     *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
411    
412     uint64_t fp = ecb_double_to_binary64 (nv);
413    
414     if (!ecb_big_endian ())
415     fp = ecb_bswap64 (fp);
416    
417     memcpy (enc->cur, &fp, 8);
418     enc->cur += 8;
419     }
420    
421     ecb_inline void
422     encode_forced (enc_t *enc, UV type, SV *sv)
423     {
424     switch (type)
425     {
426     case AS_CBOR:
427     {
428     STRLEN len;
429     char *str = SvPVbyte (sv, len);
430    
431     need (enc, len);
432     memcpy (enc->cur, str, len);
433     enc->cur += len;
434     }
435     break;
436    
437     case AS_BYTES:
438     {
439     STRLEN len;
440     char *str = SvPVbyte (sv, len);
441     encode_strref (enc, 0, 0, str, len);
442     }
443     break;
444    
445     case AS_TEXT:
446     {
447     STRLEN len;
448     char *str = SvPVutf8 (sv, len);
449     encode_strref (enc, 1, 1, str, len);
450     }
451     break;
452    
453     case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break;
454     case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break;
455     case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break;
456    
457     default:
458     croak ("encountered malformed CBOR::XS::Tagged object");
459     }
460     }
461    
462 root 1.1 static void encode_sv (enc_t *enc, SV *sv);
463    
464     static void
465     encode_av (enc_t *enc, AV *av)
466     {
467     int i, len = av_len (av);
468    
469     if (enc->depth >= enc->cbor.max_depth)
470     croak (ERR_NESTING_EXCEEDED);
471    
472     ++enc->depth;
473    
474 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
475 root 1.1
476 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
477 root 1.45 for (i = 0; i <= len; ++i)
478     {
479     SV **svp = av_fetch (av, i, 0);
480     encode_sv (enc, svp ? *svp : &PL_sv_undef);
481     }
482     else
483     for (i = 0; i <= len; ++i)
484 root 1.46 {
485     SV *sv = AvARRAY (av)[i];
486     encode_sv (enc, sv ? sv : &PL_sv_undef);
487     }
488 root 1.1
489     --enc->depth;
490     }
491    
492     static void
493     encode_hv (enc_t *enc, HV *hv)
494     {
495     HE *he;
496    
497     if (enc->depth >= enc->cbor.max_depth)
498     croak (ERR_NESTING_EXCEEDED);
499    
500     ++enc->depth;
501    
502     int pairs = hv_iterinit (hv);
503     int mg = SvMAGICAL (hv);
504    
505 root 1.63 if (ecb_expect_false (mg))
506 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
507 root 1.1 else
508 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
509 root 1.1
510     while ((he = hv_iternext (hv)))
511     {
512 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
513     encode_sv (enc, HeSVKEY (he));
514 root 1.1 else
515 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
516 root 1.1
517 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
518 root 1.1 }
519    
520 root 1.63 if (ecb_expect_false (mg))
521 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
522 root 1.1
523     --enc->depth;
524     }
525    
526     // encode objects, arrays and special \0=false and \1=true values.
527     static void
528     encode_rv (enc_t *enc, SV *sv)
529     {
530 root 1.19 SvGETMAGIC (sv);
531 root 1.1
532 root 1.19 svtype svt = SvTYPE (sv);
533 root 1.18
534 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
535 root 1.1 {
536 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
537     ? types_boolean_stash
538     : gv_stashpv ("Types::Serialiser::Boolean", 1);
539     HV *error_stash = !CBOR_SLOW || types_error_stash
540     ? types_error_stash
541     : gv_stashpv ("Types::Serialiser::Error", 1);
542 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
543     ? cbor_tagged_stash
544     : gv_stashpv ("CBOR::XS::Tagged" , 1);
545 root 1.1
546 root 1.11 HV *stash = SvSTASH (sv);
547    
548     if (stash == boolean_stash)
549 root 1.34 {
550 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
551 root 1.34 return;
552     }
553 root 1.11 else if (stash == error_stash)
554 root 1.34 {
555 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
556 root 1.34 return;
557     }
558 root 1.11 else if (stash == tagged_stash)
559 root 1.6 {
560     if (svt != SVt_PVAV)
561     croak ("encountered CBOR::XS::Tagged object that isn't an array");
562    
563 root 1.66 switch (av_len ((AV *)sv))
564     {
565     case 2-1:
566     // actually a tagged value
567     encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
568     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
569     break;
570    
571     case 3-1:
572     // a forced type [value, type, undef]
573     encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1));
574     break;
575    
576     default:
577     croak ("encountered malformed CBOR::XS::Tagged object");
578     }
579 root 1.34
580     return;
581 root 1.6 }
582 root 1.34 }
583    
584     if (ecb_expect_false (SvREFCNT (sv) > 1)
585     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
586     {
587 root 1.63 if (ecb_expect_false (!enc->shareable))
588 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
589    
590     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
591    
592     if (SvOK (*svp))
593     {
594     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
595 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
596 root 1.34 return;
597     }
598     else
599     {
600     sv_setuv (*svp, enc->shareable_idx);
601     ++enc->shareable_idx;
602     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
603     }
604     }
605    
606     if (ecb_expect_false (SvOBJECT (sv)))
607     {
608     HV *stash = SvSTASH (sv);
609     GV *method;
610    
611 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
612     croak ("encountered object '%s', but forbid_objects is enabled",
613     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
614     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
615 root 1.1 {
616 root 1.11 dSP;
617    
618 root 1.50 ENTER; SAVETMPS;
619     PUSHMARK (SP);
620 root 1.6 // we re-bless the reference to get overload and other niceties right
621 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
622    
623     PUTBACK;
624     // G_SCALAR ensures that return value is 1
625     call_sv ((SV *)GvCV (method), G_SCALAR);
626     SPAGAIN;
627    
628     // catch this surprisingly common error
629     if (SvROK (TOPs) && SvRV (TOPs) == sv)
630     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
631    
632     encode_sv (enc, POPs);
633    
634     PUTBACK;
635    
636     FREETMPS; LEAVE;
637     }
638     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
639     {
640     dSP;
641 root 1.6
642 root 1.50 ENTER; SAVETMPS;
643     PUSHMARK (SP);
644 root 1.11 EXTEND (SP, 2);
645     // we re-bless the reference to get overload and other niceties right
646     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
647     PUSHs (sv_cbor);
648 root 1.1
649 root 1.11 PUTBACK;
650     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
651     SPAGAIN;
652 root 1.6
653 root 1.11 // catch this surprisingly common error
654     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
655     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
656 root 1.6
657 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
658 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
659 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
660 root 1.6
661 root 1.65 {
662     int i;
663    
664     for (i = 0; i < count; ++i)
665     encode_sv (enc, SP[i + 1 - count]);
666    
667     SP -= count;
668     }
669 root 1.6
670 root 1.11 PUTBACK;
671 root 1.6
672 root 1.11 FREETMPS; LEAVE;
673 root 1.1 }
674 root 1.11 else
675     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
676     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
677 root 1.1 }
678     else if (svt == SVt_PVHV)
679     encode_hv (enc, (HV *)sv);
680     else if (svt == SVt_PVAV)
681     encode_av (enc, (AV *)sv);
682 root 1.18 else
683 root 1.1 {
684 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
685     encode_sv (enc, sv);
686 root 1.1 }
687     }
688    
689     static void
690     encode_nv (enc_t *enc, SV *sv)
691     {
692     double nv = SvNVX (sv);
693    
694     need (enc, 9);
695    
696 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
697     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
698 root 1.1 //TODO: maybe I32?
699 root 1.5 else if (ecb_expect_false (nv == (float)nv))
700 root 1.66 encode_float32 (enc, nv);
701 root 1.1 else
702 root 1.66 encode_float64 (enc, nv);
703 root 1.1 }
704    
705     static void
706     encode_sv (enc_t *enc, SV *sv)
707     {
708     SvGETMAGIC (sv);
709    
710     if (SvPOKp (sv))
711     {
712     STRLEN len;
713     char *str = SvPV (sv, len);
714 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
715 root 1.1 }
716     else if (SvNOKp (sv))
717     encode_nv (enc, sv);
718     else if (SvIOKp (sv))
719 root 1.66 encode_int (e,v sv);
720 root 1.1 else if (SvROK (sv))
721     encode_rv (enc, SvRV (sv));
722     else if (!SvOK (sv))
723 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
724 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
725 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
726 root 1.1 else
727     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
728     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
729     }
730    
731     static SV *
732     encode_cbor (SV *scalar, CBOR *cbor)
733     {
734 root 1.48 enc_t enc = { 0 };
735 root 1.1
736 root 1.63 enc.cbor = *cbor;
737     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
738     enc.cur = SvPVX (enc.sv);
739     enc.end = SvEND (enc.sv);
740 root 1.1
741     SvPOK_only (enc.sv);
742 root 1.20
743 root 1.32 if (cbor->flags & F_PACK_STRINGS)
744 root 1.20 {
745     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
746     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
747     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
748     }
749    
750 root 1.1 encode_sv (&enc, scalar);
751    
752     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
753     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
754    
755     if (enc.cbor.flags & F_SHRINK)
756     shrink (enc.sv);
757    
758     return enc.sv;
759     }
760    
761     /////////////////////////////////////////////////////////////////////////////
762     // decoder
763    
764     // structure used for decoding CBOR
765     typedef struct
766     {
767     U8 *cur; // current parser pointer
768     U8 *end; // end of input string
769     const char *err; // parse error, if != 0
770     CBOR cbor;
771     U32 depth; // recursion depth
772     U32 maxdepth; // recursion depth limit
773 root 1.19 AV *shareable;
774 root 1.20 AV *stringref;
775 root 1.27 SV *decode_tagged;
776 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
777 root 1.1 } dec_t;
778    
779 root 1.59 // set dec->err to ERRSV
780     ecb_cold static void
781     err_errsv (dec_t *dec)
782     {
783     if (!dec->err)
784     {
785     dec->err_sv = newSVsv (ERRSV);
786    
787     // chop off the trailing \n
788     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
789     *SvEND (dec->err_sv) = 0;
790    
791     dec->err = SvPVutf8_nolen (dec->err_sv);
792     }
793     }
794 root 1.1
795 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
796     ecb_cold static void
797     err_set (dec_t *dec, const char *reason)
798     {
799     if (!dec->err)
800     dec->err = reason;
801     }
802    
803     ecb_cold static void
804     err_unexpected_end (dec_t *dec)
805     {
806     err_set (dec, "unexpected end of CBOR data");
807     }
808    
809     #define ERR_DO(do) SB do; goto fail; SE
810     #define ERR(reason) ERR_DO (err_set (dec, reason))
811     #define ERR_ERRSV ERR_DO (err_errsv (dec))
812    
813 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
814 root 1.1
815 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
816 root 1.1 #define DEC_DEC_DEPTH --dec->depth
817    
818     static UV
819     decode_uint (dec_t *dec)
820     {
821 root 1.35 U8 m = *dec->cur & MINOR_MASK;
822     ++dec->cur;
823    
824     if (ecb_expect_true (m < LENGTH_EXT1))
825     return m;
826 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
827     {
828     WANT (1);
829     dec->cur += 1;
830     return dec->cur[-1];
831     }
832     else if (ecb_expect_true (m == LENGTH_EXT2))
833     {
834     WANT (2);
835     dec->cur += 2;
836     return (((UV)dec->cur[-2]) << 8)
837     | ((UV)dec->cur[-1]);
838     }
839     else if (ecb_expect_true (m == LENGTH_EXT4))
840     {
841     WANT (4);
842     dec->cur += 4;
843     return (((UV)dec->cur[-4]) << 24)
844     | (((UV)dec->cur[-3]) << 16)
845     | (((UV)dec->cur[-2]) << 8)
846     | ((UV)dec->cur[-1]);
847     }
848     else if (ecb_expect_true (m == LENGTH_EXT8))
849 root 1.1 {
850 root 1.36 WANT (8);
851     dec->cur += 8;
852 root 1.34
853 root 1.36 return
854 root 1.34 #if UVSIZE < 8
855 root 1.36 0
856 root 1.34 #else
857 root 1.36 (((UV)dec->cur[-8]) << 56)
858     | (((UV)dec->cur[-7]) << 48)
859     | (((UV)dec->cur[-6]) << 40)
860     | (((UV)dec->cur[-5]) << 32)
861 root 1.34 #endif
862 root 1.36 | (((UV)dec->cur[-4]) << 24)
863     | (((UV)dec->cur[-3]) << 16)
864     | (((UV)dec->cur[-2]) << 8)
865     | ((UV)dec->cur[-1]);
866 root 1.1 }
867 root 1.36 else
868     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
869 root 1.1
870     fail:
871     return 0;
872     }
873    
874     static SV *decode_sv (dec_t *dec);
875    
876     static SV *
877     decode_av (dec_t *dec)
878     {
879     AV *av = newAV ();
880    
881     DEC_INC_DEPTH;
882    
883 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
884 root 1.1 {
885     ++dec->cur;
886    
887     for (;;)
888     {
889     WANT (1);
890    
891 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
892 root 1.1 {
893     ++dec->cur;
894     break;
895     }
896    
897     av_push (av, decode_sv (dec));
898     }
899     }
900     else
901     {
902 root 1.57 UV i, len = decode_uint (dec);
903 root 1.1
904 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
905 root 1.1 av_fill (av, len - 1);
906    
907     for (i = 0; i < len; ++i)
908     AvARRAY (av)[i] = decode_sv (dec);
909     }
910    
911     DEC_DEC_DEPTH;
912     return newRV_noinc ((SV *)av);
913    
914     fail:
915 root 1.63 SvREFCNT_dec_NN (av);
916 root 1.1 DEC_DEC_DEPTH;
917     return &PL_sv_undef;
918     }
919    
920 root 1.16 static void
921     decode_he (dec_t *dec, HV *hv)
922     {
923     // for speed reasons, we specialcase single-string
924 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
925    
926 root 1.23 if (ecb_expect_true (!dec->stringref))
927 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
928 root 1.21 {
929 root 1.56 STRLEN len = decode_uint (dec);
930 root 1.21 char *key = (char *)dec->cur;
931 root 1.16
932 root 1.49 WANT (len);
933 root 1.21 dec->cur += len;
934    
935 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
936 root 1.16
937 root 1.21 return;
938     }
939 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
940 root 1.21 {
941 root 1.56 STRLEN len = decode_uint (dec);
942 root 1.21 char *key = (char *)dec->cur;
943 root 1.16
944 root 1.49 WANT (len);
945 root 1.21 dec->cur += len;
946 root 1.20
947 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
948     if (!is_utf8_string (key, len))
949     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
950 root 1.16
951 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
952 root 1.16
953 root 1.21 return;
954     }
955 root 1.20
956 root 1.21 SV *k = decode_sv (dec);
957     SV *v = decode_sv (dec);
958 root 1.16
959 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
960     // methods, and this is hopefully the only place where it can happen,
961     // so if there is a chance of an exception, take the very slow path.
962     // since catching exceptions is "undocumented/internal/forbidden" by
963     // the new p5p powers, we need to call out to a perl function :/
964     if (ecb_expect_false (SvAMAGIC (k)))
965     {
966     dSP;
967    
968     ENTER; SAVETMPS;
969     PUSHMARK (SP);
970     EXTEND (SP, 3);
971     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
972     PUSHs (sv_2mortal (k));
973     PUSHs (sv_2mortal (v));
974    
975     PUTBACK;
976     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
977     SPAGAIN;
978    
979     FREETMPS; LEAVE;
980    
981     if (SvTRUE (ERRSV))
982     ERR_ERRSV;
983    
984     return;
985     }
986    
987 root 1.21 hv_store_ent (hv, k, v, 0);
988 root 1.63 SvREFCNT_dec_NN (k);
989 root 1.38
990     fail:
991     ;
992 root 1.16 }
993    
994 root 1.1 static SV *
995     decode_hv (dec_t *dec)
996     {
997     HV *hv = newHV ();
998    
999     DEC_INC_DEPTH;
1000    
1001 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
1002 root 1.1 {
1003     ++dec->cur;
1004    
1005     for (;;)
1006     {
1007     WANT (1);
1008    
1009 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
1010 root 1.1 {
1011     ++dec->cur;
1012     break;
1013     }
1014    
1015 root 1.16 decode_he (dec, hv);
1016 root 1.1 }
1017     }
1018     else
1019     {
1020 root 1.57 UV pairs = decode_uint (dec);
1021    
1022     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
1023 root 1.1
1024 root 1.16 while (pairs--)
1025     decode_he (dec, hv);
1026 root 1.1 }
1027    
1028     DEC_DEC_DEPTH;
1029     return newRV_noinc ((SV *)hv);
1030    
1031     fail:
1032 root 1.63 SvREFCNT_dec_NN (hv);
1033 root 1.1 DEC_DEC_DEPTH;
1034     return &PL_sv_undef;
1035     }
1036    
1037     static SV *
1038     decode_str (dec_t *dec, int utf8)
1039     {
1040 root 1.6 SV *sv = 0;
1041 root 1.1
1042 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
1043 root 1.1 {
1044 root 1.33 // indefinite length strings
1045 root 1.1 ++dec->cur;
1046    
1047 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
1048 root 1.33
1049 root 1.1 sv = newSVpvn ("", 0);
1050    
1051     for (;;)
1052     {
1053     WANT (1);
1054    
1055 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
1056     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
1057 root 1.33 {
1058     ++dec->cur;
1059     break;
1060     }
1061     else
1062     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
1063    
1064     STRLEN len = decode_uint (dec);
1065 root 1.1
1066 root 1.33 WANT (len);
1067     sv_catpvn (sv, dec->cur, len);
1068     dec->cur += len;
1069 root 1.1 }
1070     }
1071     else
1072     {
1073     STRLEN len = decode_uint (dec);
1074    
1075     WANT (len);
1076     sv = newSVpvn (dec->cur, len);
1077     dec->cur += len;
1078 root 1.25
1079     if (ecb_expect_false (dec->stringref)
1080     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
1081     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
1082 root 1.1 }
1083    
1084     if (utf8)
1085 root 1.38 {
1086     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
1087     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
1088     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
1089    
1090     SvUTF8_on (sv);
1091     }
1092 root 1.1
1093     return sv;
1094    
1095     fail:
1096 root 1.6 SvREFCNT_dec (sv);
1097 root 1.1 return &PL_sv_undef;
1098     }
1099    
1100     static SV *
1101 root 1.3 decode_tagged (dec_t *dec)
1102     {
1103 root 1.19 SV *sv = 0;
1104 root 1.3 UV tag = decode_uint (dec);
1105 root 1.19
1106     WANT (1);
1107 root 1.3
1108 root 1.18 switch (tag)
1109     {
1110     case CBOR_TAG_MAGIC:
1111 root 1.20 sv = decode_sv (dec);
1112     break;
1113 root 1.18
1114     case CBOR_TAG_INDIRECTION:
1115 root 1.20 sv = newRV_noinc (decode_sv (dec));
1116     break;
1117    
1118     case CBOR_TAG_STRINGREF_NAMESPACE:
1119     {
1120 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1121 root 1.55 // erase mortalised caches, e.g. "shareable"
1122     ENTER;
1123 root 1.20
1124     SAVESPTR (dec->stringref);
1125     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1126    
1127     sv = decode_sv (dec);
1128    
1129 root 1.55 LEAVE;
1130 root 1.20 }
1131     break;
1132    
1133     case CBOR_TAG_STRINGREF:
1134     {
1135 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1136 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1137    
1138     UV idx = decode_uint (dec);
1139    
1140 root 1.64 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1141 root 1.20 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1142    
1143     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1144     }
1145     break;
1146 root 1.11
1147 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1148     {
1149     if (ecb_expect_false (!dec->shareable))
1150     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1151    
1152 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1153     {
1154     sv = newSV (0);
1155     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1156 root 1.18
1157 root 1.37 SV *osv = decode_sv (dec);
1158     sv_setsv (sv, osv);
1159     SvREFCNT_dec_NN (osv);
1160     }
1161     else
1162     {
1163     av_push (dec->shareable, &PL_sv_undef);
1164     int idx = AvFILLp (dec->shareable);
1165     sv = decode_sv (dec);
1166     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1167     }
1168 root 1.19 }
1169 root 1.20 break;
1170 root 1.18
1171     case CBOR_TAG_VALUE_SHAREDREF:
1172 root 1.17 {
1173 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1174 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1175 root 1.18
1176 root 1.19 UV idx = decode_uint (dec);
1177    
1178 root 1.64 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1179 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1180    
1181 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1182 root 1.37
1183     if (sv == &PL_sv_undef)
1184     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1185 root 1.17 }
1186 root 1.20 break;
1187 root 1.17
1188 root 1.18 case CBOR_TAG_PERL_OBJECT:
1189     {
1190 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1191     goto filter;
1192    
1193 root 1.19 sv = decode_sv (dec);
1194    
1195 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1196     ERR ("corrupted CBOR data (non-array perl object)");
1197    
1198     AV *av = (AV *)SvRV (sv);
1199     int len = av_len (av) + 1;
1200     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1201    
1202     if (!stash)
1203     ERR ("cannot decode perl-object (package does not exist)");
1204    
1205     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1206    
1207     if (!method)
1208     ERR ("cannot decode perl-object (package does not have a THAW method)");
1209    
1210     dSP;
1211    
1212 root 1.50 ENTER; SAVETMPS;
1213     PUSHMARK (SP);
1214 root 1.18 EXTEND (SP, len + 1);
1215     // we re-bless the reference to get overload and other niceties right
1216     PUSHs (*av_fetch (av, 0, 1));
1217     PUSHs (sv_cbor);
1218    
1219     int i;
1220    
1221     for (i = 1; i < len; ++i)
1222     PUSHs (*av_fetch (av, i, 1));
1223    
1224     PUTBACK;
1225     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1226     SPAGAIN;
1227    
1228     if (SvTRUE (ERRSV))
1229     {
1230     FREETMPS; LEAVE;
1231 root 1.59 ERR_ERRSV;
1232 root 1.18 }
1233    
1234 root 1.63 SvREFCNT_dec_NN (sv);
1235 root 1.18 sv = SvREFCNT_inc (POPs);
1236 root 1.11
1237 root 1.18 PUTBACK;
1238 root 1.11
1239 root 1.18 FREETMPS; LEAVE;
1240     }
1241 root 1.20 break;
1242 root 1.9
1243 root 1.18 default:
1244 root 1.60 filter:
1245 root 1.18 {
1246 root 1.58 SV *tag_sv = newSVuv (tag);
1247    
1248 root 1.19 sv = decode_sv (dec);
1249    
1250 root 1.27 dSP;
1251 root 1.50 ENTER; SAVETMPS;
1252     PUSHMARK (SP);
1253 root 1.27 EXTEND (SP, 2);
1254 root 1.58 PUSHs (tag_sv);
1255 root 1.27 PUSHs (sv);
1256    
1257     PUTBACK;
1258     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1259     SPAGAIN;
1260    
1261     if (SvTRUE (ERRSV))
1262     {
1263 root 1.63 SvREFCNT_dec_NN (tag_sv);
1264 root 1.27 FREETMPS; LEAVE;
1265 root 1.59 ERR_ERRSV;
1266 root 1.27 }
1267    
1268     if (count)
1269     {
1270 root 1.63 SvREFCNT_dec_NN (tag_sv);
1271     SvREFCNT_dec_NN (sv);
1272 root 1.65 sv = SvREFCNT_inc_NN (TOPs);
1273     SP -= count;
1274 root 1.27 }
1275     else
1276     {
1277     AV *av = newAV ();
1278 root 1.58 av_push (av, tag_sv);
1279 root 1.27 av_push (av, sv);
1280    
1281     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1282     ? cbor_tagged_stash
1283     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1284     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1285     }
1286 root 1.7
1287 root 1.27 PUTBACK;
1288    
1289     FREETMPS; LEAVE;
1290 root 1.18 }
1291 root 1.20 break;
1292 root 1.11 }
1293 root 1.9
1294 root 1.20 return sv;
1295    
1296 root 1.9 fail:
1297     SvREFCNT_dec (sv);
1298     return &PL_sv_undef;
1299 root 1.3 }
1300    
1301     static SV *
1302 root 1.1 decode_sv (dec_t *dec)
1303     {
1304     WANT (1);
1305    
1306 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1307 root 1.1 {
1308 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1309     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1310     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1311     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1312     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1313     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1314     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1315    
1316     case MAJOR_MISC >> MAJOR_SHIFT:
1317     switch (*dec->cur++ & MINOR_MASK)
1318 root 1.1 {
1319 root 1.35 case SIMPLE_FALSE:
1320 root 1.1 #if CBOR_SLOW
1321 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1322 root 1.1 #endif
1323 root 1.10 return newSVsv (types_false);
1324 root 1.35 case SIMPLE_TRUE:
1325 root 1.1 #if CBOR_SLOW
1326 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1327 root 1.1 #endif
1328 root 1.10 return newSVsv (types_true);
1329 root 1.35 case SIMPLE_NULL:
1330 root 1.1 return newSVsv (&PL_sv_undef);
1331 root 1.35 case SIMPLE_UNDEF:
1332 root 1.10 #if CBOR_SLOW
1333     types_error = get_bool ("Types::Serialiser::error");
1334     #endif
1335     return newSVsv (types_error);
1336 root 1.1
1337 root 1.35 case MISC_FLOAT16:
1338 root 1.2 {
1339     WANT (2);
1340    
1341     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1342     dec->cur += 2;
1343    
1344     return newSVnv (ecb_binary16_to_float (fp));
1345     }
1346 root 1.1
1347 root 1.35 case MISC_FLOAT32:
1348 root 1.1 {
1349     uint32_t fp;
1350     WANT (4);
1351     memcpy (&fp, dec->cur, 4);
1352     dec->cur += 4;
1353    
1354     if (!ecb_big_endian ())
1355     fp = ecb_bswap32 (fp);
1356    
1357     return newSVnv (ecb_binary32_to_float (fp));
1358     }
1359    
1360 root 1.35 case MISC_FLOAT64:
1361 root 1.1 {
1362     uint64_t fp;
1363     WANT (8);
1364     memcpy (&fp, dec->cur, 8);
1365     dec->cur += 8;
1366    
1367     if (!ecb_big_endian ())
1368     fp = ecb_bswap64 (fp);
1369    
1370     return newSVnv (ecb_binary64_to_double (fp));
1371     }
1372    
1373 root 1.35 // 0..19 unassigned simple
1374 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1375     // 28-30 unassigned misc
1376     // 31 break code
1377 root 1.1 default:
1378 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1379 root 1.1 }
1380    
1381     break;
1382     }
1383    
1384     fail:
1385     return &PL_sv_undef;
1386     }
1387    
1388     static SV *
1389     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1390     {
1391 root 1.48 dec_t dec = { 0 };
1392 root 1.1 SV *sv;
1393 root 1.16 STRLEN len;
1394     char *data = SvPVbyte (string, len);
1395 root 1.1
1396 root 1.16 if (len > cbor->max_size && cbor->max_size)
1397     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1398     (unsigned long)len, (unsigned long)cbor->max_size);
1399 root 1.1
1400     dec.cbor = *cbor;
1401 root 1.16 dec.cur = (U8 *)data;
1402     dec.end = (U8 *)data + len;
1403 root 1.1
1404     sv = decode_sv (&dec);
1405    
1406     if (offset_return)
1407     *offset_return = dec.cur;
1408    
1409     if (!(offset_return || !sv))
1410 root 1.2 if (dec.cur != dec.end && !dec.err)
1411     dec.err = "garbage after CBOR object";
1412    
1413     if (dec.err)
1414 root 1.1 {
1415 root 1.39 if (dec.shareable)
1416     {
1417 root 1.58 // need to break cyclic links, which would all be in shareable
1418 root 1.39 int i;
1419     SV **svp;
1420    
1421     for (i = av_len (dec.shareable) + 1; i--; )
1422     if ((svp = av_fetch (dec.shareable, i, 0)))
1423     sv_setsv (*svp, &PL_sv_undef);
1424     }
1425    
1426 root 1.63 SvREFCNT_dec_NN (sv);
1427 root 1.59
1428     if (dec.err_sv)
1429     sv_2mortal (dec.err_sv);
1430    
1431 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1432 root 1.1 }
1433    
1434     sv = sv_2mortal (sv);
1435    
1436     return sv;
1437     }
1438    
1439     /////////////////////////////////////////////////////////////////////////////
1440 root 1.40 // incremental parser
1441    
1442     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1443    
1444     // returns 0 for notyet, 1 for success or error
1445     static int
1446     incr_parse (CBOR *self, SV *cborstr)
1447     {
1448     STRLEN cur;
1449     SvPV (cborstr, cur);
1450    
1451     while (ecb_expect_true (self->incr_need <= cur))
1452     {
1453     // table of integer count bytes
1454     static I8 incr_len[MINOR_MASK + 1] = {
1455     0, 0, 0, 0, 0, 0, 0, 0,
1456     0, 0, 0, 0, 0, 0, 0, 0,
1457     0, 0, 0, 0, 0, 0, 0, 0,
1458     1, 2, 4, 8,-1,-1,-1,-2
1459     };
1460    
1461     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1462     U8 m = *p & MINOR_MASK;
1463     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1464     I8 ilen = incr_len[m];
1465    
1466     self->incr_need = self->incr_pos + 1;
1467    
1468     if (ecb_expect_false (ilen < 0))
1469     {
1470     if (m != MINOR_INDEF)
1471     return 1; // error
1472    
1473     if (*p == (MAJOR_MISC | MINOR_INDEF))
1474     {
1475     if (count >= 0)
1476     return 1; // error
1477    
1478     count = 1;
1479     }
1480     else
1481     {
1482     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1483     count = -1;
1484     }
1485     }
1486     else
1487     {
1488     self->incr_need += ilen;
1489     if (ecb_expect_false (self->incr_need > cur))
1490     return 0;
1491    
1492     int major = *p >> MAJOR_SHIFT;
1493    
1494     switch (major)
1495     {
1496 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1497     ++count; // tags merely prefix another value
1498     break;
1499    
1500 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1501     case MAJOR_TEXT >> MAJOR_SHIFT:
1502     case MAJOR_ARRAY >> MAJOR_SHIFT:
1503     case MAJOR_MAP >> MAJOR_SHIFT:
1504     {
1505     UV len;
1506    
1507     if (ecb_expect_false (ilen))
1508     {
1509     len = 0;
1510    
1511     do {
1512     len = (len << 8) | *++p;
1513     } while (--ilen);
1514     }
1515     else
1516     len = m;
1517    
1518     switch (major)
1519     {
1520     case MAJOR_BYTES >> MAJOR_SHIFT:
1521     case MAJOR_TEXT >> MAJOR_SHIFT:
1522     self->incr_need += len;
1523     if (ecb_expect_false (self->incr_need > cur))
1524     return 0;
1525    
1526     break;
1527    
1528     case MAJOR_MAP >> MAJOR_SHIFT:
1529     len <<= 1;
1530     case MAJOR_ARRAY >> MAJOR_SHIFT:
1531     if (len)
1532     {
1533     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1534     count = len + 1;
1535     }
1536     break;
1537     }
1538     }
1539     }
1540     }
1541    
1542     self->incr_pos = self->incr_need;
1543    
1544     if (count > 0)
1545     {
1546     while (!--count)
1547     {
1548     if (!AvFILLp (self->incr_count))
1549     return 1; // done
1550    
1551     SvREFCNT_dec_NN (av_pop (self->incr_count));
1552     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1553     }
1554    
1555     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1556     }
1557     }
1558    
1559     return 0;
1560     }
1561    
1562    
1563     /////////////////////////////////////////////////////////////////////////////
1564 root 1.1 // XS interface functions
1565    
1566     MODULE = CBOR::XS PACKAGE = CBOR::XS
1567    
1568     BOOT:
1569     {
1570     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1571 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1572 root 1.1
1573 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1574     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1575    
1576     types_true = get_bool ("Types::Serialiser::true" );
1577     types_false = get_bool ("Types::Serialiser::false");
1578     types_error = get_bool ("Types::Serialiser::error");
1579 root 1.11
1580 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1581    
1582 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1583     SvREADONLY_on (sv_cbor);
1584 root 1.56
1585     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1586 root 1.1 }
1587    
1588     PROTOTYPES: DISABLE
1589    
1590     void CLONE (...)
1591     CODE:
1592 root 1.10 cbor_stash = 0;
1593     cbor_tagged_stash = 0;
1594     types_error_stash = 0;
1595     types_boolean_stash = 0;
1596 root 1.1
1597     void new (char *klass)
1598     PPCODE:
1599     {
1600     SV *pv = NEWSV (0, sizeof (CBOR));
1601     SvPOK_only (pv);
1602     cbor_init ((CBOR *)SvPVX (pv));
1603     XPUSHs (sv_2mortal (sv_bless (
1604     newRV_noinc (pv),
1605     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1606     )));
1607     }
1608    
1609     void shrink (CBOR *self, int enable = 1)
1610     ALIAS:
1611     shrink = F_SHRINK
1612     allow_unknown = F_ALLOW_UNKNOWN
1613 root 1.18 allow_sharing = F_ALLOW_SHARING
1614 root 1.37 allow_cycles = F_ALLOW_CYCLES
1615 root 1.60 forbid_objects = F_FORBID_OBJECTS
1616 root 1.32 pack_strings = F_PACK_STRINGS
1617 root 1.54 text_keys = F_TEXT_KEYS
1618     text_strings = F_TEXT_STRINGS
1619 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1620 root 1.1 PPCODE:
1621     {
1622     if (enable)
1623     self->flags |= ix;
1624     else
1625     self->flags &= ~ix;
1626    
1627     XPUSHs (ST (0));
1628     }
1629    
1630     void get_shrink (CBOR *self)
1631     ALIAS:
1632     get_shrink = F_SHRINK
1633     get_allow_unknown = F_ALLOW_UNKNOWN
1634 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1635 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1636 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1637 root 1.32 get_pack_strings = F_PACK_STRINGS
1638 root 1.54 get_text_keys = F_TEXT_KEYS
1639     get_text_strings = F_TEXT_STRINGS
1640 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1641 root 1.1 PPCODE:
1642     XPUSHs (boolSV (self->flags & ix));
1643    
1644     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1645     PPCODE:
1646     self->max_depth = max_depth;
1647     XPUSHs (ST (0));
1648    
1649     U32 get_max_depth (CBOR *self)
1650     CODE:
1651     RETVAL = self->max_depth;
1652     OUTPUT:
1653     RETVAL
1654    
1655     void max_size (CBOR *self, U32 max_size = 0)
1656     PPCODE:
1657     self->max_size = max_size;
1658     XPUSHs (ST (0));
1659    
1660     int get_max_size (CBOR *self)
1661     CODE:
1662     RETVAL = self->max_size;
1663     OUTPUT:
1664     RETVAL
1665    
1666 root 1.27 void filter (CBOR *self, SV *filter = 0)
1667     PPCODE:
1668     SvREFCNT_dec (self->filter);
1669     self->filter = filter ? newSVsv (filter) : filter;
1670     XPUSHs (ST (0));
1671    
1672     SV *get_filter (CBOR *self)
1673     CODE:
1674     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1675     OUTPUT:
1676     RETVAL
1677    
1678 root 1.1 void encode (CBOR *self, SV *scalar)
1679     PPCODE:
1680     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1681     XPUSHs (scalar);
1682    
1683     void decode (CBOR *self, SV *cborstr)
1684     PPCODE:
1685     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1686     XPUSHs (cborstr);
1687    
1688     void decode_prefix (CBOR *self, SV *cborstr)
1689     PPCODE:
1690     {
1691     SV *sv;
1692     char *offset;
1693     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1694     EXTEND (SP, 2);
1695     PUSHs (sv);
1696     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1697     }
1698    
1699 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1700 root 1.42 ALIAS:
1701     incr_parse_multiple = 1
1702 root 1.40 PPCODE:
1703     {
1704     if (SvUTF8 (cborstr))
1705     sv_utf8_downgrade (cborstr, 0);
1706    
1707     if (!self->incr_count)
1708     {
1709     self->incr_count = newAV ();
1710     self->incr_pos = 0;
1711     self->incr_need = 1;
1712    
1713     av_push (self->incr_count, newSViv (1));
1714     }
1715    
1716 root 1.41 do
1717 root 1.40 {
1718     if (!incr_parse (self, cborstr))
1719     {
1720     if (self->incr_need > self->max_size && self->max_size)
1721     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1722     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1723    
1724     break;
1725     }
1726    
1727 root 1.41 SV *sv;
1728     char *offset;
1729 root 1.40
1730 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1731     XPUSHs (sv);
1732 root 1.40
1733 root 1.41 sv_chop (cborstr, offset);
1734 root 1.40
1735 root 1.41 av_clear (self->incr_count);
1736     av_push (self->incr_count, newSViv (1));
1737 root 1.40
1738 root 1.41 self->incr_pos = 0;
1739     self->incr_need = self->incr_pos + 1;
1740 root 1.40 }
1741 root 1.42 while (ix);
1742 root 1.40 }
1743    
1744     void incr_reset (CBOR *self)
1745     CODE:
1746     {
1747     SvREFCNT_dec (self->incr_count);
1748     self->incr_count = 0;
1749     }
1750    
1751 root 1.27 void DESTROY (CBOR *self)
1752     PPCODE:
1753     cbor_free (self);
1754    
1755 root 1.1 PROTOTYPES: ENABLE
1756    
1757     void encode_cbor (SV *scalar)
1758 root 1.36 ALIAS:
1759     encode_cbor = 0
1760     encode_cbor_sharing = F_ALLOW_SHARING
1761 root 1.1 PPCODE:
1762     {
1763     CBOR cbor;
1764     cbor_init (&cbor);
1765 root 1.36 cbor.flags |= ix;
1766 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1767     XPUSHs (scalar);
1768     }
1769    
1770     void decode_cbor (SV *cborstr)
1771     PPCODE:
1772     {
1773     CBOR cbor;
1774     cbor_init (&cbor);
1775     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1776     XPUSHs (cborstr);
1777     }
1778    
1779 root 1.64 #ifdef __AFL_COMPILER
1780    
1781     void
1782     afl_init ()
1783     CODE:
1784     __AFL_INIT ();
1785    
1786     int
1787     afl_loop (unsigned int count = 10000)
1788     CODE:
1789     RETVAL = __AFL_LOOP (count);
1790     OUTPUT:
1791     RETVAL
1792    
1793     #endif
1794