ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.65
Committed: Thu Nov 15 19:52:41 2018 UTC (5 years, 6 months ago) by root
Branch: MAIN
CVS Tags: rel-1_71
Changes since 1.64: +10 -5 lines
Log Message:
1.71

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.35 // known major and minor types
34     enum cbor_type
35     {
36     MAJOR_SHIFT = 5,
37     MINOR_MASK = 0x1f,
38    
39     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
40     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
41     MAJOR_BYTES = 2 << MAJOR_SHIFT,
42     MAJOR_TEXT = 3 << MAJOR_SHIFT,
43     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
44     MAJOR_MAP = 5 << MAJOR_SHIFT,
45     MAJOR_TAG = 6 << MAJOR_SHIFT,
46     MAJOR_MISC = 7 << MAJOR_SHIFT,
47    
48     // INT/STRING/ARRAY/MAP subtypes
49     LENGTH_EXT1 = 24,
50     LENGTH_EXT2 = 25,
51     LENGTH_EXT4 = 26,
52     LENGTH_EXT8 = 27,
53    
54     // SIMPLE types (effectively MISC subtypes)
55     SIMPLE_FALSE = 20,
56     SIMPLE_TRUE = 21,
57     SIMPLE_NULL = 22,
58     SIMPLE_UNDEF = 23,
59    
60     // MISC subtype (unused)
61     MISC_EXT1 = 24,
62     MISC_FLOAT16 = 25,
63     MISC_FLOAT32 = 26,
64     MISC_FLOAT64 = 27,
65    
66     // BYTES/TEXT/ARRAY/MAP
67     MINOR_INDEF = 31,
68     };
69    
70 root 1.9 // known tags
71 root 1.8 enum cbor_tag
72     {
73 root 1.35 // extensions
74     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
75     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
76     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
77     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
78     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
79     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
80     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
81    
82     // rfc7049
83     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
84     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
85     CBOR_TAG_POS_BIGNUM = 2, // byte string
86     CBOR_TAG_NEG_BIGNUM = 3, // byte string
87     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
88     CBOR_TAG_BIGFLOAT = 5, // array
89    
90     CBOR_TAG_CONV_B64U = 21, // base64url, any
91     CBOR_TAG_CONV_B64 = 22, // base64, any
92     CBOR_TAG_CONV_HEX = 23, // base16, any
93     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
94    
95     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
96     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
97     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
98     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
99     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
100 root 1.8
101 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
102 root 1.8 };
103    
104 root 1.24 #define F_SHRINK 0x00000001UL
105     #define F_ALLOW_UNKNOWN 0x00000002UL
106 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
107 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
108 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
109     #define F_PACK_STRINGS 0x00000020UL
110     #define F_TEXT_KEYS 0x00000040UL
111     #define F_TEXT_STRINGS 0x00000080UL
112     #define F_VALIDATE_UTF8 0x00000100UL
113 root 1.1
114     #define INIT_SIZE 32 // initial scalar size to be allocated
115    
116     #define SB do {
117     #define SE } while (0)
118    
119     #define IN_RANGE_INC(type,val,beg,end) \
120     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
121     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
122    
123     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
124    
125     #ifdef USE_ITHREADS
126     # define CBOR_SLOW 1
127     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
128     #else
129     # define CBOR_SLOW 0
130     # define CBOR_STASH cbor_stash
131     #endif
132    
133 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
134 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
135 root 1.1
136     typedef struct {
137     U32 flags;
138     U32 max_depth;
139     STRLEN max_size;
140 root 1.27 SV *filter;
141 root 1.40
142     // for the incremental parser
143     STRLEN incr_pos; // the current offset into the text
144     STRLEN incr_need; // minimum bytes needed to decode
145     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
146 root 1.1 } CBOR;
147    
148 root 1.5 ecb_inline void
149 root 1.1 cbor_init (CBOR *cbor)
150     {
151     Zero (cbor, 1, CBOR);
152     cbor->max_depth = 512;
153     }
154    
155 root 1.27 ecb_inline void
156     cbor_free (CBOR *cbor)
157     {
158     SvREFCNT_dec (cbor->filter);
159 root 1.40 SvREFCNT_dec (cbor->incr_count);
160 root 1.27 }
161    
162 root 1.1 /////////////////////////////////////////////////////////////////////////////
163     // utility functions
164    
165 root 1.5 ecb_inline SV *
166 root 1.1 get_bool (const char *name)
167     {
168     SV *sv = get_sv (name, 1);
169    
170     SvREADONLY_on (sv);
171     SvREADONLY_on (SvRV (sv));
172    
173     return sv;
174     }
175    
176 root 1.5 ecb_inline void
177 root 1.1 shrink (SV *sv)
178     {
179     sv_utf8_downgrade (sv, 1);
180    
181     if (SvLEN (sv) > SvCUR (sv) + 1)
182     {
183     #ifdef SvPV_shrink_to_cur
184     SvPV_shrink_to_cur (sv);
185     #elif defined (SvPV_renew)
186     SvPV_renew (sv, SvCUR (sv) + 1);
187     #endif
188     }
189     }
190    
191 root 1.21 // minimum length of a string to be registered for stringref
192     ecb_inline int
193     minimum_string_length (UV idx)
194     {
195 root 1.61 return idx <= 23 ? 3
196     : idx <= 0xffU ? 4
197     : idx <= 0xffffU ? 5
198     : idx <= 0xffffffffU ? 7
199     : 11;
200 root 1.21 }
201    
202 root 1.1 /////////////////////////////////////////////////////////////////////////////
203     // encoder
204    
205     // structure used for encoding CBOR
206     typedef struct
207     {
208     char *cur; // SvPVX (sv) + current output position
209     char *end; // SvEND (sv)
210     SV *sv; // result scalar
211     CBOR cbor;
212     U32 depth; // recursion level
213 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
214     UV stringref_idx;
215 root 1.19 HV *shareable; // ptr => index, or 0
216     UV shareable_idx;
217 root 1.1 } enc_t;
218    
219 root 1.5 ecb_inline void
220 root 1.1 need (enc_t *enc, STRLEN len)
221     {
222 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
223 root 1.1 {
224     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
225     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
226     enc->cur = SvPVX (enc->sv) + cur;
227     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
228     }
229     }
230    
231 root 1.5 ecb_inline void
232 root 1.1 encode_ch (enc_t *enc, char ch)
233     {
234     need (enc, 1);
235     *enc->cur++ = ch;
236     }
237    
238     static void
239     encode_uint (enc_t *enc, int major, UV len)
240     {
241     need (enc, 9);
242    
243 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
244 root 1.1 *enc->cur++ = major | len;
245 root 1.36 else if (ecb_expect_true (len <= 0xffU))
246 root 1.1 {
247 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
248 root 1.1 *enc->cur++ = len;
249     }
250 root 1.36 else if (len <= 0xffffU)
251 root 1.1 {
252 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
253 root 1.1 *enc->cur++ = len >> 8;
254     *enc->cur++ = len;
255     }
256 root 1.36 else if (len <= 0xffffffffU)
257 root 1.1 {
258 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
259 root 1.1 *enc->cur++ = len >> 24;
260     *enc->cur++ = len >> 16;
261     *enc->cur++ = len >> 8;
262     *enc->cur++ = len;
263     }
264 root 1.4 else
265 root 1.1 {
266 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
267 root 1.1 *enc->cur++ = len >> 56;
268     *enc->cur++ = len >> 48;
269     *enc->cur++ = len >> 40;
270     *enc->cur++ = len >> 32;
271     *enc->cur++ = len >> 24;
272     *enc->cur++ = len >> 16;
273     *enc->cur++ = len >> 8;
274     *enc->cur++ = len;
275     }
276     }
277    
278 root 1.21 ecb_inline void
279     encode_tag (enc_t *enc, UV tag)
280     {
281 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
282 root 1.21 }
283    
284 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
285     ecb_noinline static void
286     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
287     {
288     STRLEN ulen = len;
289     U8 *p, *pend = (U8 *)str + len;
290    
291     for (p = (U8 *)str; p < pend; ++p)
292     ulen += *p >> 7; // count set high bits
293    
294     encode_uint (enc, MAJOR_TEXT, ulen);
295    
296     need (enc, ulen);
297     for (p = (U8 *)str; p < pend; ++p)
298     if (*p < 0x80)
299     *enc->cur++ = *p;
300     else
301     {
302     *enc->cur++ = 0xc0 + (*p >> 6);
303     *enc->cur++ = 0x80 + (*p & 63);
304     }
305     }
306    
307 root 1.30 ecb_inline void
308 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
309 root 1.30 {
310 root 1.53 if (ecb_expect_false (upgrade_utf8))
311 root 1.51 if (!utf8)
312     {
313 root 1.53 encode_str_utf8 (enc, utf8, str, len);
314 root 1.51 return;
315     }
316    
317 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
318 root 1.30 need (enc, len);
319     memcpy (enc->cur, str, len);
320     enc->cur += len;
321     }
322    
323 root 1.54 ecb_inline void
324 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
325 root 1.1 {
326 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
327 root 1.21 {
328 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
329 root 1.21
330     if (SvOK (*svp))
331     {
332     // already registered, use stringref
333     encode_tag (enc, CBOR_TAG_STRINGREF);
334 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
335 root 1.21 return;
336     }
337     else if (len >= minimum_string_length (enc->stringref_idx))
338     {
339     // register only
340     sv_setuv (*svp, enc->stringref_idx);
341     ++enc->stringref_idx;
342     }
343     }
344    
345 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
346 root 1.1 }
347    
348     static void encode_sv (enc_t *enc, SV *sv);
349    
350     static void
351     encode_av (enc_t *enc, AV *av)
352     {
353     int i, len = av_len (av);
354    
355     if (enc->depth >= enc->cbor.max_depth)
356     croak (ERR_NESTING_EXCEEDED);
357    
358     ++enc->depth;
359    
360 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
361 root 1.1
362 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
363 root 1.45 for (i = 0; i <= len; ++i)
364     {
365     SV **svp = av_fetch (av, i, 0);
366     encode_sv (enc, svp ? *svp : &PL_sv_undef);
367     }
368     else
369     for (i = 0; i <= len; ++i)
370 root 1.46 {
371     SV *sv = AvARRAY (av)[i];
372     encode_sv (enc, sv ? sv : &PL_sv_undef);
373     }
374 root 1.1
375     --enc->depth;
376     }
377    
378     static void
379     encode_hv (enc_t *enc, HV *hv)
380     {
381     HE *he;
382    
383     if (enc->depth >= enc->cbor.max_depth)
384     croak (ERR_NESTING_EXCEEDED);
385    
386     ++enc->depth;
387    
388     int pairs = hv_iterinit (hv);
389     int mg = SvMAGICAL (hv);
390    
391 root 1.63 if (ecb_expect_false (mg))
392 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
393 root 1.1 else
394 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
395 root 1.1
396     while ((he = hv_iternext (hv)))
397     {
398 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
399     encode_sv (enc, HeSVKEY (he));
400 root 1.1 else
401 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
402 root 1.1
403 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
404 root 1.1 }
405    
406 root 1.63 if (ecb_expect_false (mg))
407 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
408 root 1.1
409     --enc->depth;
410     }
411    
412     // encode objects, arrays and special \0=false and \1=true values.
413     static void
414     encode_rv (enc_t *enc, SV *sv)
415     {
416 root 1.19 SvGETMAGIC (sv);
417 root 1.1
418 root 1.19 svtype svt = SvTYPE (sv);
419 root 1.18
420 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
421 root 1.1 {
422 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
423     ? types_boolean_stash
424     : gv_stashpv ("Types::Serialiser::Boolean", 1);
425     HV *error_stash = !CBOR_SLOW || types_error_stash
426     ? types_error_stash
427     : gv_stashpv ("Types::Serialiser::Error", 1);
428 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
429     ? cbor_tagged_stash
430     : gv_stashpv ("CBOR::XS::Tagged" , 1);
431 root 1.1
432 root 1.11 HV *stash = SvSTASH (sv);
433    
434     if (stash == boolean_stash)
435 root 1.34 {
436 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
437 root 1.34 return;
438     }
439 root 1.11 else if (stash == error_stash)
440 root 1.34 {
441 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
442 root 1.34 return;
443     }
444 root 1.11 else if (stash == tagged_stash)
445 root 1.6 {
446     if (svt != SVt_PVAV)
447     croak ("encountered CBOR::XS::Tagged object that isn't an array");
448    
449 root 1.35 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
450 root 1.6 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
451 root 1.34
452     return;
453 root 1.6 }
454 root 1.34 }
455    
456     if (ecb_expect_false (SvREFCNT (sv) > 1)
457     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
458     {
459 root 1.63 if (ecb_expect_false (!enc->shareable))
460 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
461    
462     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
463    
464     if (SvOK (*svp))
465     {
466     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
467 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
468 root 1.34 return;
469     }
470     else
471     {
472     sv_setuv (*svp, enc->shareable_idx);
473     ++enc->shareable_idx;
474     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
475     }
476     }
477    
478     if (ecb_expect_false (SvOBJECT (sv)))
479     {
480     HV *stash = SvSTASH (sv);
481     GV *method;
482    
483 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
484     croak ("encountered object '%s', but forbid_objects is enabled",
485     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
486     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
487 root 1.1 {
488 root 1.11 dSP;
489    
490 root 1.50 ENTER; SAVETMPS;
491     PUSHMARK (SP);
492 root 1.6 // we re-bless the reference to get overload and other niceties right
493 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
494    
495     PUTBACK;
496     // G_SCALAR ensures that return value is 1
497     call_sv ((SV *)GvCV (method), G_SCALAR);
498     SPAGAIN;
499    
500     // catch this surprisingly common error
501     if (SvROK (TOPs) && SvRV (TOPs) == sv)
502     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
503    
504     encode_sv (enc, POPs);
505    
506     PUTBACK;
507    
508     FREETMPS; LEAVE;
509     }
510     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
511     {
512     dSP;
513 root 1.6
514 root 1.50 ENTER; SAVETMPS;
515     PUSHMARK (SP);
516 root 1.11 EXTEND (SP, 2);
517     // we re-bless the reference to get overload and other niceties right
518     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
519     PUSHs (sv_cbor);
520 root 1.1
521 root 1.11 PUTBACK;
522     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
523     SPAGAIN;
524 root 1.6
525 root 1.11 // catch this surprisingly common error
526     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
527     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
528 root 1.6
529 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
530 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
531 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
532 root 1.6
533 root 1.65 {
534     int i;
535    
536     for (i = 0; i < count; ++i)
537     encode_sv (enc, SP[i + 1 - count]);
538    
539     SP -= count;
540     }
541 root 1.6
542 root 1.11 PUTBACK;
543 root 1.6
544 root 1.11 FREETMPS; LEAVE;
545 root 1.1 }
546 root 1.11 else
547     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
548     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
549 root 1.1 }
550     else if (svt == SVt_PVHV)
551     encode_hv (enc, (HV *)sv);
552     else if (svt == SVt_PVAV)
553     encode_av (enc, (AV *)sv);
554 root 1.18 else
555 root 1.1 {
556 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
557     encode_sv (enc, sv);
558 root 1.1 }
559     }
560    
561     static void
562     encode_nv (enc_t *enc, SV *sv)
563     {
564     double nv = SvNVX (sv);
565    
566     need (enc, 9);
567    
568 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
569     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
570 root 1.1 //TODO: maybe I32?
571 root 1.5 else if (ecb_expect_false (nv == (float)nv))
572 root 1.1 {
573 root 1.63 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
574    
575 root 1.1 uint32_t fp = ecb_float_to_binary32 (nv);
576    
577     if (!ecb_big_endian ())
578     fp = ecb_bswap32 (fp);
579    
580     memcpy (enc->cur, &fp, 4);
581     enc->cur += 4;
582     }
583     else
584     {
585 root 1.63 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
586    
587 root 1.1 uint64_t fp = ecb_double_to_binary64 (nv);
588    
589     if (!ecb_big_endian ())
590     fp = ecb_bswap64 (fp);
591    
592     memcpy (enc->cur, &fp, 8);
593     enc->cur += 8;
594     }
595     }
596    
597     static void
598     encode_sv (enc_t *enc, SV *sv)
599     {
600     SvGETMAGIC (sv);
601    
602     if (SvPOKp (sv))
603     {
604     STRLEN len;
605     char *str = SvPV (sv, len);
606 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
607 root 1.1 }
608     else if (SvNOKp (sv))
609     encode_nv (enc, sv);
610     else if (SvIOKp (sv))
611     {
612     if (SvIsUV (sv))
613 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
614 root 1.1 else if (SvIVX (sv) >= 0)
615 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
616 root 1.1 else
617 root 1.35 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
618 root 1.1 }
619     else if (SvROK (sv))
620     encode_rv (enc, SvRV (sv));
621     else if (!SvOK (sv))
622 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
623 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
624 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
625 root 1.1 else
626     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
627     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
628     }
629    
630     static SV *
631     encode_cbor (SV *scalar, CBOR *cbor)
632     {
633 root 1.48 enc_t enc = { 0 };
634 root 1.1
635 root 1.63 enc.cbor = *cbor;
636     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
637     enc.cur = SvPVX (enc.sv);
638     enc.end = SvEND (enc.sv);
639 root 1.1
640     SvPOK_only (enc.sv);
641 root 1.20
642 root 1.32 if (cbor->flags & F_PACK_STRINGS)
643 root 1.20 {
644     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
645     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
646     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
647     }
648    
649 root 1.1 encode_sv (&enc, scalar);
650    
651     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
652     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
653    
654     if (enc.cbor.flags & F_SHRINK)
655     shrink (enc.sv);
656    
657     return enc.sv;
658     }
659    
660     /////////////////////////////////////////////////////////////////////////////
661     // decoder
662    
663     // structure used for decoding CBOR
664     typedef struct
665     {
666     U8 *cur; // current parser pointer
667     U8 *end; // end of input string
668     const char *err; // parse error, if != 0
669     CBOR cbor;
670     U32 depth; // recursion depth
671     U32 maxdepth; // recursion depth limit
672 root 1.19 AV *shareable;
673 root 1.20 AV *stringref;
674 root 1.27 SV *decode_tagged;
675 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
676 root 1.1 } dec_t;
677    
678 root 1.59 // set dec->err to ERRSV
679     ecb_cold static void
680     err_errsv (dec_t *dec)
681     {
682     if (!dec->err)
683     {
684     dec->err_sv = newSVsv (ERRSV);
685    
686     // chop off the trailing \n
687     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
688     *SvEND (dec->err_sv) = 0;
689    
690     dec->err = SvPVutf8_nolen (dec->err_sv);
691     }
692     }
693 root 1.1
694 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
695     ecb_cold static void
696     err_set (dec_t *dec, const char *reason)
697     {
698     if (!dec->err)
699     dec->err = reason;
700     }
701    
702     ecb_cold static void
703     err_unexpected_end (dec_t *dec)
704     {
705     err_set (dec, "unexpected end of CBOR data");
706     }
707    
708     #define ERR_DO(do) SB do; goto fail; SE
709     #define ERR(reason) ERR_DO (err_set (dec, reason))
710     #define ERR_ERRSV ERR_DO (err_errsv (dec))
711    
712 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
713 root 1.1
714 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
715 root 1.1 #define DEC_DEC_DEPTH --dec->depth
716    
717     static UV
718     decode_uint (dec_t *dec)
719     {
720 root 1.35 U8 m = *dec->cur & MINOR_MASK;
721     ++dec->cur;
722    
723     if (ecb_expect_true (m < LENGTH_EXT1))
724     return m;
725 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
726     {
727     WANT (1);
728     dec->cur += 1;
729     return dec->cur[-1];
730     }
731     else if (ecb_expect_true (m == LENGTH_EXT2))
732     {
733     WANT (2);
734     dec->cur += 2;
735     return (((UV)dec->cur[-2]) << 8)
736     | ((UV)dec->cur[-1]);
737     }
738     else if (ecb_expect_true (m == LENGTH_EXT4))
739     {
740     WANT (4);
741     dec->cur += 4;
742     return (((UV)dec->cur[-4]) << 24)
743     | (((UV)dec->cur[-3]) << 16)
744     | (((UV)dec->cur[-2]) << 8)
745     | ((UV)dec->cur[-1]);
746     }
747     else if (ecb_expect_true (m == LENGTH_EXT8))
748 root 1.1 {
749 root 1.36 WANT (8);
750     dec->cur += 8;
751 root 1.34
752 root 1.36 return
753 root 1.34 #if UVSIZE < 8
754 root 1.36 0
755 root 1.34 #else
756 root 1.36 (((UV)dec->cur[-8]) << 56)
757     | (((UV)dec->cur[-7]) << 48)
758     | (((UV)dec->cur[-6]) << 40)
759     | (((UV)dec->cur[-5]) << 32)
760 root 1.34 #endif
761 root 1.36 | (((UV)dec->cur[-4]) << 24)
762     | (((UV)dec->cur[-3]) << 16)
763     | (((UV)dec->cur[-2]) << 8)
764     | ((UV)dec->cur[-1]);
765 root 1.1 }
766 root 1.36 else
767     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
768 root 1.1
769     fail:
770     return 0;
771     }
772    
773     static SV *decode_sv (dec_t *dec);
774    
775     static SV *
776     decode_av (dec_t *dec)
777     {
778     AV *av = newAV ();
779    
780     DEC_INC_DEPTH;
781    
782 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
783 root 1.1 {
784     ++dec->cur;
785    
786     for (;;)
787     {
788     WANT (1);
789    
790 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
791 root 1.1 {
792     ++dec->cur;
793     break;
794     }
795    
796     av_push (av, decode_sv (dec));
797     }
798     }
799     else
800     {
801 root 1.57 UV i, len = decode_uint (dec);
802 root 1.1
803 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
804 root 1.1 av_fill (av, len - 1);
805    
806     for (i = 0; i < len; ++i)
807     AvARRAY (av)[i] = decode_sv (dec);
808     }
809    
810     DEC_DEC_DEPTH;
811     return newRV_noinc ((SV *)av);
812    
813     fail:
814 root 1.63 SvREFCNT_dec_NN (av);
815 root 1.1 DEC_DEC_DEPTH;
816     return &PL_sv_undef;
817     }
818    
819 root 1.16 static void
820     decode_he (dec_t *dec, HV *hv)
821     {
822     // for speed reasons, we specialcase single-string
823 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
824    
825 root 1.23 if (ecb_expect_true (!dec->stringref))
826 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
827 root 1.21 {
828 root 1.56 STRLEN len = decode_uint (dec);
829 root 1.21 char *key = (char *)dec->cur;
830 root 1.16
831 root 1.49 WANT (len);
832 root 1.21 dec->cur += len;
833    
834 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
835 root 1.16
836 root 1.21 return;
837     }
838 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
839 root 1.21 {
840 root 1.56 STRLEN len = decode_uint (dec);
841 root 1.21 char *key = (char *)dec->cur;
842 root 1.16
843 root 1.49 WANT (len);
844 root 1.21 dec->cur += len;
845 root 1.20
846 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
847     if (!is_utf8_string (key, len))
848     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
849 root 1.16
850 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
851 root 1.16
852 root 1.21 return;
853     }
854 root 1.20
855 root 1.21 SV *k = decode_sv (dec);
856     SV *v = decode_sv (dec);
857 root 1.16
858 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
859     // methods, and this is hopefully the only place where it can happen,
860     // so if there is a chance of an exception, take the very slow path.
861     // since catching exceptions is "undocumented/internal/forbidden" by
862     // the new p5p powers, we need to call out to a perl function :/
863     if (ecb_expect_false (SvAMAGIC (k)))
864     {
865     dSP;
866    
867     ENTER; SAVETMPS;
868     PUSHMARK (SP);
869     EXTEND (SP, 3);
870     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
871     PUSHs (sv_2mortal (k));
872     PUSHs (sv_2mortal (v));
873    
874     PUTBACK;
875     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
876     SPAGAIN;
877    
878     FREETMPS; LEAVE;
879    
880     if (SvTRUE (ERRSV))
881     ERR_ERRSV;
882    
883     return;
884     }
885    
886 root 1.21 hv_store_ent (hv, k, v, 0);
887 root 1.63 SvREFCNT_dec_NN (k);
888 root 1.38
889     fail:
890     ;
891 root 1.16 }
892    
893 root 1.1 static SV *
894     decode_hv (dec_t *dec)
895     {
896     HV *hv = newHV ();
897    
898     DEC_INC_DEPTH;
899    
900 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
901 root 1.1 {
902     ++dec->cur;
903    
904     for (;;)
905     {
906     WANT (1);
907    
908 root 1.64 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err)
909 root 1.1 {
910     ++dec->cur;
911     break;
912     }
913    
914 root 1.16 decode_he (dec, hv);
915 root 1.1 }
916     }
917     else
918     {
919 root 1.57 UV pairs = decode_uint (dec);
920    
921     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
922 root 1.1
923 root 1.16 while (pairs--)
924     decode_he (dec, hv);
925 root 1.1 }
926    
927     DEC_DEC_DEPTH;
928     return newRV_noinc ((SV *)hv);
929    
930     fail:
931 root 1.63 SvREFCNT_dec_NN (hv);
932 root 1.1 DEC_DEC_DEPTH;
933     return &PL_sv_undef;
934     }
935    
936     static SV *
937     decode_str (dec_t *dec, int utf8)
938     {
939 root 1.6 SV *sv = 0;
940 root 1.1
941 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
942 root 1.1 {
943 root 1.33 // indefinite length strings
944 root 1.1 ++dec->cur;
945    
946 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
947 root 1.33
948 root 1.1 sv = newSVpvn ("", 0);
949    
950     for (;;)
951     {
952     WANT (1);
953    
954 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
955     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
956 root 1.33 {
957     ++dec->cur;
958     break;
959     }
960     else
961     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
962    
963     STRLEN len = decode_uint (dec);
964 root 1.1
965 root 1.33 WANT (len);
966     sv_catpvn (sv, dec->cur, len);
967     dec->cur += len;
968 root 1.1 }
969     }
970     else
971     {
972     STRLEN len = decode_uint (dec);
973    
974     WANT (len);
975     sv = newSVpvn (dec->cur, len);
976     dec->cur += len;
977 root 1.25
978     if (ecb_expect_false (dec->stringref)
979     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
980     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
981 root 1.1 }
982    
983     if (utf8)
984 root 1.38 {
985     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
986     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
987     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
988    
989     SvUTF8_on (sv);
990     }
991 root 1.1
992     return sv;
993    
994     fail:
995 root 1.6 SvREFCNT_dec (sv);
996 root 1.1 return &PL_sv_undef;
997     }
998    
999     static SV *
1000 root 1.3 decode_tagged (dec_t *dec)
1001     {
1002 root 1.19 SV *sv = 0;
1003 root 1.3 UV tag = decode_uint (dec);
1004 root 1.19
1005     WANT (1);
1006 root 1.3
1007 root 1.18 switch (tag)
1008     {
1009     case CBOR_TAG_MAGIC:
1010 root 1.20 sv = decode_sv (dec);
1011     break;
1012 root 1.18
1013     case CBOR_TAG_INDIRECTION:
1014 root 1.20 sv = newRV_noinc (decode_sv (dec));
1015     break;
1016    
1017     case CBOR_TAG_STRINGREF_NAMESPACE:
1018     {
1019 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1020 root 1.55 // erase mortalised caches, e.g. "shareable"
1021     ENTER;
1022 root 1.20
1023     SAVESPTR (dec->stringref);
1024     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1025    
1026     sv = decode_sv (dec);
1027    
1028 root 1.55 LEAVE;
1029 root 1.20 }
1030     break;
1031    
1032     case CBOR_TAG_STRINGREF:
1033     {
1034 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1035 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1036    
1037     UV idx = decode_uint (dec);
1038    
1039 root 1.64 if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref)))
1040 root 1.20 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1041    
1042     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1043     }
1044     break;
1045 root 1.11
1046 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1047     {
1048     if (ecb_expect_false (!dec->shareable))
1049     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1050    
1051 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1052     {
1053     sv = newSV (0);
1054     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1055 root 1.18
1056 root 1.37 SV *osv = decode_sv (dec);
1057     sv_setsv (sv, osv);
1058     SvREFCNT_dec_NN (osv);
1059     }
1060     else
1061     {
1062     av_push (dec->shareable, &PL_sv_undef);
1063     int idx = AvFILLp (dec->shareable);
1064     sv = decode_sv (dec);
1065     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1066     }
1067 root 1.19 }
1068 root 1.20 break;
1069 root 1.18
1070     case CBOR_TAG_VALUE_SHAREDREF:
1071 root 1.17 {
1072 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1073 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1074 root 1.18
1075 root 1.19 UV idx = decode_uint (dec);
1076    
1077 root 1.64 if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable)))
1078 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1079    
1080 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1081 root 1.37
1082     if (sv == &PL_sv_undef)
1083     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1084 root 1.17 }
1085 root 1.20 break;
1086 root 1.17
1087 root 1.18 case CBOR_TAG_PERL_OBJECT:
1088     {
1089 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1090     goto filter;
1091    
1092 root 1.19 sv = decode_sv (dec);
1093    
1094 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1095     ERR ("corrupted CBOR data (non-array perl object)");
1096    
1097     AV *av = (AV *)SvRV (sv);
1098     int len = av_len (av) + 1;
1099     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1100    
1101     if (!stash)
1102     ERR ("cannot decode perl-object (package does not exist)");
1103    
1104     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1105    
1106     if (!method)
1107     ERR ("cannot decode perl-object (package does not have a THAW method)");
1108    
1109     dSP;
1110    
1111 root 1.50 ENTER; SAVETMPS;
1112     PUSHMARK (SP);
1113 root 1.18 EXTEND (SP, len + 1);
1114     // we re-bless the reference to get overload and other niceties right
1115     PUSHs (*av_fetch (av, 0, 1));
1116     PUSHs (sv_cbor);
1117    
1118     int i;
1119    
1120     for (i = 1; i < len; ++i)
1121     PUSHs (*av_fetch (av, i, 1));
1122    
1123     PUTBACK;
1124     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1125     SPAGAIN;
1126    
1127     if (SvTRUE (ERRSV))
1128     {
1129     FREETMPS; LEAVE;
1130 root 1.59 ERR_ERRSV;
1131 root 1.18 }
1132    
1133 root 1.63 SvREFCNT_dec_NN (sv);
1134 root 1.18 sv = SvREFCNT_inc (POPs);
1135 root 1.11
1136 root 1.18 PUTBACK;
1137 root 1.11
1138 root 1.18 FREETMPS; LEAVE;
1139     }
1140 root 1.20 break;
1141 root 1.9
1142 root 1.18 default:
1143 root 1.60 filter:
1144 root 1.18 {
1145 root 1.58 SV *tag_sv = newSVuv (tag);
1146    
1147 root 1.19 sv = decode_sv (dec);
1148    
1149 root 1.27 dSP;
1150 root 1.50 ENTER; SAVETMPS;
1151     PUSHMARK (SP);
1152 root 1.27 EXTEND (SP, 2);
1153 root 1.58 PUSHs (tag_sv);
1154 root 1.27 PUSHs (sv);
1155    
1156     PUTBACK;
1157     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1158     SPAGAIN;
1159    
1160     if (SvTRUE (ERRSV))
1161     {
1162 root 1.63 SvREFCNT_dec_NN (tag_sv);
1163 root 1.27 FREETMPS; LEAVE;
1164 root 1.59 ERR_ERRSV;
1165 root 1.27 }
1166    
1167     if (count)
1168     {
1169 root 1.63 SvREFCNT_dec_NN (tag_sv);
1170     SvREFCNT_dec_NN (sv);
1171 root 1.65 sv = SvREFCNT_inc_NN (TOPs);
1172     SP -= count;
1173 root 1.27 }
1174     else
1175     {
1176     AV *av = newAV ();
1177 root 1.58 av_push (av, tag_sv);
1178 root 1.27 av_push (av, sv);
1179    
1180     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1181     ? cbor_tagged_stash
1182     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1183     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1184     }
1185 root 1.7
1186 root 1.27 PUTBACK;
1187    
1188     FREETMPS; LEAVE;
1189 root 1.18 }
1190 root 1.20 break;
1191 root 1.11 }
1192 root 1.9
1193 root 1.20 return sv;
1194    
1195 root 1.9 fail:
1196     SvREFCNT_dec (sv);
1197     return &PL_sv_undef;
1198 root 1.3 }
1199    
1200     static SV *
1201 root 1.1 decode_sv (dec_t *dec)
1202     {
1203     WANT (1);
1204    
1205 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1206 root 1.1 {
1207 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1208     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1209     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1210     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1211     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1212     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1213     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1214    
1215     case MAJOR_MISC >> MAJOR_SHIFT:
1216     switch (*dec->cur++ & MINOR_MASK)
1217 root 1.1 {
1218 root 1.35 case SIMPLE_FALSE:
1219 root 1.1 #if CBOR_SLOW
1220 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1221 root 1.1 #endif
1222 root 1.10 return newSVsv (types_false);
1223 root 1.35 case SIMPLE_TRUE:
1224 root 1.1 #if CBOR_SLOW
1225 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1226 root 1.1 #endif
1227 root 1.10 return newSVsv (types_true);
1228 root 1.35 case SIMPLE_NULL:
1229 root 1.1 return newSVsv (&PL_sv_undef);
1230 root 1.35 case SIMPLE_UNDEF:
1231 root 1.10 #if CBOR_SLOW
1232     types_error = get_bool ("Types::Serialiser::error");
1233     #endif
1234     return newSVsv (types_error);
1235 root 1.1
1236 root 1.35 case MISC_FLOAT16:
1237 root 1.2 {
1238     WANT (2);
1239    
1240     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1241     dec->cur += 2;
1242    
1243     return newSVnv (ecb_binary16_to_float (fp));
1244     }
1245 root 1.1
1246 root 1.35 case MISC_FLOAT32:
1247 root 1.1 {
1248     uint32_t fp;
1249     WANT (4);
1250     memcpy (&fp, dec->cur, 4);
1251     dec->cur += 4;
1252    
1253     if (!ecb_big_endian ())
1254     fp = ecb_bswap32 (fp);
1255    
1256     return newSVnv (ecb_binary32_to_float (fp));
1257     }
1258    
1259 root 1.35 case MISC_FLOAT64:
1260 root 1.1 {
1261     uint64_t fp;
1262     WANT (8);
1263     memcpy (&fp, dec->cur, 8);
1264     dec->cur += 8;
1265    
1266     if (!ecb_big_endian ())
1267     fp = ecb_bswap64 (fp);
1268    
1269     return newSVnv (ecb_binary64_to_double (fp));
1270     }
1271    
1272 root 1.35 // 0..19 unassigned simple
1273 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1274     // 28-30 unassigned misc
1275     // 31 break code
1276 root 1.1 default:
1277 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1278 root 1.1 }
1279    
1280     break;
1281     }
1282    
1283     fail:
1284     return &PL_sv_undef;
1285     }
1286    
1287     static SV *
1288     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1289     {
1290 root 1.48 dec_t dec = { 0 };
1291 root 1.1 SV *sv;
1292 root 1.16 STRLEN len;
1293     char *data = SvPVbyte (string, len);
1294 root 1.1
1295 root 1.16 if (len > cbor->max_size && cbor->max_size)
1296     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1297     (unsigned long)len, (unsigned long)cbor->max_size);
1298 root 1.1
1299     dec.cbor = *cbor;
1300 root 1.16 dec.cur = (U8 *)data;
1301     dec.end = (U8 *)data + len;
1302 root 1.1
1303     sv = decode_sv (&dec);
1304    
1305     if (offset_return)
1306     *offset_return = dec.cur;
1307    
1308     if (!(offset_return || !sv))
1309 root 1.2 if (dec.cur != dec.end && !dec.err)
1310     dec.err = "garbage after CBOR object";
1311    
1312     if (dec.err)
1313 root 1.1 {
1314 root 1.39 if (dec.shareable)
1315     {
1316 root 1.58 // need to break cyclic links, which would all be in shareable
1317 root 1.39 int i;
1318     SV **svp;
1319    
1320     for (i = av_len (dec.shareable) + 1; i--; )
1321     if ((svp = av_fetch (dec.shareable, i, 0)))
1322     sv_setsv (*svp, &PL_sv_undef);
1323     }
1324    
1325 root 1.63 SvREFCNT_dec_NN (sv);
1326 root 1.59
1327     if (dec.err_sv)
1328     sv_2mortal (dec.err_sv);
1329    
1330 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1331 root 1.1 }
1332    
1333     sv = sv_2mortal (sv);
1334    
1335     return sv;
1336     }
1337    
1338     /////////////////////////////////////////////////////////////////////////////
1339 root 1.40 // incremental parser
1340    
1341     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1342    
1343     // returns 0 for notyet, 1 for success or error
1344     static int
1345     incr_parse (CBOR *self, SV *cborstr)
1346     {
1347     STRLEN cur;
1348     SvPV (cborstr, cur);
1349    
1350     while (ecb_expect_true (self->incr_need <= cur))
1351     {
1352     // table of integer count bytes
1353     static I8 incr_len[MINOR_MASK + 1] = {
1354     0, 0, 0, 0, 0, 0, 0, 0,
1355     0, 0, 0, 0, 0, 0, 0, 0,
1356     0, 0, 0, 0, 0, 0, 0, 0,
1357     1, 2, 4, 8,-1,-1,-1,-2
1358     };
1359    
1360     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1361     U8 m = *p & MINOR_MASK;
1362     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1363     I8 ilen = incr_len[m];
1364    
1365     self->incr_need = self->incr_pos + 1;
1366    
1367     if (ecb_expect_false (ilen < 0))
1368     {
1369     if (m != MINOR_INDEF)
1370     return 1; // error
1371    
1372     if (*p == (MAJOR_MISC | MINOR_INDEF))
1373     {
1374     if (count >= 0)
1375     return 1; // error
1376    
1377     count = 1;
1378     }
1379     else
1380     {
1381     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1382     count = -1;
1383     }
1384     }
1385     else
1386     {
1387     self->incr_need += ilen;
1388     if (ecb_expect_false (self->incr_need > cur))
1389     return 0;
1390    
1391     int major = *p >> MAJOR_SHIFT;
1392    
1393     switch (major)
1394     {
1395 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1396     ++count; // tags merely prefix another value
1397     break;
1398    
1399 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1400     case MAJOR_TEXT >> MAJOR_SHIFT:
1401     case MAJOR_ARRAY >> MAJOR_SHIFT:
1402     case MAJOR_MAP >> MAJOR_SHIFT:
1403     {
1404     UV len;
1405    
1406     if (ecb_expect_false (ilen))
1407     {
1408     len = 0;
1409    
1410     do {
1411     len = (len << 8) | *++p;
1412     } while (--ilen);
1413     }
1414     else
1415     len = m;
1416    
1417     switch (major)
1418     {
1419     case MAJOR_BYTES >> MAJOR_SHIFT:
1420     case MAJOR_TEXT >> MAJOR_SHIFT:
1421     self->incr_need += len;
1422     if (ecb_expect_false (self->incr_need > cur))
1423     return 0;
1424    
1425     break;
1426    
1427     case MAJOR_MAP >> MAJOR_SHIFT:
1428     len <<= 1;
1429     case MAJOR_ARRAY >> MAJOR_SHIFT:
1430     if (len)
1431     {
1432     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1433     count = len + 1;
1434     }
1435     break;
1436     }
1437     }
1438     }
1439     }
1440    
1441     self->incr_pos = self->incr_need;
1442    
1443     if (count > 0)
1444     {
1445     while (!--count)
1446     {
1447     if (!AvFILLp (self->incr_count))
1448     return 1; // done
1449    
1450     SvREFCNT_dec_NN (av_pop (self->incr_count));
1451     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1452     }
1453    
1454     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1455     }
1456     }
1457    
1458     return 0;
1459     }
1460    
1461    
1462     /////////////////////////////////////////////////////////////////////////////
1463 root 1.1 // XS interface functions
1464    
1465     MODULE = CBOR::XS PACKAGE = CBOR::XS
1466    
1467     BOOT:
1468     {
1469     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1470 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1471 root 1.1
1472 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1473     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1474    
1475     types_true = get_bool ("Types::Serialiser::true" );
1476     types_false = get_bool ("Types::Serialiser::false");
1477     types_error = get_bool ("Types::Serialiser::error");
1478 root 1.11
1479 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1480    
1481 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1482     SvREADONLY_on (sv_cbor);
1483 root 1.56
1484     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1485 root 1.1 }
1486    
1487     PROTOTYPES: DISABLE
1488    
1489     void CLONE (...)
1490     CODE:
1491 root 1.10 cbor_stash = 0;
1492     cbor_tagged_stash = 0;
1493     types_error_stash = 0;
1494     types_boolean_stash = 0;
1495 root 1.1
1496     void new (char *klass)
1497     PPCODE:
1498     {
1499     SV *pv = NEWSV (0, sizeof (CBOR));
1500     SvPOK_only (pv);
1501     cbor_init ((CBOR *)SvPVX (pv));
1502     XPUSHs (sv_2mortal (sv_bless (
1503     newRV_noinc (pv),
1504     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1505     )));
1506     }
1507    
1508     void shrink (CBOR *self, int enable = 1)
1509     ALIAS:
1510     shrink = F_SHRINK
1511     allow_unknown = F_ALLOW_UNKNOWN
1512 root 1.18 allow_sharing = F_ALLOW_SHARING
1513 root 1.37 allow_cycles = F_ALLOW_CYCLES
1514 root 1.60 forbid_objects = F_FORBID_OBJECTS
1515 root 1.32 pack_strings = F_PACK_STRINGS
1516 root 1.54 text_keys = F_TEXT_KEYS
1517     text_strings = F_TEXT_STRINGS
1518 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1519 root 1.1 PPCODE:
1520     {
1521     if (enable)
1522     self->flags |= ix;
1523     else
1524     self->flags &= ~ix;
1525    
1526     XPUSHs (ST (0));
1527     }
1528    
1529     void get_shrink (CBOR *self)
1530     ALIAS:
1531     get_shrink = F_SHRINK
1532     get_allow_unknown = F_ALLOW_UNKNOWN
1533 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1534 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1535 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1536 root 1.32 get_pack_strings = F_PACK_STRINGS
1537 root 1.54 get_text_keys = F_TEXT_KEYS
1538     get_text_strings = F_TEXT_STRINGS
1539 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1540 root 1.1 PPCODE:
1541     XPUSHs (boolSV (self->flags & ix));
1542    
1543     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1544     PPCODE:
1545     self->max_depth = max_depth;
1546     XPUSHs (ST (0));
1547    
1548     U32 get_max_depth (CBOR *self)
1549     CODE:
1550     RETVAL = self->max_depth;
1551     OUTPUT:
1552     RETVAL
1553    
1554     void max_size (CBOR *self, U32 max_size = 0)
1555     PPCODE:
1556     self->max_size = max_size;
1557     XPUSHs (ST (0));
1558    
1559     int get_max_size (CBOR *self)
1560     CODE:
1561     RETVAL = self->max_size;
1562     OUTPUT:
1563     RETVAL
1564    
1565 root 1.27 void filter (CBOR *self, SV *filter = 0)
1566     PPCODE:
1567     SvREFCNT_dec (self->filter);
1568     self->filter = filter ? newSVsv (filter) : filter;
1569     XPUSHs (ST (0));
1570    
1571     SV *get_filter (CBOR *self)
1572     CODE:
1573     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1574     OUTPUT:
1575     RETVAL
1576    
1577 root 1.1 void encode (CBOR *self, SV *scalar)
1578     PPCODE:
1579     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1580     XPUSHs (scalar);
1581    
1582     void decode (CBOR *self, SV *cborstr)
1583     PPCODE:
1584     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1585     XPUSHs (cborstr);
1586    
1587     void decode_prefix (CBOR *self, SV *cborstr)
1588     PPCODE:
1589     {
1590     SV *sv;
1591     char *offset;
1592     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1593     EXTEND (SP, 2);
1594     PUSHs (sv);
1595     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1596     }
1597    
1598 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1599 root 1.42 ALIAS:
1600     incr_parse_multiple = 1
1601 root 1.40 PPCODE:
1602     {
1603     if (SvUTF8 (cborstr))
1604     sv_utf8_downgrade (cborstr, 0);
1605    
1606     if (!self->incr_count)
1607     {
1608     self->incr_count = newAV ();
1609     self->incr_pos = 0;
1610     self->incr_need = 1;
1611    
1612     av_push (self->incr_count, newSViv (1));
1613     }
1614    
1615 root 1.41 do
1616 root 1.40 {
1617     if (!incr_parse (self, cborstr))
1618     {
1619     if (self->incr_need > self->max_size && self->max_size)
1620     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1621     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1622    
1623     break;
1624     }
1625    
1626 root 1.41 SV *sv;
1627     char *offset;
1628 root 1.40
1629 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1630     XPUSHs (sv);
1631 root 1.40
1632 root 1.41 sv_chop (cborstr, offset);
1633 root 1.40
1634 root 1.41 av_clear (self->incr_count);
1635     av_push (self->incr_count, newSViv (1));
1636 root 1.40
1637 root 1.41 self->incr_pos = 0;
1638     self->incr_need = self->incr_pos + 1;
1639 root 1.40 }
1640 root 1.42 while (ix);
1641 root 1.40 }
1642    
1643     void incr_reset (CBOR *self)
1644     CODE:
1645     {
1646     SvREFCNT_dec (self->incr_count);
1647     self->incr_count = 0;
1648     }
1649    
1650 root 1.27 void DESTROY (CBOR *self)
1651     PPCODE:
1652     cbor_free (self);
1653    
1654 root 1.1 PROTOTYPES: ENABLE
1655    
1656     void encode_cbor (SV *scalar)
1657 root 1.36 ALIAS:
1658     encode_cbor = 0
1659     encode_cbor_sharing = F_ALLOW_SHARING
1660 root 1.1 PPCODE:
1661     {
1662     CBOR cbor;
1663     cbor_init (&cbor);
1664 root 1.36 cbor.flags |= ix;
1665 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1666     XPUSHs (scalar);
1667     }
1668    
1669     void decode_cbor (SV *cborstr)
1670     PPCODE:
1671     {
1672     CBOR cbor;
1673     cbor_init (&cbor);
1674     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1675     XPUSHs (cborstr);
1676     }
1677    
1678 root 1.64 #ifdef __AFL_COMPILER
1679    
1680     void
1681     afl_init ()
1682     CODE:
1683     __AFL_INIT ();
1684    
1685     int
1686     afl_loop (unsigned int count = 10000)
1687     CODE:
1688     RETVAL = __AFL_LOOP (count);
1689     OUTPUT:
1690     RETVAL
1691    
1692     #endif
1693