ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.62
Committed: Sat Nov 26 02:11:52 2016 UTC (7 years, 5 months ago) by root
Branch: MAIN
Changes since 1.61: +0 -1 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.28 #ifndef SvREFCNT_dec_NN
27     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
28     #endif
29 root 1.14
30 root 1.35 // known major and minor types
31     enum cbor_type
32     {
33     MAJOR_SHIFT = 5,
34     MINOR_MASK = 0x1f,
35    
36     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
37     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
38     MAJOR_BYTES = 2 << MAJOR_SHIFT,
39     MAJOR_TEXT = 3 << MAJOR_SHIFT,
40     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
41     MAJOR_MAP = 5 << MAJOR_SHIFT,
42     MAJOR_TAG = 6 << MAJOR_SHIFT,
43     MAJOR_MISC = 7 << MAJOR_SHIFT,
44    
45     // INT/STRING/ARRAY/MAP subtypes
46     LENGTH_EXT1 = 24,
47     LENGTH_EXT2 = 25,
48     LENGTH_EXT4 = 26,
49     LENGTH_EXT8 = 27,
50    
51     // SIMPLE types (effectively MISC subtypes)
52     SIMPLE_FALSE = 20,
53     SIMPLE_TRUE = 21,
54     SIMPLE_NULL = 22,
55     SIMPLE_UNDEF = 23,
56    
57     // MISC subtype (unused)
58     MISC_EXT1 = 24,
59     MISC_FLOAT16 = 25,
60     MISC_FLOAT32 = 26,
61     MISC_FLOAT64 = 27,
62    
63     // BYTES/TEXT/ARRAY/MAP
64     MINOR_INDEF = 31,
65     };
66    
67 root 1.9 // known tags
68 root 1.8 enum cbor_tag
69     {
70 root 1.35 // extensions
71     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
72     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
73     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
74     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
75     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
76     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
77     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
78    
79     // rfc7049
80     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
81     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
82     CBOR_TAG_POS_BIGNUM = 2, // byte string
83     CBOR_TAG_NEG_BIGNUM = 3, // byte string
84     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
85     CBOR_TAG_BIGFLOAT = 5, // array
86    
87     CBOR_TAG_CONV_B64U = 21, // base64url, any
88     CBOR_TAG_CONV_B64 = 22, // base64, any
89     CBOR_TAG_CONV_HEX = 23, // base16, any
90     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
91    
92     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
93     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
94     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
95     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
96     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
97 root 1.8
98 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
99 root 1.8 };
100    
101 root 1.24 #define F_SHRINK 0x00000001UL
102     #define F_ALLOW_UNKNOWN 0x00000002UL
103 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
104 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
105 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
106     #define F_PACK_STRINGS 0x00000020UL
107     #define F_TEXT_KEYS 0x00000040UL
108     #define F_TEXT_STRINGS 0x00000080UL
109     #define F_VALIDATE_UTF8 0x00000100UL
110 root 1.1
111     #define INIT_SIZE 32 // initial scalar size to be allocated
112    
113     #define SB do {
114     #define SE } while (0)
115    
116     #define IN_RANGE_INC(type,val,beg,end) \
117     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
118     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
119    
120     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
121    
122     #ifdef USE_ITHREADS
123     # define CBOR_SLOW 1
124     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
125     #else
126     # define CBOR_SLOW 0
127     # define CBOR_STASH cbor_stash
128     #endif
129    
130 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
131 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
132 root 1.1
133     typedef struct {
134     U32 flags;
135     U32 max_depth;
136     STRLEN max_size;
137 root 1.27 SV *filter;
138 root 1.40
139     // for the incremental parser
140     STRLEN incr_pos; // the current offset into the text
141     STRLEN incr_need; // minimum bytes needed to decode
142     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
143 root 1.1 } CBOR;
144    
145 root 1.5 ecb_inline void
146 root 1.1 cbor_init (CBOR *cbor)
147     {
148     Zero (cbor, 1, CBOR);
149     cbor->max_depth = 512;
150     }
151    
152 root 1.27 ecb_inline void
153     cbor_free (CBOR *cbor)
154     {
155     SvREFCNT_dec (cbor->filter);
156 root 1.40 SvREFCNT_dec (cbor->incr_count);
157 root 1.27 }
158    
159 root 1.1 /////////////////////////////////////////////////////////////////////////////
160     // utility functions
161    
162 root 1.5 ecb_inline SV *
163 root 1.1 get_bool (const char *name)
164     {
165     SV *sv = get_sv (name, 1);
166    
167     SvREADONLY_on (sv);
168     SvREADONLY_on (SvRV (sv));
169    
170     return sv;
171     }
172    
173 root 1.5 ecb_inline void
174 root 1.1 shrink (SV *sv)
175     {
176     sv_utf8_downgrade (sv, 1);
177    
178     if (SvLEN (sv) > SvCUR (sv) + 1)
179     {
180     #ifdef SvPV_shrink_to_cur
181     SvPV_shrink_to_cur (sv);
182     #elif defined (SvPV_renew)
183     SvPV_renew (sv, SvCUR (sv) + 1);
184     #endif
185     }
186     }
187    
188 root 1.21 // minimum length of a string to be registered for stringref
189     ecb_inline int
190     minimum_string_length (UV idx)
191     {
192 root 1.61 return idx <= 23 ? 3
193     : idx <= 0xffU ? 4
194     : idx <= 0xffffU ? 5
195     : idx <= 0xffffffffU ? 7
196     : 11;
197 root 1.21 }
198    
199 root 1.1 /////////////////////////////////////////////////////////////////////////////
200     // encoder
201    
202     // structure used for encoding CBOR
203     typedef struct
204     {
205     char *cur; // SvPVX (sv) + current output position
206     char *end; // SvEND (sv)
207     SV *sv; // result scalar
208     CBOR cbor;
209     U32 depth; // recursion level
210 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
211     UV stringref_idx;
212 root 1.19 HV *shareable; // ptr => index, or 0
213     UV shareable_idx;
214 root 1.1 } enc_t;
215    
216 root 1.5 ecb_inline void
217 root 1.1 need (enc_t *enc, STRLEN len)
218     {
219 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
220 root 1.1 {
221     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
222     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
223     enc->cur = SvPVX (enc->sv) + cur;
224     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
225     }
226     }
227    
228 root 1.5 ecb_inline void
229 root 1.1 encode_ch (enc_t *enc, char ch)
230     {
231     need (enc, 1);
232     *enc->cur++ = ch;
233     }
234    
235     static void
236     encode_uint (enc_t *enc, int major, UV len)
237     {
238     need (enc, 9);
239    
240 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
241 root 1.1 *enc->cur++ = major | len;
242 root 1.36 else if (ecb_expect_true (len <= 0xffU))
243 root 1.1 {
244 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
245 root 1.1 *enc->cur++ = len;
246     }
247 root 1.36 else if (len <= 0xffffU)
248 root 1.1 {
249 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
250 root 1.1 *enc->cur++ = len >> 8;
251     *enc->cur++ = len;
252     }
253 root 1.36 else if (len <= 0xffffffffU)
254 root 1.1 {
255 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
256 root 1.1 *enc->cur++ = len >> 24;
257     *enc->cur++ = len >> 16;
258     *enc->cur++ = len >> 8;
259     *enc->cur++ = len;
260     }
261 root 1.4 else
262 root 1.1 {
263 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
264 root 1.1 *enc->cur++ = len >> 56;
265     *enc->cur++ = len >> 48;
266     *enc->cur++ = len >> 40;
267     *enc->cur++ = len >> 32;
268     *enc->cur++ = len >> 24;
269     *enc->cur++ = len >> 16;
270     *enc->cur++ = len >> 8;
271     *enc->cur++ = len;
272     }
273     }
274    
275 root 1.21 ecb_inline void
276     encode_tag (enc_t *enc, UV tag)
277     {
278 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
279 root 1.21 }
280    
281 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
282     ecb_noinline static void
283     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
284     {
285     STRLEN ulen = len;
286     U8 *p, *pend = (U8 *)str + len;
287    
288     for (p = (U8 *)str; p < pend; ++p)
289     ulen += *p >> 7; // count set high bits
290    
291     encode_uint (enc, MAJOR_TEXT, ulen);
292    
293     need (enc, ulen);
294     for (p = (U8 *)str; p < pend; ++p)
295     if (*p < 0x80)
296     *enc->cur++ = *p;
297     else
298     {
299     *enc->cur++ = 0xc0 + (*p >> 6);
300     *enc->cur++ = 0x80 + (*p & 63);
301     }
302     }
303    
304 root 1.30 ecb_inline void
305 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
306 root 1.30 {
307 root 1.53 if (ecb_expect_false (upgrade_utf8))
308 root 1.51 if (!utf8)
309     {
310 root 1.53 encode_str_utf8 (enc, utf8, str, len);
311 root 1.51 return;
312     }
313    
314 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
315 root 1.30 need (enc, len);
316     memcpy (enc->cur, str, len);
317     enc->cur += len;
318     }
319    
320 root 1.54 ecb_inline void
321 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
322 root 1.1 {
323 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
324 root 1.21 {
325 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
326 root 1.21
327     if (SvOK (*svp))
328     {
329     // already registered, use stringref
330     encode_tag (enc, CBOR_TAG_STRINGREF);
331 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
332 root 1.21 return;
333     }
334     else if (len >= minimum_string_length (enc->stringref_idx))
335     {
336     // register only
337     sv_setuv (*svp, enc->stringref_idx);
338     ++enc->stringref_idx;
339     }
340     }
341    
342 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
343 root 1.1 }
344    
345     static void encode_sv (enc_t *enc, SV *sv);
346    
347     static void
348     encode_av (enc_t *enc, AV *av)
349     {
350     int i, len = av_len (av);
351    
352     if (enc->depth >= enc->cbor.max_depth)
353     croak (ERR_NESTING_EXCEEDED);
354    
355     ++enc->depth;
356    
357 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
358 root 1.1
359 root 1.45 if (SvMAGICAL (av))
360     for (i = 0; i <= len; ++i)
361     {
362     SV **svp = av_fetch (av, i, 0);
363     encode_sv (enc, svp ? *svp : &PL_sv_undef);
364     }
365     else
366     for (i = 0; i <= len; ++i)
367 root 1.46 {
368     SV *sv = AvARRAY (av)[i];
369     encode_sv (enc, sv ? sv : &PL_sv_undef);
370     }
371 root 1.1
372     --enc->depth;
373     }
374    
375     static void
376     encode_hv (enc_t *enc, HV *hv)
377     {
378     HE *he;
379    
380     if (enc->depth >= enc->cbor.max_depth)
381     croak (ERR_NESTING_EXCEEDED);
382    
383     ++enc->depth;
384    
385     int pairs = hv_iterinit (hv);
386     int mg = SvMAGICAL (hv);
387    
388     if (mg)
389 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
390 root 1.1 else
391 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
392 root 1.1
393     while ((he = hv_iternext (hv)))
394     {
395 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
396     encode_sv (enc, HeSVKEY (he));
397 root 1.1 else
398 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
399 root 1.1
400 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
401 root 1.1 }
402    
403     if (mg)
404 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
405 root 1.1
406     --enc->depth;
407     }
408    
409     // encode objects, arrays and special \0=false and \1=true values.
410     static void
411     encode_rv (enc_t *enc, SV *sv)
412     {
413 root 1.19 SvGETMAGIC (sv);
414 root 1.1
415 root 1.19 svtype svt = SvTYPE (sv);
416 root 1.18
417 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
418 root 1.1 {
419 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
420     ? types_boolean_stash
421     : gv_stashpv ("Types::Serialiser::Boolean", 1);
422     HV *error_stash = !CBOR_SLOW || types_error_stash
423     ? types_error_stash
424     : gv_stashpv ("Types::Serialiser::Error", 1);
425 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
426     ? cbor_tagged_stash
427     : gv_stashpv ("CBOR::XS::Tagged" , 1);
428 root 1.1
429 root 1.11 HV *stash = SvSTASH (sv);
430    
431     if (stash == boolean_stash)
432 root 1.34 {
433 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
434 root 1.34 return;
435     }
436 root 1.11 else if (stash == error_stash)
437 root 1.34 {
438 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
439 root 1.34 return;
440     }
441 root 1.11 else if (stash == tagged_stash)
442 root 1.6 {
443     if (svt != SVt_PVAV)
444     croak ("encountered CBOR::XS::Tagged object that isn't an array");
445    
446 root 1.35 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
447 root 1.6 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
448 root 1.34
449     return;
450 root 1.6 }
451 root 1.34 }
452    
453     if (ecb_expect_false (SvREFCNT (sv) > 1)
454     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
455     {
456     if (!enc->shareable)
457     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
458    
459     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
460    
461     if (SvOK (*svp))
462     {
463     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
464 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
465 root 1.34 return;
466     }
467     else
468     {
469     sv_setuv (*svp, enc->shareable_idx);
470     ++enc->shareable_idx;
471     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
472     }
473     }
474    
475     if (ecb_expect_false (SvOBJECT (sv)))
476     {
477     HV *stash = SvSTASH (sv);
478     GV *method;
479    
480 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
481     croak ("encountered object '%s', but forbid_objects is enabled",
482     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
483     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
484 root 1.1 {
485 root 1.11 dSP;
486    
487 root 1.50 ENTER; SAVETMPS;
488     PUSHMARK (SP);
489 root 1.6 // we re-bless the reference to get overload and other niceties right
490 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
491    
492     PUTBACK;
493     // G_SCALAR ensures that return value is 1
494     call_sv ((SV *)GvCV (method), G_SCALAR);
495     SPAGAIN;
496    
497     // catch this surprisingly common error
498     if (SvROK (TOPs) && SvRV (TOPs) == sv)
499     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
500    
501     encode_sv (enc, POPs);
502    
503     PUTBACK;
504    
505     FREETMPS; LEAVE;
506     }
507     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
508     {
509     dSP;
510 root 1.6
511 root 1.50 ENTER; SAVETMPS;
512     SAVESTACK_POS ();
513     PUSHMARK (SP);
514 root 1.11 EXTEND (SP, 2);
515     // we re-bless the reference to get overload and other niceties right
516     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
517     PUSHs (sv_cbor);
518 root 1.1
519 root 1.11 PUTBACK;
520     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
521     SPAGAIN;
522 root 1.6
523 root 1.11 // catch this surprisingly common error
524     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
525     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
526 root 1.6
527 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
528 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
529 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
530 root 1.6
531 root 1.11 while (count)
532     encode_sv (enc, SP[1 - count--]);
533 root 1.6
534 root 1.11 PUTBACK;
535 root 1.6
536 root 1.11 FREETMPS; LEAVE;
537 root 1.1 }
538 root 1.11 else
539     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
540     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
541 root 1.1 }
542     else if (svt == SVt_PVHV)
543     encode_hv (enc, (HV *)sv);
544     else if (svt == SVt_PVAV)
545     encode_av (enc, (AV *)sv);
546 root 1.18 else
547 root 1.1 {
548 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
549     encode_sv (enc, sv);
550 root 1.1 }
551     }
552    
553     static void
554     encode_nv (enc_t *enc, SV *sv)
555     {
556     double nv = SvNVX (sv);
557    
558     need (enc, 9);
559    
560 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
561     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
562 root 1.1 //TODO: maybe I32?
563 root 1.5 else if (ecb_expect_false (nv == (float)nv))
564 root 1.1 {
565     uint32_t fp = ecb_float_to_binary32 (nv);
566    
567 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
568 root 1.1
569     if (!ecb_big_endian ())
570     fp = ecb_bswap32 (fp);
571    
572     memcpy (enc->cur, &fp, 4);
573     enc->cur += 4;
574     }
575     else
576     {
577     uint64_t fp = ecb_double_to_binary64 (nv);
578    
579 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
580 root 1.1
581     if (!ecb_big_endian ())
582     fp = ecb_bswap64 (fp);
583    
584     memcpy (enc->cur, &fp, 8);
585     enc->cur += 8;
586     }
587     }
588    
589     static void
590     encode_sv (enc_t *enc, SV *sv)
591     {
592     SvGETMAGIC (sv);
593    
594     if (SvPOKp (sv))
595     {
596     STRLEN len;
597     char *str = SvPV (sv, len);
598 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
599 root 1.1 }
600     else if (SvNOKp (sv))
601     encode_nv (enc, sv);
602     else if (SvIOKp (sv))
603     {
604     if (SvIsUV (sv))
605 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
606 root 1.1 else if (SvIVX (sv) >= 0)
607 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
608 root 1.1 else
609 root 1.35 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
610 root 1.1 }
611     else if (SvROK (sv))
612     encode_rv (enc, SvRV (sv));
613     else if (!SvOK (sv))
614 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
615 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
616 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
617 root 1.1 else
618     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
619     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
620     }
621    
622     static SV *
623     encode_cbor (SV *scalar, CBOR *cbor)
624     {
625 root 1.48 enc_t enc = { 0 };
626 root 1.1
627     enc.cbor = *cbor;
628     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
629     enc.cur = SvPVX (enc.sv);
630     enc.end = SvEND (enc.sv);
631    
632     SvPOK_only (enc.sv);
633 root 1.20
634 root 1.32 if (cbor->flags & F_PACK_STRINGS)
635 root 1.20 {
636     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
637     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
638     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
639     }
640    
641 root 1.1 encode_sv (&enc, scalar);
642    
643     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
644     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
645    
646     if (enc.cbor.flags & F_SHRINK)
647     shrink (enc.sv);
648    
649     return enc.sv;
650     }
651    
652     /////////////////////////////////////////////////////////////////////////////
653     // decoder
654    
655     // structure used for decoding CBOR
656     typedef struct
657     {
658     U8 *cur; // current parser pointer
659     U8 *end; // end of input string
660     const char *err; // parse error, if != 0
661     CBOR cbor;
662     U32 depth; // recursion depth
663     U32 maxdepth; // recursion depth limit
664 root 1.19 AV *shareable;
665 root 1.20 AV *stringref;
666 root 1.27 SV *decode_tagged;
667 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
668 root 1.1 } dec_t;
669    
670 root 1.59 // set dec->err to ERRSV
671     ecb_cold static void
672     err_errsv (dec_t *dec)
673     {
674     if (!dec->err)
675     {
676     dec->err_sv = newSVsv (ERRSV);
677    
678     // chop off the trailing \n
679     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
680     *SvEND (dec->err_sv) = 0;
681    
682     dec->err = SvPVutf8_nolen (dec->err_sv);
683     }
684     }
685 root 1.1
686 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
687     ecb_cold static void
688     err_set (dec_t *dec, const char *reason)
689     {
690     if (!dec->err)
691     dec->err = reason;
692     }
693    
694     ecb_cold static void
695     err_unexpected_end (dec_t *dec)
696     {
697     err_set (dec, "unexpected end of CBOR data");
698     }
699    
700     ecb_cold static void
701     err_nesting_exceeded (dec_t *dec)
702     {
703     err_set (dec, ERR_NESTING_EXCEEDED);
704     }
705    
706     #define ERR_DO(do) SB do; goto fail; SE
707     #define ERR(reason) ERR_DO (err_set (dec, reason))
708     #define ERR_ERRSV ERR_DO (err_errsv (dec))
709    
710 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
711 root 1.1
712 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
713 root 1.1 #define DEC_DEC_DEPTH --dec->depth
714    
715     static UV
716     decode_uint (dec_t *dec)
717     {
718 root 1.35 U8 m = *dec->cur & MINOR_MASK;
719     ++dec->cur;
720    
721     if (ecb_expect_true (m < LENGTH_EXT1))
722     return m;
723 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
724     {
725     WANT (1);
726     dec->cur += 1;
727     return dec->cur[-1];
728     }
729     else if (ecb_expect_true (m == LENGTH_EXT2))
730     {
731     WANT (2);
732     dec->cur += 2;
733     return (((UV)dec->cur[-2]) << 8)
734     | ((UV)dec->cur[-1]);
735     }
736     else if (ecb_expect_true (m == LENGTH_EXT4))
737     {
738     WANT (4);
739     dec->cur += 4;
740     return (((UV)dec->cur[-4]) << 24)
741     | (((UV)dec->cur[-3]) << 16)
742     | (((UV)dec->cur[-2]) << 8)
743     | ((UV)dec->cur[-1]);
744     }
745     else if (ecb_expect_true (m == LENGTH_EXT8))
746 root 1.1 {
747 root 1.36 WANT (8);
748     dec->cur += 8;
749 root 1.34
750 root 1.36 return
751 root 1.34 #if UVSIZE < 8
752 root 1.36 0
753 root 1.34 #else
754 root 1.36 (((UV)dec->cur[-8]) << 56)
755     | (((UV)dec->cur[-7]) << 48)
756     | (((UV)dec->cur[-6]) << 40)
757     | (((UV)dec->cur[-5]) << 32)
758 root 1.34 #endif
759 root 1.36 | (((UV)dec->cur[-4]) << 24)
760     | (((UV)dec->cur[-3]) << 16)
761     | (((UV)dec->cur[-2]) << 8)
762     | ((UV)dec->cur[-1]);
763 root 1.1 }
764 root 1.36 else
765     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
766 root 1.1
767     fail:
768     return 0;
769     }
770    
771     static SV *decode_sv (dec_t *dec);
772    
773     static SV *
774     decode_av (dec_t *dec)
775     {
776     AV *av = newAV ();
777    
778     DEC_INC_DEPTH;
779    
780 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
781 root 1.1 {
782     ++dec->cur;
783    
784     for (;;)
785     {
786     WANT (1);
787    
788 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
789 root 1.1 {
790     ++dec->cur;
791     break;
792     }
793    
794     av_push (av, decode_sv (dec));
795     }
796     }
797     else
798     {
799 root 1.57 UV i, len = decode_uint (dec);
800 root 1.1
801 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
802 root 1.1 av_fill (av, len - 1);
803    
804     for (i = 0; i < len; ++i)
805     AvARRAY (av)[i] = decode_sv (dec);
806     }
807    
808     DEC_DEC_DEPTH;
809     return newRV_noinc ((SV *)av);
810    
811     fail:
812     SvREFCNT_dec (av);
813     DEC_DEC_DEPTH;
814     return &PL_sv_undef;
815     }
816    
817 root 1.16 static void
818     decode_he (dec_t *dec, HV *hv)
819     {
820     // for speed reasons, we specialcase single-string
821 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
822    
823 root 1.23 if (ecb_expect_true (!dec->stringref))
824 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
825 root 1.21 {
826 root 1.56 STRLEN len = decode_uint (dec);
827 root 1.21 char *key = (char *)dec->cur;
828 root 1.16
829 root 1.49 WANT (len);
830 root 1.21 dec->cur += len;
831    
832 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
833 root 1.16
834 root 1.21 return;
835     }
836 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
837 root 1.21 {
838 root 1.56 STRLEN len = decode_uint (dec);
839 root 1.21 char *key = (char *)dec->cur;
840 root 1.16
841 root 1.49 WANT (len);
842 root 1.21 dec->cur += len;
843 root 1.20
844 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
845     if (!is_utf8_string (key, len))
846     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
847 root 1.16
848 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
849 root 1.16
850 root 1.21 return;
851     }
852 root 1.20
853 root 1.21 SV *k = decode_sv (dec);
854     SV *v = decode_sv (dec);
855 root 1.16
856 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
857     // methods, and this is hopefully the only place where it can happen,
858     // so if there is a chance of an exception, take the very slow path.
859     // since catching exceptions is "undocumented/internal/forbidden" by
860     // the new p5p powers, we need to call out to a perl function :/
861     if (ecb_expect_false (SvAMAGIC (k)))
862     {
863     dSP;
864    
865     ENTER; SAVETMPS;
866     PUSHMARK (SP);
867     EXTEND (SP, 3);
868     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
869     PUSHs (sv_2mortal (k));
870     PUSHs (sv_2mortal (v));
871    
872     PUTBACK;
873     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
874     SPAGAIN;
875    
876     FREETMPS; LEAVE;
877    
878     if (SvTRUE (ERRSV))
879     ERR_ERRSV;
880    
881     return;
882     }
883    
884 root 1.21 hv_store_ent (hv, k, v, 0);
885     SvREFCNT_dec (k);
886 root 1.38
887     fail:
888     ;
889 root 1.16 }
890    
891 root 1.1 static SV *
892     decode_hv (dec_t *dec)
893     {
894     HV *hv = newHV ();
895    
896     DEC_INC_DEPTH;
897    
898 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
899 root 1.1 {
900     ++dec->cur;
901    
902     for (;;)
903     {
904     WANT (1);
905    
906 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
907 root 1.1 {
908     ++dec->cur;
909     break;
910     }
911    
912 root 1.16 decode_he (dec, hv);
913 root 1.1 }
914     }
915     else
916     {
917 root 1.57 UV pairs = decode_uint (dec);
918    
919     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
920 root 1.1
921 root 1.16 while (pairs--)
922     decode_he (dec, hv);
923 root 1.1 }
924    
925     DEC_DEC_DEPTH;
926     return newRV_noinc ((SV *)hv);
927    
928     fail:
929     SvREFCNT_dec (hv);
930     DEC_DEC_DEPTH;
931     return &PL_sv_undef;
932     }
933    
934     static SV *
935     decode_str (dec_t *dec, int utf8)
936     {
937 root 1.6 SV *sv = 0;
938 root 1.1
939 root 1.35 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF)
940 root 1.1 {
941 root 1.33 // indefinite length strings
942 root 1.1 ++dec->cur;
943    
944 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
945 root 1.33
946 root 1.1 sv = newSVpvn ("", 0);
947    
948     for (;;)
949     {
950     WANT (1);
951    
952 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
953     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
954 root 1.33 {
955     ++dec->cur;
956     break;
957     }
958     else
959     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
960    
961     STRLEN len = decode_uint (dec);
962 root 1.1
963 root 1.33 WANT (len);
964     sv_catpvn (sv, dec->cur, len);
965     dec->cur += len;
966 root 1.1 }
967     }
968     else
969     {
970     STRLEN len = decode_uint (dec);
971    
972     WANT (len);
973     sv = newSVpvn (dec->cur, len);
974     dec->cur += len;
975 root 1.25
976     if (ecb_expect_false (dec->stringref)
977     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
978     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
979 root 1.1 }
980    
981     if (utf8)
982 root 1.38 {
983     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
984     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
985     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
986    
987     SvUTF8_on (sv);
988     }
989 root 1.1
990     return sv;
991    
992     fail:
993 root 1.6 SvREFCNT_dec (sv);
994 root 1.1 return &PL_sv_undef;
995     }
996    
997     static SV *
998 root 1.3 decode_tagged (dec_t *dec)
999     {
1000 root 1.19 SV *sv = 0;
1001 root 1.3 UV tag = decode_uint (dec);
1002 root 1.19
1003     WANT (1);
1004 root 1.3
1005 root 1.18 switch (tag)
1006     {
1007     case CBOR_TAG_MAGIC:
1008 root 1.20 sv = decode_sv (dec);
1009     break;
1010 root 1.18
1011     case CBOR_TAG_INDIRECTION:
1012 root 1.20 sv = newRV_noinc (decode_sv (dec));
1013     break;
1014    
1015     case CBOR_TAG_STRINGREF_NAMESPACE:
1016     {
1017 root 1.55 // do nmot use SAVETMPS/FREETMPS, as these will
1018     // erase mortalised caches, e.g. "shareable"
1019     ENTER;
1020 root 1.20
1021     SAVESPTR (dec->stringref);
1022     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1023    
1024     sv = decode_sv (dec);
1025    
1026 root 1.55 LEAVE;
1027 root 1.20 }
1028     break;
1029    
1030     case CBOR_TAG_STRINGREF:
1031     {
1032 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1033 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1034    
1035     UV idx = decode_uint (dec);
1036    
1037     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
1038     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1039    
1040     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1041     }
1042     break;
1043 root 1.11
1044 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1045     {
1046     if (ecb_expect_false (!dec->shareable))
1047     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1048    
1049 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1050     {
1051     sv = newSV (0);
1052     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1053 root 1.18
1054 root 1.37 SV *osv = decode_sv (dec);
1055     sv_setsv (sv, osv);
1056     SvREFCNT_dec_NN (osv);
1057     }
1058     else
1059     {
1060     av_push (dec->shareable, &PL_sv_undef);
1061     int idx = AvFILLp (dec->shareable);
1062     sv = decode_sv (dec);
1063     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1064     }
1065 root 1.19 }
1066 root 1.20 break;
1067 root 1.18
1068     case CBOR_TAG_VALUE_SHAREDREF:
1069 root 1.17 {
1070 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1071 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1072 root 1.18
1073 root 1.19 UV idx = decode_uint (dec);
1074    
1075 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
1076 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1077    
1078 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1079 root 1.37
1080     if (sv == &PL_sv_undef)
1081     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1082 root 1.17 }
1083 root 1.20 break;
1084 root 1.17
1085 root 1.18 case CBOR_TAG_PERL_OBJECT:
1086     {
1087 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1088     goto filter;
1089    
1090 root 1.19 sv = decode_sv (dec);
1091    
1092 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1093     ERR ("corrupted CBOR data (non-array perl object)");
1094    
1095     AV *av = (AV *)SvRV (sv);
1096     int len = av_len (av) + 1;
1097     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1098    
1099     if (!stash)
1100     ERR ("cannot decode perl-object (package does not exist)");
1101    
1102     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1103    
1104     if (!method)
1105     ERR ("cannot decode perl-object (package does not have a THAW method)");
1106    
1107     dSP;
1108    
1109 root 1.50 ENTER; SAVETMPS;
1110     PUSHMARK (SP);
1111 root 1.18 EXTEND (SP, len + 1);
1112     // we re-bless the reference to get overload and other niceties right
1113     PUSHs (*av_fetch (av, 0, 1));
1114     PUSHs (sv_cbor);
1115    
1116     int i;
1117    
1118     for (i = 1; i < len; ++i)
1119     PUSHs (*av_fetch (av, i, 1));
1120    
1121     PUTBACK;
1122     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1123     SPAGAIN;
1124    
1125     if (SvTRUE (ERRSV))
1126     {
1127     FREETMPS; LEAVE;
1128 root 1.59 ERR_ERRSV;
1129 root 1.18 }
1130    
1131     SvREFCNT_dec (sv);
1132     sv = SvREFCNT_inc (POPs);
1133 root 1.11
1134 root 1.18 PUTBACK;
1135 root 1.11
1136 root 1.18 FREETMPS; LEAVE;
1137     }
1138 root 1.20 break;
1139 root 1.9
1140 root 1.18 default:
1141 root 1.60 filter:
1142 root 1.18 {
1143 root 1.58 SV *tag_sv = newSVuv (tag);
1144    
1145 root 1.19 sv = decode_sv (dec);
1146    
1147 root 1.27 dSP;
1148 root 1.50 ENTER; SAVETMPS;
1149     SAVESTACK_POS ();
1150     PUSHMARK (SP);
1151 root 1.27 EXTEND (SP, 2);
1152 root 1.58 PUSHs (tag_sv);
1153 root 1.27 PUSHs (sv);
1154    
1155     PUTBACK;
1156     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1157     SPAGAIN;
1158    
1159     if (SvTRUE (ERRSV))
1160     {
1161 root 1.58 SvREFCNT_dec (tag_sv);
1162 root 1.27 FREETMPS; LEAVE;
1163 root 1.59 ERR_ERRSV;
1164 root 1.27 }
1165    
1166     if (count)
1167     {
1168 root 1.58 SvREFCNT_dec (tag_sv);
1169 root 1.27 SvREFCNT_dec (sv);
1170     sv = SvREFCNT_inc (POPs);
1171     }
1172     else
1173     {
1174     AV *av = newAV ();
1175 root 1.58 av_push (av, tag_sv);
1176 root 1.27 av_push (av, sv);
1177    
1178     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1179     ? cbor_tagged_stash
1180     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1181     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1182     }
1183 root 1.7
1184 root 1.27 PUTBACK;
1185    
1186     FREETMPS; LEAVE;
1187 root 1.18 }
1188 root 1.20 break;
1189 root 1.11 }
1190 root 1.9
1191 root 1.20 return sv;
1192    
1193 root 1.9 fail:
1194     SvREFCNT_dec (sv);
1195     return &PL_sv_undef;
1196 root 1.3 }
1197    
1198     static SV *
1199 root 1.1 decode_sv (dec_t *dec)
1200     {
1201     WANT (1);
1202    
1203 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1204 root 1.1 {
1205 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1206     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1207     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1208     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1209     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1210     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1211     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1212    
1213     case MAJOR_MISC >> MAJOR_SHIFT:
1214     switch (*dec->cur++ & MINOR_MASK)
1215 root 1.1 {
1216 root 1.35 case SIMPLE_FALSE:
1217 root 1.1 #if CBOR_SLOW
1218 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1219 root 1.1 #endif
1220 root 1.10 return newSVsv (types_false);
1221 root 1.35 case SIMPLE_TRUE:
1222 root 1.1 #if CBOR_SLOW
1223 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1224 root 1.1 #endif
1225 root 1.10 return newSVsv (types_true);
1226 root 1.35 case SIMPLE_NULL:
1227 root 1.1 return newSVsv (&PL_sv_undef);
1228 root 1.35 case SIMPLE_UNDEF:
1229 root 1.10 #if CBOR_SLOW
1230     types_error = get_bool ("Types::Serialiser::error");
1231     #endif
1232     return newSVsv (types_error);
1233 root 1.1
1234 root 1.35 case MISC_FLOAT16:
1235 root 1.2 {
1236     WANT (2);
1237    
1238     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1239     dec->cur += 2;
1240    
1241     return newSVnv (ecb_binary16_to_float (fp));
1242     }
1243 root 1.1
1244 root 1.35 case MISC_FLOAT32:
1245 root 1.1 {
1246     uint32_t fp;
1247     WANT (4);
1248     memcpy (&fp, dec->cur, 4);
1249     dec->cur += 4;
1250    
1251     if (!ecb_big_endian ())
1252     fp = ecb_bswap32 (fp);
1253    
1254     return newSVnv (ecb_binary32_to_float (fp));
1255     }
1256    
1257 root 1.35 case MISC_FLOAT64:
1258 root 1.1 {
1259     uint64_t fp;
1260     WANT (8);
1261     memcpy (&fp, dec->cur, 8);
1262     dec->cur += 8;
1263    
1264     if (!ecb_big_endian ())
1265     fp = ecb_bswap64 (fp);
1266    
1267     return newSVnv (ecb_binary64_to_double (fp));
1268     }
1269    
1270 root 1.35 // 0..19 unassigned simple
1271 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1272     // 28-30 unassigned misc
1273     // 31 break code
1274 root 1.1 default:
1275 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1276 root 1.1 }
1277    
1278     break;
1279     }
1280    
1281     fail:
1282     return &PL_sv_undef;
1283     }
1284    
1285     static SV *
1286     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1287     {
1288 root 1.48 dec_t dec = { 0 };
1289 root 1.1 SV *sv;
1290 root 1.16 STRLEN len;
1291     char *data = SvPVbyte (string, len);
1292 root 1.1
1293 root 1.16 if (len > cbor->max_size && cbor->max_size)
1294     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1295     (unsigned long)len, (unsigned long)cbor->max_size);
1296 root 1.1
1297     dec.cbor = *cbor;
1298 root 1.16 dec.cur = (U8 *)data;
1299     dec.end = (U8 *)data + len;
1300 root 1.1
1301     sv = decode_sv (&dec);
1302    
1303     if (offset_return)
1304     *offset_return = dec.cur;
1305    
1306     if (!(offset_return || !sv))
1307 root 1.2 if (dec.cur != dec.end && !dec.err)
1308     dec.err = "garbage after CBOR object";
1309    
1310     if (dec.err)
1311 root 1.1 {
1312 root 1.39 if (dec.shareable)
1313     {
1314 root 1.58 // need to break cyclic links, which would all be in shareable
1315 root 1.39 int i;
1316     SV **svp;
1317    
1318     for (i = av_len (dec.shareable) + 1; i--; )
1319     if ((svp = av_fetch (dec.shareable, i, 0)))
1320     sv_setsv (*svp, &PL_sv_undef);
1321     }
1322    
1323 root 1.2 SvREFCNT_dec (sv);
1324 root 1.59
1325     if (dec.err_sv)
1326     sv_2mortal (dec.err_sv);
1327    
1328 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1329 root 1.1 }
1330    
1331     sv = sv_2mortal (sv);
1332    
1333     return sv;
1334     }
1335    
1336     /////////////////////////////////////////////////////////////////////////////
1337 root 1.40 // incremental parser
1338    
1339     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1340    
1341     // returns 0 for notyet, 1 for success or error
1342     static int
1343     incr_parse (CBOR *self, SV *cborstr)
1344     {
1345     STRLEN cur;
1346     SvPV (cborstr, cur);
1347    
1348     while (ecb_expect_true (self->incr_need <= cur))
1349     {
1350     // table of integer count bytes
1351     static I8 incr_len[MINOR_MASK + 1] = {
1352     0, 0, 0, 0, 0, 0, 0, 0,
1353     0, 0, 0, 0, 0, 0, 0, 0,
1354     0, 0, 0, 0, 0, 0, 0, 0,
1355     1, 2, 4, 8,-1,-1,-1,-2
1356     };
1357    
1358     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1359     U8 m = *p & MINOR_MASK;
1360     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1361     I8 ilen = incr_len[m];
1362    
1363     self->incr_need = self->incr_pos + 1;
1364    
1365     if (ecb_expect_false (ilen < 0))
1366     {
1367     if (m != MINOR_INDEF)
1368     return 1; // error
1369    
1370     if (*p == (MAJOR_MISC | MINOR_INDEF))
1371     {
1372     if (count >= 0)
1373     return 1; // error
1374    
1375     count = 1;
1376     }
1377     else
1378     {
1379     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1380     count = -1;
1381     }
1382     }
1383     else
1384     {
1385     self->incr_need += ilen;
1386     if (ecb_expect_false (self->incr_need > cur))
1387     return 0;
1388    
1389     int major = *p >> MAJOR_SHIFT;
1390    
1391     switch (major)
1392     {
1393 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1394     ++count; // tags merely prefix another value
1395     break;
1396    
1397 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1398     case MAJOR_TEXT >> MAJOR_SHIFT:
1399     case MAJOR_ARRAY >> MAJOR_SHIFT:
1400     case MAJOR_MAP >> MAJOR_SHIFT:
1401     {
1402     UV len;
1403    
1404     if (ecb_expect_false (ilen))
1405     {
1406     len = 0;
1407    
1408     do {
1409     len = (len << 8) | *++p;
1410     } while (--ilen);
1411     }
1412     else
1413     len = m;
1414    
1415     switch (major)
1416     {
1417     case MAJOR_BYTES >> MAJOR_SHIFT:
1418     case MAJOR_TEXT >> MAJOR_SHIFT:
1419     self->incr_need += len;
1420     if (ecb_expect_false (self->incr_need > cur))
1421     return 0;
1422    
1423     break;
1424    
1425     case MAJOR_MAP >> MAJOR_SHIFT:
1426     len <<= 1;
1427     case MAJOR_ARRAY >> MAJOR_SHIFT:
1428     if (len)
1429     {
1430     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1431     count = len + 1;
1432     }
1433     break;
1434     }
1435     }
1436     }
1437     }
1438    
1439     self->incr_pos = self->incr_need;
1440    
1441     if (count > 0)
1442     {
1443     while (!--count)
1444     {
1445     if (!AvFILLp (self->incr_count))
1446     return 1; // done
1447    
1448     SvREFCNT_dec_NN (av_pop (self->incr_count));
1449     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1450     }
1451    
1452     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1453     }
1454     }
1455    
1456     return 0;
1457     }
1458    
1459    
1460     /////////////////////////////////////////////////////////////////////////////
1461 root 1.1 // XS interface functions
1462    
1463     MODULE = CBOR::XS PACKAGE = CBOR::XS
1464    
1465     BOOT:
1466     {
1467     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1468 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1469 root 1.1
1470 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1471     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1472    
1473     types_true = get_bool ("Types::Serialiser::true" );
1474     types_false = get_bool ("Types::Serialiser::false");
1475     types_error = get_bool ("Types::Serialiser::error");
1476 root 1.11
1477 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1478    
1479 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1480     SvREADONLY_on (sv_cbor);
1481 root 1.56
1482     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1483 root 1.1 }
1484    
1485     PROTOTYPES: DISABLE
1486    
1487     void CLONE (...)
1488     CODE:
1489 root 1.10 cbor_stash = 0;
1490     cbor_tagged_stash = 0;
1491     types_error_stash = 0;
1492     types_boolean_stash = 0;
1493 root 1.1
1494     void new (char *klass)
1495     PPCODE:
1496     {
1497     SV *pv = NEWSV (0, sizeof (CBOR));
1498     SvPOK_only (pv);
1499     cbor_init ((CBOR *)SvPVX (pv));
1500     XPUSHs (sv_2mortal (sv_bless (
1501     newRV_noinc (pv),
1502     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1503     )));
1504     }
1505    
1506     void shrink (CBOR *self, int enable = 1)
1507     ALIAS:
1508     shrink = F_SHRINK
1509     allow_unknown = F_ALLOW_UNKNOWN
1510 root 1.18 allow_sharing = F_ALLOW_SHARING
1511 root 1.37 allow_cycles = F_ALLOW_CYCLES
1512 root 1.60 forbid_objects = F_FORBID_OBJECTS
1513 root 1.32 pack_strings = F_PACK_STRINGS
1514 root 1.54 text_keys = F_TEXT_KEYS
1515     text_strings = F_TEXT_STRINGS
1516 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1517 root 1.1 PPCODE:
1518     {
1519     if (enable)
1520     self->flags |= ix;
1521     else
1522     self->flags &= ~ix;
1523    
1524     XPUSHs (ST (0));
1525     }
1526    
1527     void get_shrink (CBOR *self)
1528     ALIAS:
1529     get_shrink = F_SHRINK
1530     get_allow_unknown = F_ALLOW_UNKNOWN
1531 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1532 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1533 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1534 root 1.32 get_pack_strings = F_PACK_STRINGS
1535 root 1.54 get_text_keys = F_TEXT_KEYS
1536     get_text_strings = F_TEXT_STRINGS
1537 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1538 root 1.1 PPCODE:
1539     XPUSHs (boolSV (self->flags & ix));
1540    
1541     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1542     PPCODE:
1543     self->max_depth = max_depth;
1544     XPUSHs (ST (0));
1545    
1546     U32 get_max_depth (CBOR *self)
1547     CODE:
1548     RETVAL = self->max_depth;
1549     OUTPUT:
1550     RETVAL
1551    
1552     void max_size (CBOR *self, U32 max_size = 0)
1553     PPCODE:
1554     self->max_size = max_size;
1555     XPUSHs (ST (0));
1556    
1557     int get_max_size (CBOR *self)
1558     CODE:
1559     RETVAL = self->max_size;
1560     OUTPUT:
1561     RETVAL
1562    
1563 root 1.27 void filter (CBOR *self, SV *filter = 0)
1564     PPCODE:
1565     SvREFCNT_dec (self->filter);
1566     self->filter = filter ? newSVsv (filter) : filter;
1567     XPUSHs (ST (0));
1568    
1569     SV *get_filter (CBOR *self)
1570     CODE:
1571     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1572     OUTPUT:
1573     RETVAL
1574    
1575 root 1.1 void encode (CBOR *self, SV *scalar)
1576     PPCODE:
1577     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1578     XPUSHs (scalar);
1579    
1580     void decode (CBOR *self, SV *cborstr)
1581     PPCODE:
1582     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1583     XPUSHs (cborstr);
1584    
1585     void decode_prefix (CBOR *self, SV *cborstr)
1586     PPCODE:
1587     {
1588     SV *sv;
1589     char *offset;
1590     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1591     EXTEND (SP, 2);
1592     PUSHs (sv);
1593     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1594     }
1595    
1596 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1597 root 1.42 ALIAS:
1598     incr_parse_multiple = 1
1599 root 1.40 PPCODE:
1600     {
1601     if (SvUTF8 (cborstr))
1602     sv_utf8_downgrade (cborstr, 0);
1603    
1604     if (!self->incr_count)
1605     {
1606     self->incr_count = newAV ();
1607     self->incr_pos = 0;
1608     self->incr_need = 1;
1609    
1610     av_push (self->incr_count, newSViv (1));
1611     }
1612    
1613 root 1.41 do
1614 root 1.40 {
1615     if (!incr_parse (self, cborstr))
1616     {
1617     if (self->incr_need > self->max_size && self->max_size)
1618     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1619     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1620    
1621     break;
1622     }
1623    
1624 root 1.41 SV *sv;
1625     char *offset;
1626 root 1.40
1627 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1628     XPUSHs (sv);
1629 root 1.40
1630 root 1.41 sv_chop (cborstr, offset);
1631 root 1.40
1632 root 1.41 av_clear (self->incr_count);
1633     av_push (self->incr_count, newSViv (1));
1634 root 1.40
1635 root 1.41 self->incr_pos = 0;
1636     self->incr_need = self->incr_pos + 1;
1637 root 1.40 }
1638 root 1.42 while (ix);
1639 root 1.40 }
1640    
1641     void incr_reset (CBOR *self)
1642     CODE:
1643     {
1644     SvREFCNT_dec (self->incr_count);
1645     self->incr_count = 0;
1646     }
1647    
1648 root 1.27 void DESTROY (CBOR *self)
1649     PPCODE:
1650     cbor_free (self);
1651    
1652 root 1.1 PROTOTYPES: ENABLE
1653    
1654     void encode_cbor (SV *scalar)
1655 root 1.36 ALIAS:
1656     encode_cbor = 0
1657     encode_cbor_sharing = F_ALLOW_SHARING
1658 root 1.1 PPCODE:
1659     {
1660     CBOR cbor;
1661     cbor_init (&cbor);
1662 root 1.36 cbor.flags |= ix;
1663 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1664     XPUSHs (scalar);
1665     }
1666    
1667     void decode_cbor (SV *cborstr)
1668     PPCODE:
1669     {
1670     CBOR cbor;
1671     cbor_init (&cbor);
1672     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1673     XPUSHs (cborstr);
1674     }
1675