ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.59
Committed: Fri Nov 25 23:37:27 2016 UTC (7 years, 5 months ago) by root
Branch: MAIN
Changes since 1.58: +75 -4 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12 root 1.44 #define ECB_NO_THREADS 1
13 root 1.1 #include "ecb.h"
14    
15 root 1.14 // compatibility with perl <5.18
16     #ifndef HvNAMELEN_get
17     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
18     #endif
19     #ifndef HvNAMELEN
20     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
21     #endif
22     #ifndef HvNAMEUTF8
23     # define HvNAMEUTF8(hv) 0
24     #endif
25 root 1.28 #ifndef SvREFCNT_dec_NN
26     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
27     #endif
28 root 1.14
29 root 1.35 // known major and minor types
30     enum cbor_type
31     {
32     MAJOR_SHIFT = 5,
33     MINOR_MASK = 0x1f,
34    
35     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
36     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
37     MAJOR_BYTES = 2 << MAJOR_SHIFT,
38     MAJOR_TEXT = 3 << MAJOR_SHIFT,
39     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
40     MAJOR_MAP = 5 << MAJOR_SHIFT,
41     MAJOR_TAG = 6 << MAJOR_SHIFT,
42     MAJOR_MISC = 7 << MAJOR_SHIFT,
43    
44     // INT/STRING/ARRAY/MAP subtypes
45     LENGTH_EXT1 = 24,
46     LENGTH_EXT2 = 25,
47     LENGTH_EXT4 = 26,
48     LENGTH_EXT8 = 27,
49    
50     // SIMPLE types (effectively MISC subtypes)
51     SIMPLE_FALSE = 20,
52     SIMPLE_TRUE = 21,
53     SIMPLE_NULL = 22,
54     SIMPLE_UNDEF = 23,
55    
56     // MISC subtype (unused)
57     MISC_EXT1 = 24,
58     MISC_FLOAT16 = 25,
59     MISC_FLOAT32 = 26,
60     MISC_FLOAT64 = 27,
61    
62     // BYTES/TEXT/ARRAY/MAP
63     MINOR_INDEF = 31,
64     };
65    
66 root 1.9 // known tags
67 root 1.8 enum cbor_tag
68     {
69 root 1.35 // extensions
70     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
71     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
72     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
73     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
74     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
75     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
76     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
77    
78     // rfc7049
79     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
80     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
81     CBOR_TAG_POS_BIGNUM = 2, // byte string
82     CBOR_TAG_NEG_BIGNUM = 3, // byte string
83     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
84     CBOR_TAG_BIGFLOAT = 5, // array
85    
86     CBOR_TAG_CONV_B64U = 21, // base64url, any
87     CBOR_TAG_CONV_B64 = 22, // base64, any
88     CBOR_TAG_CONV_HEX = 23, // base16, any
89     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
90    
91     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
92     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
93     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
94     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
95     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
96 root 1.8
97 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
98 root 1.8 };
99    
100 root 1.24 #define F_SHRINK 0x00000001UL
101     #define F_ALLOW_UNKNOWN 0x00000002UL
102 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
103 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
104     #define F_PACK_STRINGS 0x00000010UL
105 root 1.54 #define F_TEXT_KEYS 0x00000020UL
106     #define F_TEXT_STRINGS 0x00000040UL
107 root 1.53 #define F_VALIDATE_UTF8 0x00000080UL
108 root 1.1
109     #define INIT_SIZE 32 // initial scalar size to be allocated
110    
111     #define SB do {
112     #define SE } while (0)
113    
114     #define IN_RANGE_INC(type,val,beg,end) \
115     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
116     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
117    
118     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
119    
120     #ifdef USE_ITHREADS
121     # define CBOR_SLOW 1
122     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
123     #else
124     # define CBOR_SLOW 0
125     # define CBOR_STASH cbor_stash
126     #endif
127    
128 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
129 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
130 root 1.1
131     typedef struct {
132     U32 flags;
133     U32 max_depth;
134     STRLEN max_size;
135 root 1.27 SV *filter;
136 root 1.40
137     // for the incremental parser
138     STRLEN incr_pos; // the current offset into the text
139     STRLEN incr_need; // minimum bytes needed to decode
140     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
141 root 1.1 } CBOR;
142    
143 root 1.5 ecb_inline void
144 root 1.1 cbor_init (CBOR *cbor)
145     {
146     Zero (cbor, 1, CBOR);
147     cbor->max_depth = 512;
148     }
149    
150 root 1.27 ecb_inline void
151     cbor_free (CBOR *cbor)
152     {
153     SvREFCNT_dec (cbor->filter);
154 root 1.40 SvREFCNT_dec (cbor->incr_count);
155 root 1.27 }
156    
157 root 1.1 /////////////////////////////////////////////////////////////////////////////
158     // utility functions
159    
160 root 1.5 ecb_inline SV *
161 root 1.1 get_bool (const char *name)
162     {
163     SV *sv = get_sv (name, 1);
164    
165     SvREADONLY_on (sv);
166     SvREADONLY_on (SvRV (sv));
167    
168     return sv;
169     }
170    
171 root 1.5 ecb_inline void
172 root 1.1 shrink (SV *sv)
173     {
174     sv_utf8_downgrade (sv, 1);
175    
176     if (SvLEN (sv) > SvCUR (sv) + 1)
177     {
178     #ifdef SvPV_shrink_to_cur
179     SvPV_shrink_to_cur (sv);
180     #elif defined (SvPV_renew)
181     SvPV_renew (sv, SvCUR (sv) + 1);
182     #endif
183     }
184     }
185    
186 root 1.21 // minimum length of a string to be registered for stringref
187     ecb_inline int
188     minimum_string_length (UV idx)
189     {
190     return idx > 23
191     ? idx > 0xffU
192     ? idx > 0xffffU
193     ? idx > 0xffffffffU
194 root 1.29 ? 11
195     : 7
196 root 1.21 : 5
197     : 4
198     : 3;
199     }
200    
201 root 1.1 /////////////////////////////////////////////////////////////////////////////
202     // encoder
203    
204     // structure used for encoding CBOR
205     typedef struct
206     {
207     char *cur; // SvPVX (sv) + current output position
208     char *end; // SvEND (sv)
209     SV *sv; // result scalar
210     CBOR cbor;
211     U32 depth; // recursion level
212 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
213     UV stringref_idx;
214 root 1.19 HV *shareable; // ptr => index, or 0
215     UV shareable_idx;
216 root 1.1 } enc_t;
217    
218 root 1.5 ecb_inline void
219 root 1.1 need (enc_t *enc, STRLEN len)
220     {
221 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
222 root 1.1 {
223     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
224     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
225     enc->cur = SvPVX (enc->sv) + cur;
226     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
227     }
228     }
229    
230 root 1.5 ecb_inline void
231 root 1.1 encode_ch (enc_t *enc, char ch)
232     {
233     need (enc, 1);
234     *enc->cur++ = ch;
235     }
236    
237     static void
238     encode_uint (enc_t *enc, int major, UV len)
239     {
240     need (enc, 9);
241    
242 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
243 root 1.1 *enc->cur++ = major | len;
244 root 1.36 else if (ecb_expect_true (len <= 0xffU))
245 root 1.1 {
246 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
247 root 1.1 *enc->cur++ = len;
248     }
249 root 1.36 else if (len <= 0xffffU)
250 root 1.1 {
251 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
252 root 1.1 *enc->cur++ = len >> 8;
253     *enc->cur++ = len;
254     }
255 root 1.36 else if (len <= 0xffffffffU)
256 root 1.1 {
257 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
258 root 1.1 *enc->cur++ = len >> 24;
259     *enc->cur++ = len >> 16;
260     *enc->cur++ = len >> 8;
261     *enc->cur++ = len;
262     }
263 root 1.4 else
264 root 1.1 {
265 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
266 root 1.1 *enc->cur++ = len >> 56;
267     *enc->cur++ = len >> 48;
268     *enc->cur++ = len >> 40;
269     *enc->cur++ = len >> 32;
270     *enc->cur++ = len >> 24;
271     *enc->cur++ = len >> 16;
272     *enc->cur++ = len >> 8;
273     *enc->cur++ = len;
274     }
275     }
276    
277 root 1.21 ecb_inline void
278     encode_tag (enc_t *enc, UV tag)
279     {
280 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
281 root 1.21 }
282    
283 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
284     ecb_noinline static void
285     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
286     {
287     STRLEN ulen = len;
288     U8 *p, *pend = (U8 *)str + len;
289    
290     for (p = (U8 *)str; p < pend; ++p)
291     ulen += *p >> 7; // count set high bits
292    
293     encode_uint (enc, MAJOR_TEXT, ulen);
294    
295     need (enc, ulen);
296     for (p = (U8 *)str; p < pend; ++p)
297     if (*p < 0x80)
298     *enc->cur++ = *p;
299     else
300     {
301     *enc->cur++ = 0xc0 + (*p >> 6);
302     *enc->cur++ = 0x80 + (*p & 63);
303     }
304     }
305    
306 root 1.30 ecb_inline void
307 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
308 root 1.30 {
309 root 1.53 if (ecb_expect_false (upgrade_utf8))
310 root 1.51 if (!utf8)
311     {
312 root 1.53 encode_str_utf8 (enc, utf8, str, len);
313 root 1.51 return;
314     }
315    
316 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
317 root 1.30 need (enc, len);
318     memcpy (enc->cur, str, len);
319     enc->cur += len;
320     }
321    
322 root 1.54 ecb_inline void
323 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
324 root 1.1 {
325 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
326 root 1.21 {
327 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
328 root 1.21
329     if (SvOK (*svp))
330     {
331     // already registered, use stringref
332     encode_tag (enc, CBOR_TAG_STRINGREF);
333 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
334 root 1.21 return;
335     }
336     else if (len >= minimum_string_length (enc->stringref_idx))
337     {
338     // register only
339     sv_setuv (*svp, enc->stringref_idx);
340     ++enc->stringref_idx;
341     }
342     }
343    
344 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
345 root 1.1 }
346    
347     static void encode_sv (enc_t *enc, SV *sv);
348    
349     static void
350     encode_av (enc_t *enc, AV *av)
351     {
352     int i, len = av_len (av);
353    
354     if (enc->depth >= enc->cbor.max_depth)
355     croak (ERR_NESTING_EXCEEDED);
356    
357     ++enc->depth;
358    
359 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
360 root 1.1
361 root 1.45 if (SvMAGICAL (av))
362     for (i = 0; i <= len; ++i)
363     {
364     SV **svp = av_fetch (av, i, 0);
365     encode_sv (enc, svp ? *svp : &PL_sv_undef);
366     }
367     else
368     for (i = 0; i <= len; ++i)
369 root 1.46 {
370     SV *sv = AvARRAY (av)[i];
371     encode_sv (enc, sv ? sv : &PL_sv_undef);
372     }
373 root 1.1
374     --enc->depth;
375     }
376    
377     static void
378     encode_hv (enc_t *enc, HV *hv)
379     {
380     HE *he;
381    
382     if (enc->depth >= enc->cbor.max_depth)
383     croak (ERR_NESTING_EXCEEDED);
384    
385     ++enc->depth;
386    
387     int pairs = hv_iterinit (hv);
388     int mg = SvMAGICAL (hv);
389    
390     if (mg)
391 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
392 root 1.1 else
393 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
394 root 1.1
395     while ((he = hv_iternext (hv)))
396     {
397 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
398     encode_sv (enc, HeSVKEY (he));
399 root 1.1 else
400 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
401 root 1.1
402 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
403 root 1.1 }
404    
405     if (mg)
406 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
407 root 1.1
408     --enc->depth;
409     }
410    
411     // encode objects, arrays and special \0=false and \1=true values.
412     static void
413     encode_rv (enc_t *enc, SV *sv)
414     {
415 root 1.19 SvGETMAGIC (sv);
416 root 1.1
417 root 1.19 svtype svt = SvTYPE (sv);
418 root 1.18
419 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
420 root 1.1 {
421 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
422     ? types_boolean_stash
423     : gv_stashpv ("Types::Serialiser::Boolean", 1);
424     HV *error_stash = !CBOR_SLOW || types_error_stash
425     ? types_error_stash
426     : gv_stashpv ("Types::Serialiser::Error", 1);
427 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
428     ? cbor_tagged_stash
429     : gv_stashpv ("CBOR::XS::Tagged" , 1);
430 root 1.1
431 root 1.11 HV *stash = SvSTASH (sv);
432    
433     if (stash == boolean_stash)
434 root 1.34 {
435 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
436 root 1.34 return;
437     }
438 root 1.11 else if (stash == error_stash)
439 root 1.34 {
440 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
441 root 1.34 return;
442     }
443 root 1.11 else if (stash == tagged_stash)
444 root 1.6 {
445     if (svt != SVt_PVAV)
446     croak ("encountered CBOR::XS::Tagged object that isn't an array");
447    
448 root 1.35 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
449 root 1.6 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
450 root 1.34
451     return;
452 root 1.6 }
453 root 1.34 }
454    
455     if (ecb_expect_false (SvREFCNT (sv) > 1)
456     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
457     {
458     if (!enc->shareable)
459     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
460    
461     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
462    
463     if (SvOK (*svp))
464     {
465     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
466 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
467 root 1.34 return;
468     }
469     else
470     {
471     sv_setuv (*svp, enc->shareable_idx);
472     ++enc->shareable_idx;
473     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
474     }
475     }
476    
477     if (ecb_expect_false (SvOBJECT (sv)))
478     {
479     HV *stash = SvSTASH (sv);
480     GV *method;
481    
482     if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
483 root 1.1 {
484 root 1.11 dSP;
485    
486 root 1.50 ENTER; SAVETMPS;
487     PUSHMARK (SP);
488 root 1.6 // we re-bless the reference to get overload and other niceties right
489 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
490    
491     PUTBACK;
492     // G_SCALAR ensures that return value is 1
493     call_sv ((SV *)GvCV (method), G_SCALAR);
494     SPAGAIN;
495    
496     // catch this surprisingly common error
497     if (SvROK (TOPs) && SvRV (TOPs) == sv)
498     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
499    
500     encode_sv (enc, POPs);
501    
502     PUTBACK;
503    
504     FREETMPS; LEAVE;
505     }
506     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
507     {
508     dSP;
509 root 1.6
510 root 1.50 ENTER; SAVETMPS;
511     SAVESTACK_POS ();
512     PUSHMARK (SP);
513 root 1.11 EXTEND (SP, 2);
514     // we re-bless the reference to get overload and other niceties right
515     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
516     PUSHs (sv_cbor);
517 root 1.1
518 root 1.11 PUTBACK;
519     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
520     SPAGAIN;
521 root 1.6
522 root 1.11 // catch this surprisingly common error
523     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
524     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
525 root 1.6
526 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
527 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
528 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
529 root 1.6
530 root 1.11 while (count)
531     encode_sv (enc, SP[1 - count--]);
532 root 1.6
533 root 1.11 PUTBACK;
534 root 1.6
535 root 1.11 FREETMPS; LEAVE;
536 root 1.1 }
537 root 1.11 else
538     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
539     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
540 root 1.1 }
541     else if (svt == SVt_PVHV)
542     encode_hv (enc, (HV *)sv);
543     else if (svt == SVt_PVAV)
544     encode_av (enc, (AV *)sv);
545 root 1.18 else
546 root 1.1 {
547 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
548     encode_sv (enc, sv);
549 root 1.1 }
550     }
551    
552     static void
553     encode_nv (enc_t *enc, SV *sv)
554     {
555     double nv = SvNVX (sv);
556    
557     need (enc, 9);
558    
559 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
560     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
561 root 1.1 //TODO: maybe I32?
562 root 1.5 else if (ecb_expect_false (nv == (float)nv))
563 root 1.1 {
564     uint32_t fp = ecb_float_to_binary32 (nv);
565    
566 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
567 root 1.1
568     if (!ecb_big_endian ())
569     fp = ecb_bswap32 (fp);
570    
571     memcpy (enc->cur, &fp, 4);
572     enc->cur += 4;
573     }
574     else
575     {
576     uint64_t fp = ecb_double_to_binary64 (nv);
577    
578 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
579 root 1.1
580     if (!ecb_big_endian ())
581     fp = ecb_bswap64 (fp);
582    
583     memcpy (enc->cur, &fp, 8);
584     enc->cur += 8;
585     }
586     }
587    
588     static void
589     encode_sv (enc_t *enc, SV *sv)
590     {
591     SvGETMAGIC (sv);
592    
593     if (SvPOKp (sv))
594     {
595     STRLEN len;
596     char *str = SvPV (sv, len);
597 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
598 root 1.1 }
599     else if (SvNOKp (sv))
600     encode_nv (enc, sv);
601     else if (SvIOKp (sv))
602     {
603     if (SvIsUV (sv))
604 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
605 root 1.1 else if (SvIVX (sv) >= 0)
606 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
607 root 1.1 else
608 root 1.35 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
609 root 1.1 }
610     else if (SvROK (sv))
611     encode_rv (enc, SvRV (sv));
612     else if (!SvOK (sv))
613 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
614 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
615 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
616 root 1.1 else
617     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
618     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
619     }
620    
621     static SV *
622     encode_cbor (SV *scalar, CBOR *cbor)
623     {
624 root 1.48 enc_t enc = { 0 };
625 root 1.1
626     enc.cbor = *cbor;
627     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
628     enc.cur = SvPVX (enc.sv);
629     enc.end = SvEND (enc.sv);
630    
631     SvPOK_only (enc.sv);
632 root 1.20
633 root 1.32 if (cbor->flags & F_PACK_STRINGS)
634 root 1.20 {
635     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
636     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
637     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
638     }
639    
640 root 1.1 encode_sv (&enc, scalar);
641    
642     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
643     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
644    
645     if (enc.cbor.flags & F_SHRINK)
646     shrink (enc.sv);
647    
648     return enc.sv;
649     }
650    
651     /////////////////////////////////////////////////////////////////////////////
652     // decoder
653    
654     // structure used for decoding CBOR
655     typedef struct
656     {
657     U8 *cur; // current parser pointer
658     U8 *end; // end of input string
659     const char *err; // parse error, if != 0
660     CBOR cbor;
661     U32 depth; // recursion depth
662     U32 maxdepth; // recursion depth limit
663 root 1.19 AV *shareable;
664 root 1.20 AV *stringref;
665 root 1.27 SV *decode_tagged;
666 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
667 root 1.1 } dec_t;
668    
669 root 1.59 // set dec->err to ERRSV
670     ecb_cold static void
671     err_errsv (dec_t *dec)
672     {
673     if (!dec->err)
674     {
675     dec->err_sv = newSVsv (ERRSV);
676    
677     // chop off the trailing \n
678     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
679     *SvEND (dec->err_sv) = 0;
680    
681     dec->err = SvPVutf8_nolen (dec->err_sv);
682     }
683     }
684 root 1.1
685 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
686     ecb_cold static void
687     err_set (dec_t *dec, const char *reason)
688     {
689     if (!dec->err)
690     dec->err = reason;
691     }
692    
693     ecb_cold static void
694     err_unexpected_end (dec_t *dec)
695     {
696     err_set (dec, "unexpected end of CBOR data");
697     }
698    
699     ecb_cold static void
700     err_nesting_exceeded (dec_t *dec)
701     {
702     err_set (dec, ERR_NESTING_EXCEEDED);
703     }
704    
705     #define ERR_DO(do) SB do; goto fail; SE
706     #define ERR(reason) ERR_DO (err_set (dec, reason))
707     #define ERR_ERRSV ERR_DO (err_errsv (dec))
708    
709     #define WANT(len) if (ecb_expect_false ((UV)(dec->end - dec->cur) < (UV)len)) ERR_DO (err_unexpected_end (dec))
710 root 1.1
711 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
712 root 1.1 #define DEC_DEC_DEPTH --dec->depth
713    
714     static UV
715     decode_uint (dec_t *dec)
716     {
717 root 1.35 U8 m = *dec->cur & MINOR_MASK;
718     ++dec->cur;
719    
720     if (ecb_expect_true (m < LENGTH_EXT1))
721     return m;
722 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
723     {
724     WANT (1);
725     dec->cur += 1;
726     return dec->cur[-1];
727     }
728     else if (ecb_expect_true (m == LENGTH_EXT2))
729     {
730     WANT (2);
731     dec->cur += 2;
732     return (((UV)dec->cur[-2]) << 8)
733     | ((UV)dec->cur[-1]);
734     }
735     else if (ecb_expect_true (m == LENGTH_EXT4))
736     {
737     WANT (4);
738     dec->cur += 4;
739     return (((UV)dec->cur[-4]) << 24)
740     | (((UV)dec->cur[-3]) << 16)
741     | (((UV)dec->cur[-2]) << 8)
742     | ((UV)dec->cur[-1]);
743     }
744     else if (ecb_expect_true (m == LENGTH_EXT8))
745 root 1.1 {
746 root 1.36 WANT (8);
747     dec->cur += 8;
748 root 1.34
749 root 1.36 return
750 root 1.34 #if UVSIZE < 8
751 root 1.36 0
752 root 1.34 #else
753 root 1.36 (((UV)dec->cur[-8]) << 56)
754     | (((UV)dec->cur[-7]) << 48)
755     | (((UV)dec->cur[-6]) << 40)
756     | (((UV)dec->cur[-5]) << 32)
757 root 1.34 #endif
758 root 1.36 | (((UV)dec->cur[-4]) << 24)
759     | (((UV)dec->cur[-3]) << 16)
760     | (((UV)dec->cur[-2]) << 8)
761     | ((UV)dec->cur[-1]);
762 root 1.1 }
763 root 1.36 else
764     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
765 root 1.1
766     fail:
767     return 0;
768     }
769    
770     static SV *decode_sv (dec_t *dec);
771    
772     static SV *
773     decode_av (dec_t *dec)
774     {
775     AV *av = newAV ();
776    
777     DEC_INC_DEPTH;
778    
779 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
780 root 1.1 {
781     ++dec->cur;
782    
783     for (;;)
784     {
785     WANT (1);
786    
787 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
788 root 1.1 {
789     ++dec->cur;
790     break;
791     }
792    
793     av_push (av, decode_sv (dec));
794     }
795     }
796     else
797     {
798 root 1.57 UV i, len = decode_uint (dec);
799 root 1.1
800 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
801 root 1.1 av_fill (av, len - 1);
802    
803     for (i = 0; i < len; ++i)
804     AvARRAY (av)[i] = decode_sv (dec);
805     }
806    
807     DEC_DEC_DEPTH;
808     return newRV_noinc ((SV *)av);
809    
810     fail:
811     SvREFCNT_dec (av);
812     DEC_DEC_DEPTH;
813     return &PL_sv_undef;
814     }
815    
816 root 1.16 static void
817     decode_he (dec_t *dec, HV *hv)
818     {
819     // for speed reasons, we specialcase single-string
820 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
821    
822 root 1.23 if (ecb_expect_true (!dec->stringref))
823 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
824 root 1.21 {
825 root 1.56 STRLEN len = decode_uint (dec);
826 root 1.21 char *key = (char *)dec->cur;
827 root 1.16
828 root 1.49 WANT (len);
829 root 1.21 dec->cur += len;
830    
831 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
832 root 1.16
833 root 1.21 return;
834     }
835 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
836 root 1.21 {
837 root 1.56 STRLEN len = decode_uint (dec);
838 root 1.21 char *key = (char *)dec->cur;
839 root 1.16
840 root 1.49 WANT (len);
841 root 1.21 dec->cur += len;
842 root 1.20
843 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
844     if (!is_utf8_string (key, len))
845     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
846 root 1.16
847 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
848 root 1.16
849 root 1.21 return;
850     }
851 root 1.20
852 root 1.21 SV *k = decode_sv (dec);
853     SV *v = decode_sv (dec);
854 root 1.16
855 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
856     // methods, and this is hopefully the only place where it can happen,
857     // so if there is a chance of an exception, take the very slow path.
858     // since catching exceptions is "undocumented/internal/forbidden" by
859     // the new p5p powers, we need to call out to a perl function :/
860     if (ecb_expect_false (SvAMAGIC (k)))
861     {
862     dSP;
863    
864     ENTER; SAVETMPS;
865     PUSHMARK (SP);
866     EXTEND (SP, 3);
867     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
868     PUSHs (sv_2mortal (k));
869     PUSHs (sv_2mortal (v));
870    
871     PUTBACK;
872     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
873     SPAGAIN;
874    
875     FREETMPS; LEAVE;
876    
877     if (SvTRUE (ERRSV))
878     ERR_ERRSV;
879    
880     return;
881     }
882    
883 root 1.21 hv_store_ent (hv, k, v, 0);
884     SvREFCNT_dec (k);
885 root 1.38
886     fail:
887     ;
888 root 1.16 }
889    
890 root 1.1 static SV *
891     decode_hv (dec_t *dec)
892     {
893     HV *hv = newHV ();
894    
895     DEC_INC_DEPTH;
896    
897 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
898 root 1.1 {
899     ++dec->cur;
900    
901     for (;;)
902     {
903     WANT (1);
904    
905 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
906 root 1.1 {
907     ++dec->cur;
908     break;
909     }
910    
911 root 1.16 decode_he (dec, hv);
912 root 1.1 }
913     }
914     else
915     {
916 root 1.57 UV pairs = decode_uint (dec);
917    
918     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
919 root 1.1
920 root 1.16 while (pairs--)
921     decode_he (dec, hv);
922 root 1.1 }
923    
924     DEC_DEC_DEPTH;
925     return newRV_noinc ((SV *)hv);
926    
927     fail:
928     SvREFCNT_dec (hv);
929     DEC_DEC_DEPTH;
930     return &PL_sv_undef;
931     }
932    
933     static SV *
934     decode_str (dec_t *dec, int utf8)
935     {
936 root 1.6 SV *sv = 0;
937 root 1.1
938 root 1.35 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF)
939 root 1.1 {
940 root 1.33 // indefinite length strings
941 root 1.1 ++dec->cur;
942    
943 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
944 root 1.33
945 root 1.1 sv = newSVpvn ("", 0);
946    
947     for (;;)
948     {
949     WANT (1);
950    
951 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
952     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
953 root 1.33 {
954     ++dec->cur;
955     break;
956     }
957     else
958     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
959    
960     STRLEN len = decode_uint (dec);
961 root 1.1
962 root 1.33 WANT (len);
963     sv_catpvn (sv, dec->cur, len);
964     dec->cur += len;
965 root 1.1 }
966     }
967     else
968     {
969     STRLEN len = decode_uint (dec);
970    
971     WANT (len);
972     sv = newSVpvn (dec->cur, len);
973     dec->cur += len;
974 root 1.25
975     if (ecb_expect_false (dec->stringref)
976     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
977     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
978 root 1.1 }
979    
980     if (utf8)
981 root 1.38 {
982     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
983     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
984     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
985    
986     SvUTF8_on (sv);
987     }
988 root 1.1
989     return sv;
990    
991     fail:
992 root 1.6 SvREFCNT_dec (sv);
993 root 1.1 return &PL_sv_undef;
994     }
995    
996     static SV *
997 root 1.3 decode_tagged (dec_t *dec)
998     {
999 root 1.19 SV *sv = 0;
1000 root 1.3 UV tag = decode_uint (dec);
1001 root 1.19
1002     WANT (1);
1003 root 1.3
1004 root 1.18 switch (tag)
1005     {
1006     case CBOR_TAG_MAGIC:
1007 root 1.20 sv = decode_sv (dec);
1008     break;
1009 root 1.18
1010     case CBOR_TAG_INDIRECTION:
1011 root 1.20 sv = newRV_noinc (decode_sv (dec));
1012     break;
1013    
1014     case CBOR_TAG_STRINGREF_NAMESPACE:
1015     {
1016 root 1.55 // do nmot use SAVETMPS/FREETMPS, as these will
1017     // erase mortalised caches, e.g. "shareable"
1018     ENTER;
1019 root 1.20
1020     SAVESPTR (dec->stringref);
1021     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1022    
1023     sv = decode_sv (dec);
1024    
1025 root 1.55 LEAVE;
1026 root 1.20 }
1027     break;
1028    
1029     case CBOR_TAG_STRINGREF:
1030     {
1031 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1032 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1033    
1034     UV idx = decode_uint (dec);
1035    
1036     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
1037     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1038    
1039     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1040     }
1041     break;
1042 root 1.11
1043 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1044     {
1045     if (ecb_expect_false (!dec->shareable))
1046     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1047    
1048 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1049     {
1050     sv = newSV (0);
1051     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1052 root 1.18
1053 root 1.37 SV *osv = decode_sv (dec);
1054     sv_setsv (sv, osv);
1055     SvREFCNT_dec_NN (osv);
1056     }
1057     else
1058     {
1059     av_push (dec->shareable, &PL_sv_undef);
1060     int idx = AvFILLp (dec->shareable);
1061     sv = decode_sv (dec);
1062     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1063     }
1064 root 1.19 }
1065 root 1.20 break;
1066 root 1.18
1067     case CBOR_TAG_VALUE_SHAREDREF:
1068 root 1.17 {
1069 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1070 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1071 root 1.18
1072 root 1.19 UV idx = decode_uint (dec);
1073    
1074 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
1075 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1076    
1077 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1078 root 1.37
1079     if (sv == &PL_sv_undef)
1080     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1081 root 1.17 }
1082 root 1.20 break;
1083 root 1.17
1084 root 1.18 case CBOR_TAG_PERL_OBJECT:
1085     {
1086 root 1.19 sv = decode_sv (dec);
1087    
1088 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1089     ERR ("corrupted CBOR data (non-array perl object)");
1090    
1091     AV *av = (AV *)SvRV (sv);
1092     int len = av_len (av) + 1;
1093     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1094    
1095     if (!stash)
1096     ERR ("cannot decode perl-object (package does not exist)");
1097    
1098     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1099    
1100     if (!method)
1101     ERR ("cannot decode perl-object (package does not have a THAW method)");
1102    
1103     dSP;
1104    
1105 root 1.50 ENTER; SAVETMPS;
1106     PUSHMARK (SP);
1107 root 1.18 EXTEND (SP, len + 1);
1108     // we re-bless the reference to get overload and other niceties right
1109     PUSHs (*av_fetch (av, 0, 1));
1110     PUSHs (sv_cbor);
1111    
1112     int i;
1113    
1114     for (i = 1; i < len; ++i)
1115     PUSHs (*av_fetch (av, i, 1));
1116    
1117     PUTBACK;
1118     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1119     SPAGAIN;
1120    
1121     if (SvTRUE (ERRSV))
1122     {
1123     FREETMPS; LEAVE;
1124 root 1.59 ERR_ERRSV;
1125 root 1.18 }
1126    
1127     SvREFCNT_dec (sv);
1128     sv = SvREFCNT_inc (POPs);
1129 root 1.11
1130 root 1.18 PUTBACK;
1131 root 1.11
1132 root 1.18 FREETMPS; LEAVE;
1133     }
1134 root 1.20 break;
1135 root 1.9
1136 root 1.18 default:
1137     {
1138 root 1.58 SV *tag_sv = newSVuv (tag);
1139    
1140 root 1.19 sv = decode_sv (dec);
1141    
1142 root 1.27 dSP;
1143 root 1.50 ENTER; SAVETMPS;
1144     SAVESTACK_POS ();
1145     PUSHMARK (SP);
1146 root 1.27 EXTEND (SP, 2);
1147 root 1.58 PUSHs (tag_sv);
1148 root 1.27 PUSHs (sv);
1149    
1150     PUTBACK;
1151     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1152     SPAGAIN;
1153    
1154     if (SvTRUE (ERRSV))
1155     {
1156 root 1.58 SvREFCNT_dec (tag_sv);
1157 root 1.27 FREETMPS; LEAVE;
1158 root 1.59 ERR_ERRSV;
1159 root 1.27 }
1160    
1161     if (count)
1162     {
1163 root 1.58 SvREFCNT_dec (tag_sv);
1164 root 1.27 SvREFCNT_dec (sv);
1165     sv = SvREFCNT_inc (POPs);
1166     }
1167     else
1168     {
1169     AV *av = newAV ();
1170 root 1.58 av_push (av, tag_sv);
1171 root 1.27 av_push (av, sv);
1172    
1173     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1174     ? cbor_tagged_stash
1175     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1176     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1177     }
1178 root 1.7
1179 root 1.27 PUTBACK;
1180    
1181     FREETMPS; LEAVE;
1182 root 1.18 }
1183 root 1.20 break;
1184 root 1.11 }
1185 root 1.9
1186 root 1.20 return sv;
1187    
1188 root 1.9 fail:
1189     SvREFCNT_dec (sv);
1190     return &PL_sv_undef;
1191 root 1.3 }
1192    
1193     static SV *
1194 root 1.1 decode_sv (dec_t *dec)
1195     {
1196     WANT (1);
1197    
1198 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1199 root 1.1 {
1200 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1201     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1202     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1203     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1204     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1205     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1206     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1207    
1208     case MAJOR_MISC >> MAJOR_SHIFT:
1209     switch (*dec->cur++ & MINOR_MASK)
1210 root 1.1 {
1211 root 1.35 case SIMPLE_FALSE:
1212 root 1.1 #if CBOR_SLOW
1213 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1214 root 1.1 #endif
1215 root 1.10 return newSVsv (types_false);
1216 root 1.35 case SIMPLE_TRUE:
1217 root 1.1 #if CBOR_SLOW
1218 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1219 root 1.1 #endif
1220 root 1.10 return newSVsv (types_true);
1221 root 1.35 case SIMPLE_NULL:
1222 root 1.1 return newSVsv (&PL_sv_undef);
1223 root 1.35 case SIMPLE_UNDEF:
1224 root 1.10 #if CBOR_SLOW
1225     types_error = get_bool ("Types::Serialiser::error");
1226     #endif
1227     return newSVsv (types_error);
1228 root 1.1
1229 root 1.35 case MISC_FLOAT16:
1230 root 1.2 {
1231     WANT (2);
1232    
1233     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1234     dec->cur += 2;
1235    
1236     return newSVnv (ecb_binary16_to_float (fp));
1237     }
1238 root 1.1
1239 root 1.35 case MISC_FLOAT32:
1240 root 1.1 {
1241     uint32_t fp;
1242     WANT (4);
1243     memcpy (&fp, dec->cur, 4);
1244     dec->cur += 4;
1245    
1246     if (!ecb_big_endian ())
1247     fp = ecb_bswap32 (fp);
1248    
1249     return newSVnv (ecb_binary32_to_float (fp));
1250     }
1251    
1252 root 1.35 case MISC_FLOAT64:
1253 root 1.1 {
1254     uint64_t fp;
1255     WANT (8);
1256     memcpy (&fp, dec->cur, 8);
1257     dec->cur += 8;
1258    
1259     if (!ecb_big_endian ())
1260     fp = ecb_bswap64 (fp);
1261    
1262     return newSVnv (ecb_binary64_to_double (fp));
1263     }
1264    
1265 root 1.35 // 0..19 unassigned simple
1266 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1267     // 28-30 unassigned misc
1268     // 31 break code
1269 root 1.1 default:
1270 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1271 root 1.1 }
1272    
1273     break;
1274     }
1275    
1276     fail:
1277     return &PL_sv_undef;
1278     }
1279    
1280     static SV *
1281     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1282     {
1283 root 1.48 dec_t dec = { 0 };
1284 root 1.1 SV *sv;
1285 root 1.16 STRLEN len;
1286     char *data = SvPVbyte (string, len);
1287 root 1.1
1288 root 1.16 if (len > cbor->max_size && cbor->max_size)
1289     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1290     (unsigned long)len, (unsigned long)cbor->max_size);
1291 root 1.1
1292     dec.cbor = *cbor;
1293 root 1.16 dec.cur = (U8 *)data;
1294     dec.end = (U8 *)data + len;
1295 root 1.1
1296     sv = decode_sv (&dec);
1297    
1298     if (offset_return)
1299     *offset_return = dec.cur;
1300    
1301     if (!(offset_return || !sv))
1302 root 1.2 if (dec.cur != dec.end && !dec.err)
1303     dec.err = "garbage after CBOR object";
1304    
1305     if (dec.err)
1306 root 1.1 {
1307 root 1.39 if (dec.shareable)
1308     {
1309 root 1.58 // need to break cyclic links, which would all be in shareable
1310 root 1.39 int i;
1311     SV **svp;
1312    
1313     for (i = av_len (dec.shareable) + 1; i--; )
1314     if ((svp = av_fetch (dec.shareable, i, 0)))
1315     sv_setsv (*svp, &PL_sv_undef);
1316     }
1317    
1318 root 1.2 SvREFCNT_dec (sv);
1319 root 1.59
1320     if (dec.err_sv)
1321     sv_2mortal (dec.err_sv);
1322    
1323 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1324 root 1.1 }
1325    
1326     sv = sv_2mortal (sv);
1327    
1328     return sv;
1329     }
1330    
1331     /////////////////////////////////////////////////////////////////////////////
1332 root 1.40 // incremental parser
1333    
1334     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1335    
1336     // returns 0 for notyet, 1 for success or error
1337     static int
1338     incr_parse (CBOR *self, SV *cborstr)
1339     {
1340     STRLEN cur;
1341     SvPV (cborstr, cur);
1342    
1343     while (ecb_expect_true (self->incr_need <= cur))
1344     {
1345     // table of integer count bytes
1346     static I8 incr_len[MINOR_MASK + 1] = {
1347     0, 0, 0, 0, 0, 0, 0, 0,
1348     0, 0, 0, 0, 0, 0, 0, 0,
1349     0, 0, 0, 0, 0, 0, 0, 0,
1350     1, 2, 4, 8,-1,-1,-1,-2
1351     };
1352    
1353     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1354     U8 m = *p & MINOR_MASK;
1355     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1356     I8 ilen = incr_len[m];
1357    
1358     self->incr_need = self->incr_pos + 1;
1359    
1360     if (ecb_expect_false (ilen < 0))
1361     {
1362     if (m != MINOR_INDEF)
1363     return 1; // error
1364    
1365     if (*p == (MAJOR_MISC | MINOR_INDEF))
1366     {
1367     if (count >= 0)
1368     return 1; // error
1369    
1370     count = 1;
1371     }
1372     else
1373     {
1374     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1375     count = -1;
1376     }
1377     }
1378     else
1379     {
1380     self->incr_need += ilen;
1381     if (ecb_expect_false (self->incr_need > cur))
1382     return 0;
1383    
1384     int major = *p >> MAJOR_SHIFT;
1385    
1386     switch (major)
1387     {
1388 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1389     ++count; // tags merely prefix another value
1390     break;
1391    
1392 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1393     case MAJOR_TEXT >> MAJOR_SHIFT:
1394     case MAJOR_ARRAY >> MAJOR_SHIFT:
1395     case MAJOR_MAP >> MAJOR_SHIFT:
1396     {
1397     UV len;
1398    
1399     if (ecb_expect_false (ilen))
1400     {
1401     len = 0;
1402    
1403     do {
1404     len = (len << 8) | *++p;
1405     } while (--ilen);
1406     }
1407     else
1408     len = m;
1409    
1410     switch (major)
1411     {
1412     case MAJOR_BYTES >> MAJOR_SHIFT:
1413     case MAJOR_TEXT >> MAJOR_SHIFT:
1414     self->incr_need += len;
1415     if (ecb_expect_false (self->incr_need > cur))
1416     return 0;
1417    
1418     break;
1419    
1420     case MAJOR_MAP >> MAJOR_SHIFT:
1421     len <<= 1;
1422     case MAJOR_ARRAY >> MAJOR_SHIFT:
1423     if (len)
1424     {
1425     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1426     count = len + 1;
1427     }
1428     break;
1429     }
1430     }
1431     }
1432     }
1433    
1434     self->incr_pos = self->incr_need;
1435    
1436     if (count > 0)
1437     {
1438     while (!--count)
1439     {
1440     if (!AvFILLp (self->incr_count))
1441     return 1; // done
1442    
1443     SvREFCNT_dec_NN (av_pop (self->incr_count));
1444     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1445     }
1446    
1447     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1448     }
1449     }
1450    
1451     return 0;
1452     }
1453    
1454    
1455     /////////////////////////////////////////////////////////////////////////////
1456 root 1.1 // XS interface functions
1457    
1458     MODULE = CBOR::XS PACKAGE = CBOR::XS
1459    
1460     BOOT:
1461     {
1462     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1463 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1464 root 1.1
1465 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1466     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1467    
1468     types_true = get_bool ("Types::Serialiser::true" );
1469     types_false = get_bool ("Types::Serialiser::false");
1470     types_error = get_bool ("Types::Serialiser::error");
1471 root 1.11
1472 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1473    
1474 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1475     SvREADONLY_on (sv_cbor);
1476 root 1.56
1477     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1478 root 1.1 }
1479    
1480     PROTOTYPES: DISABLE
1481    
1482     void CLONE (...)
1483     CODE:
1484 root 1.10 cbor_stash = 0;
1485     cbor_tagged_stash = 0;
1486     types_error_stash = 0;
1487     types_boolean_stash = 0;
1488 root 1.1
1489     void new (char *klass)
1490     PPCODE:
1491     {
1492     SV *pv = NEWSV (0, sizeof (CBOR));
1493     SvPOK_only (pv);
1494     cbor_init ((CBOR *)SvPVX (pv));
1495     XPUSHs (sv_2mortal (sv_bless (
1496     newRV_noinc (pv),
1497     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1498     )));
1499     }
1500    
1501     void shrink (CBOR *self, int enable = 1)
1502     ALIAS:
1503     shrink = F_SHRINK
1504     allow_unknown = F_ALLOW_UNKNOWN
1505 root 1.18 allow_sharing = F_ALLOW_SHARING
1506 root 1.37 allow_cycles = F_ALLOW_CYCLES
1507 root 1.32 pack_strings = F_PACK_STRINGS
1508 root 1.54 text_keys = F_TEXT_KEYS
1509     text_strings = F_TEXT_STRINGS
1510 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1511 root 1.1 PPCODE:
1512     {
1513     if (enable)
1514     self->flags |= ix;
1515     else
1516     self->flags &= ~ix;
1517    
1518     XPUSHs (ST (0));
1519     }
1520    
1521     void get_shrink (CBOR *self)
1522     ALIAS:
1523     get_shrink = F_SHRINK
1524     get_allow_unknown = F_ALLOW_UNKNOWN
1525 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1526 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1527 root 1.32 get_pack_strings = F_PACK_STRINGS
1528 root 1.54 get_text_keys = F_TEXT_KEYS
1529     get_text_strings = F_TEXT_STRINGS
1530 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1531 root 1.1 PPCODE:
1532     XPUSHs (boolSV (self->flags & ix));
1533    
1534     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1535     PPCODE:
1536     self->max_depth = max_depth;
1537     XPUSHs (ST (0));
1538    
1539     U32 get_max_depth (CBOR *self)
1540     CODE:
1541     RETVAL = self->max_depth;
1542     OUTPUT:
1543     RETVAL
1544    
1545     void max_size (CBOR *self, U32 max_size = 0)
1546     PPCODE:
1547     self->max_size = max_size;
1548     XPUSHs (ST (0));
1549    
1550     int get_max_size (CBOR *self)
1551     CODE:
1552     RETVAL = self->max_size;
1553     OUTPUT:
1554     RETVAL
1555    
1556 root 1.27 void filter (CBOR *self, SV *filter = 0)
1557     PPCODE:
1558     SvREFCNT_dec (self->filter);
1559     self->filter = filter ? newSVsv (filter) : filter;
1560     XPUSHs (ST (0));
1561    
1562     SV *get_filter (CBOR *self)
1563     CODE:
1564     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1565     OUTPUT:
1566     RETVAL
1567    
1568 root 1.1 void encode (CBOR *self, SV *scalar)
1569     PPCODE:
1570     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1571     XPUSHs (scalar);
1572    
1573     void decode (CBOR *self, SV *cborstr)
1574     PPCODE:
1575     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1576     XPUSHs (cborstr);
1577    
1578     void decode_prefix (CBOR *self, SV *cborstr)
1579     PPCODE:
1580     {
1581     SV *sv;
1582     char *offset;
1583     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1584     EXTEND (SP, 2);
1585     PUSHs (sv);
1586     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1587     }
1588    
1589 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1590 root 1.42 ALIAS:
1591     incr_parse_multiple = 1
1592 root 1.40 PPCODE:
1593     {
1594     if (SvUTF8 (cborstr))
1595     sv_utf8_downgrade (cborstr, 0);
1596    
1597     if (!self->incr_count)
1598     {
1599     self->incr_count = newAV ();
1600     self->incr_pos = 0;
1601     self->incr_need = 1;
1602    
1603     av_push (self->incr_count, newSViv (1));
1604     }
1605    
1606 root 1.41 do
1607 root 1.40 {
1608     if (!incr_parse (self, cborstr))
1609     {
1610     if (self->incr_need > self->max_size && self->max_size)
1611     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1612     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1613    
1614     break;
1615     }
1616    
1617 root 1.41 SV *sv;
1618     char *offset;
1619 root 1.40
1620 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1621     XPUSHs (sv);
1622 root 1.40
1623 root 1.41 sv_chop (cborstr, offset);
1624 root 1.40
1625 root 1.41 av_clear (self->incr_count);
1626     av_push (self->incr_count, newSViv (1));
1627 root 1.40
1628 root 1.41 self->incr_pos = 0;
1629     self->incr_need = self->incr_pos + 1;
1630 root 1.40 }
1631 root 1.42 while (ix);
1632 root 1.40 }
1633    
1634     void incr_reset (CBOR *self)
1635     CODE:
1636     {
1637     SvREFCNT_dec (self->incr_count);
1638     self->incr_count = 0;
1639     }
1640    
1641 root 1.27 void DESTROY (CBOR *self)
1642     PPCODE:
1643     cbor_free (self);
1644    
1645 root 1.1 PROTOTYPES: ENABLE
1646    
1647     void encode_cbor (SV *scalar)
1648 root 1.36 ALIAS:
1649     encode_cbor = 0
1650     encode_cbor_sharing = F_ALLOW_SHARING
1651 root 1.1 PPCODE:
1652     {
1653     CBOR cbor;
1654     cbor_init (&cbor);
1655 root 1.36 cbor.flags |= ix;
1656 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1657     XPUSHs (scalar);
1658     }
1659    
1660     void decode_cbor (SV *cborstr)
1661     PPCODE:
1662     {
1663     CBOR cbor;
1664     cbor_init (&cbor);
1665     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1666     XPUSHs (cborstr);
1667     }
1668