ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.61
Committed: Sat Nov 26 02:10:19 2016 UTC (7 years, 5 months ago) by root
Branch: MAIN
Changes since 1.60: +9 -11 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.28 #ifndef SvREFCNT_dec_NN
27     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
28     #endif
29 root 1.14
30 root 1.35 // known major and minor types
31     enum cbor_type
32     {
33     MAJOR_SHIFT = 5,
34     MINOR_MASK = 0x1f,
35    
36     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
37     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
38     MAJOR_BYTES = 2 << MAJOR_SHIFT,
39     MAJOR_TEXT = 3 << MAJOR_SHIFT,
40     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
41     MAJOR_MAP = 5 << MAJOR_SHIFT,
42     MAJOR_TAG = 6 << MAJOR_SHIFT,
43     MAJOR_MISC = 7 << MAJOR_SHIFT,
44    
45     // INT/STRING/ARRAY/MAP subtypes
46     LENGTH_EXT1 = 24,
47     LENGTH_EXT2 = 25,
48     LENGTH_EXT4 = 26,
49     LENGTH_EXT8 = 27,
50    
51     // SIMPLE types (effectively MISC subtypes)
52     SIMPLE_FALSE = 20,
53     SIMPLE_TRUE = 21,
54     SIMPLE_NULL = 22,
55     SIMPLE_UNDEF = 23,
56    
57     // MISC subtype (unused)
58     MISC_EXT1 = 24,
59     MISC_FLOAT16 = 25,
60     MISC_FLOAT32 = 26,
61     MISC_FLOAT64 = 27,
62    
63     // BYTES/TEXT/ARRAY/MAP
64     MINOR_INDEF = 31,
65     };
66    
67 root 1.9 // known tags
68 root 1.8 enum cbor_tag
69     {
70 root 1.35 // extensions
71     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
72     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
73     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
74     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
75     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
76     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
77     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
78    
79     // rfc7049
80     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
81     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
82     CBOR_TAG_POS_BIGNUM = 2, // byte string
83     CBOR_TAG_NEG_BIGNUM = 3, // byte string
84     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
85     CBOR_TAG_BIGFLOAT = 5, // array
86    
87     CBOR_TAG_CONV_B64U = 21, // base64url, any
88     CBOR_TAG_CONV_B64 = 22, // base64, any
89     CBOR_TAG_CONV_HEX = 23, // base16, any
90     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
91    
92     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
93     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
94     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
95     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
96     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
97 root 1.8
98 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
99 root 1.8 };
100    
101 root 1.24 #define F_SHRINK 0x00000001UL
102     #define F_ALLOW_UNKNOWN 0x00000002UL
103 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
104 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
105 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
106     #define F_PACK_STRINGS 0x00000020UL
107     #define F_TEXT_KEYS 0x00000040UL
108     #define F_TEXT_STRINGS 0x00000080UL
109     #define F_VALIDATE_UTF8 0x00000100UL
110 root 1.1
111     #define INIT_SIZE 32 // initial scalar size to be allocated
112    
113     #define SB do {
114     #define SE } while (0)
115    
116     #define IN_RANGE_INC(type,val,beg,end) \
117     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
118     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
119    
120     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
121    
122     #ifdef USE_ITHREADS
123     # define CBOR_SLOW 1
124     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
125     #else
126     # define CBOR_SLOW 0
127     # define CBOR_STASH cbor_stash
128     #endif
129    
130 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
131 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
132 root 1.1
133     typedef struct {
134     U32 flags;
135     U32 max_depth;
136     STRLEN max_size;
137 root 1.27 SV *filter;
138 root 1.40
139     // for the incremental parser
140     STRLEN incr_pos; // the current offset into the text
141     STRLEN incr_need; // minimum bytes needed to decode
142     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
143 root 1.1 } CBOR;
144    
145 root 1.5 ecb_inline void
146 root 1.1 cbor_init (CBOR *cbor)
147     {
148     Zero (cbor, 1, CBOR);
149     cbor->max_depth = 512;
150     }
151    
152 root 1.27 ecb_inline void
153     cbor_free (CBOR *cbor)
154     {
155     SvREFCNT_dec (cbor->filter);
156 root 1.40 SvREFCNT_dec (cbor->incr_count);
157 root 1.27 }
158    
159 root 1.1 /////////////////////////////////////////////////////////////////////////////
160     // utility functions
161    
162 root 1.5 ecb_inline SV *
163 root 1.1 get_bool (const char *name)
164     {
165     SV *sv = get_sv (name, 1);
166    
167     SvREADONLY_on (sv);
168     SvREADONLY_on (SvRV (sv));
169    
170     return sv;
171     }
172    
173 root 1.5 ecb_inline void
174 root 1.1 shrink (SV *sv)
175     {
176     sv_utf8_downgrade (sv, 1);
177    
178     if (SvLEN (sv) > SvCUR (sv) + 1)
179     {
180     #ifdef SvPV_shrink_to_cur
181     SvPV_shrink_to_cur (sv);
182     #elif defined (SvPV_renew)
183     SvPV_renew (sv, SvCUR (sv) + 1);
184     #endif
185     }
186     }
187    
188 root 1.21 // minimum length of a string to be registered for stringref
189     ecb_inline int
190     minimum_string_length (UV idx)
191     {
192 root 1.61
193     return idx <= 23 ? 3
194     : idx <= 0xffU ? 4
195     : idx <= 0xffffU ? 5
196     : idx <= 0xffffffffU ? 7
197     : 11;
198 root 1.21 }
199    
200 root 1.1 /////////////////////////////////////////////////////////////////////////////
201     // encoder
202    
203     // structure used for encoding CBOR
204     typedef struct
205     {
206     char *cur; // SvPVX (sv) + current output position
207     char *end; // SvEND (sv)
208     SV *sv; // result scalar
209     CBOR cbor;
210     U32 depth; // recursion level
211 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
212     UV stringref_idx;
213 root 1.19 HV *shareable; // ptr => index, or 0
214     UV shareable_idx;
215 root 1.1 } enc_t;
216    
217 root 1.5 ecb_inline void
218 root 1.1 need (enc_t *enc, STRLEN len)
219     {
220 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
221 root 1.1 {
222     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
223     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
224     enc->cur = SvPVX (enc->sv) + cur;
225     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
226     }
227     }
228    
229 root 1.5 ecb_inline void
230 root 1.1 encode_ch (enc_t *enc, char ch)
231     {
232     need (enc, 1);
233     *enc->cur++ = ch;
234     }
235    
236     static void
237     encode_uint (enc_t *enc, int major, UV len)
238     {
239     need (enc, 9);
240    
241 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
242 root 1.1 *enc->cur++ = major | len;
243 root 1.36 else if (ecb_expect_true (len <= 0xffU))
244 root 1.1 {
245 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
246 root 1.1 *enc->cur++ = len;
247     }
248 root 1.36 else if (len <= 0xffffU)
249 root 1.1 {
250 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
251 root 1.1 *enc->cur++ = len >> 8;
252     *enc->cur++ = len;
253     }
254 root 1.36 else if (len <= 0xffffffffU)
255 root 1.1 {
256 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
257 root 1.1 *enc->cur++ = len >> 24;
258     *enc->cur++ = len >> 16;
259     *enc->cur++ = len >> 8;
260     *enc->cur++ = len;
261     }
262 root 1.4 else
263 root 1.1 {
264 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
265 root 1.1 *enc->cur++ = len >> 56;
266     *enc->cur++ = len >> 48;
267     *enc->cur++ = len >> 40;
268     *enc->cur++ = len >> 32;
269     *enc->cur++ = len >> 24;
270     *enc->cur++ = len >> 16;
271     *enc->cur++ = len >> 8;
272     *enc->cur++ = len;
273     }
274     }
275    
276 root 1.21 ecb_inline void
277     encode_tag (enc_t *enc, UV tag)
278     {
279 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
280 root 1.21 }
281    
282 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
283     ecb_noinline static void
284     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
285     {
286     STRLEN ulen = len;
287     U8 *p, *pend = (U8 *)str + len;
288    
289     for (p = (U8 *)str; p < pend; ++p)
290     ulen += *p >> 7; // count set high bits
291    
292     encode_uint (enc, MAJOR_TEXT, ulen);
293    
294     need (enc, ulen);
295     for (p = (U8 *)str; p < pend; ++p)
296     if (*p < 0x80)
297     *enc->cur++ = *p;
298     else
299     {
300     *enc->cur++ = 0xc0 + (*p >> 6);
301     *enc->cur++ = 0x80 + (*p & 63);
302     }
303     }
304    
305 root 1.30 ecb_inline void
306 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
307 root 1.30 {
308 root 1.53 if (ecb_expect_false (upgrade_utf8))
309 root 1.51 if (!utf8)
310     {
311 root 1.53 encode_str_utf8 (enc, utf8, str, len);
312 root 1.51 return;
313     }
314    
315 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
316 root 1.30 need (enc, len);
317     memcpy (enc->cur, str, len);
318     enc->cur += len;
319     }
320    
321 root 1.54 ecb_inline void
322 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
323 root 1.1 {
324 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
325 root 1.21 {
326 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
327 root 1.21
328     if (SvOK (*svp))
329     {
330     // already registered, use stringref
331     encode_tag (enc, CBOR_TAG_STRINGREF);
332 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
333 root 1.21 return;
334     }
335     else if (len >= minimum_string_length (enc->stringref_idx))
336     {
337     // register only
338     sv_setuv (*svp, enc->stringref_idx);
339     ++enc->stringref_idx;
340     }
341     }
342    
343 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
344 root 1.1 }
345    
346     static void encode_sv (enc_t *enc, SV *sv);
347    
348     static void
349     encode_av (enc_t *enc, AV *av)
350     {
351     int i, len = av_len (av);
352    
353     if (enc->depth >= enc->cbor.max_depth)
354     croak (ERR_NESTING_EXCEEDED);
355    
356     ++enc->depth;
357    
358 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
359 root 1.1
360 root 1.45 if (SvMAGICAL (av))
361     for (i = 0; i <= len; ++i)
362     {
363     SV **svp = av_fetch (av, i, 0);
364     encode_sv (enc, svp ? *svp : &PL_sv_undef);
365     }
366     else
367     for (i = 0; i <= len; ++i)
368 root 1.46 {
369     SV *sv = AvARRAY (av)[i];
370     encode_sv (enc, sv ? sv : &PL_sv_undef);
371     }
372 root 1.1
373     --enc->depth;
374     }
375    
376     static void
377     encode_hv (enc_t *enc, HV *hv)
378     {
379     HE *he;
380    
381     if (enc->depth >= enc->cbor.max_depth)
382     croak (ERR_NESTING_EXCEEDED);
383    
384     ++enc->depth;
385    
386     int pairs = hv_iterinit (hv);
387     int mg = SvMAGICAL (hv);
388    
389     if (mg)
390 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
391 root 1.1 else
392 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
393 root 1.1
394     while ((he = hv_iternext (hv)))
395     {
396 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
397     encode_sv (enc, HeSVKEY (he));
398 root 1.1 else
399 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
400 root 1.1
401 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
402 root 1.1 }
403    
404     if (mg)
405 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
406 root 1.1
407     --enc->depth;
408     }
409    
410     // encode objects, arrays and special \0=false and \1=true values.
411     static void
412     encode_rv (enc_t *enc, SV *sv)
413     {
414 root 1.19 SvGETMAGIC (sv);
415 root 1.1
416 root 1.19 svtype svt = SvTYPE (sv);
417 root 1.18
418 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
419 root 1.1 {
420 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
421     ? types_boolean_stash
422     : gv_stashpv ("Types::Serialiser::Boolean", 1);
423     HV *error_stash = !CBOR_SLOW || types_error_stash
424     ? types_error_stash
425     : gv_stashpv ("Types::Serialiser::Error", 1);
426 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
427     ? cbor_tagged_stash
428     : gv_stashpv ("CBOR::XS::Tagged" , 1);
429 root 1.1
430 root 1.11 HV *stash = SvSTASH (sv);
431    
432     if (stash == boolean_stash)
433 root 1.34 {
434 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
435 root 1.34 return;
436     }
437 root 1.11 else if (stash == error_stash)
438 root 1.34 {
439 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
440 root 1.34 return;
441     }
442 root 1.11 else if (stash == tagged_stash)
443 root 1.6 {
444     if (svt != SVt_PVAV)
445     croak ("encountered CBOR::XS::Tagged object that isn't an array");
446    
447 root 1.35 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
448 root 1.6 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
449 root 1.34
450     return;
451 root 1.6 }
452 root 1.34 }
453    
454     if (ecb_expect_false (SvREFCNT (sv) > 1)
455     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
456     {
457     if (!enc->shareable)
458     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
459    
460     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
461    
462     if (SvOK (*svp))
463     {
464     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
465 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
466 root 1.34 return;
467     }
468     else
469     {
470     sv_setuv (*svp, enc->shareable_idx);
471     ++enc->shareable_idx;
472     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
473     }
474     }
475    
476     if (ecb_expect_false (SvOBJECT (sv)))
477     {
478     HV *stash = SvSTASH (sv);
479     GV *method;
480    
481 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
482     croak ("encountered object '%s', but forbid_objects is enabled",
483     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
484     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
485 root 1.1 {
486 root 1.11 dSP;
487    
488 root 1.50 ENTER; SAVETMPS;
489     PUSHMARK (SP);
490 root 1.6 // we re-bless the reference to get overload and other niceties right
491 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
492    
493     PUTBACK;
494     // G_SCALAR ensures that return value is 1
495     call_sv ((SV *)GvCV (method), G_SCALAR);
496     SPAGAIN;
497    
498     // catch this surprisingly common error
499     if (SvROK (TOPs) && SvRV (TOPs) == sv)
500     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
501    
502     encode_sv (enc, POPs);
503    
504     PUTBACK;
505    
506     FREETMPS; LEAVE;
507     }
508     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
509     {
510     dSP;
511 root 1.6
512 root 1.50 ENTER; SAVETMPS;
513     SAVESTACK_POS ();
514     PUSHMARK (SP);
515 root 1.11 EXTEND (SP, 2);
516     // we re-bless the reference to get overload and other niceties right
517     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
518     PUSHs (sv_cbor);
519 root 1.1
520 root 1.11 PUTBACK;
521     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
522     SPAGAIN;
523 root 1.6
524 root 1.11 // catch this surprisingly common error
525     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
526     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
527 root 1.6
528 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
529 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
530 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
531 root 1.6
532 root 1.11 while (count)
533     encode_sv (enc, SP[1 - count--]);
534 root 1.6
535 root 1.11 PUTBACK;
536 root 1.6
537 root 1.11 FREETMPS; LEAVE;
538 root 1.1 }
539 root 1.11 else
540     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
541     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
542 root 1.1 }
543     else if (svt == SVt_PVHV)
544     encode_hv (enc, (HV *)sv);
545     else if (svt == SVt_PVAV)
546     encode_av (enc, (AV *)sv);
547 root 1.18 else
548 root 1.1 {
549 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
550     encode_sv (enc, sv);
551 root 1.1 }
552     }
553    
554     static void
555     encode_nv (enc_t *enc, SV *sv)
556     {
557     double nv = SvNVX (sv);
558    
559     need (enc, 9);
560    
561 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
562     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
563 root 1.1 //TODO: maybe I32?
564 root 1.5 else if (ecb_expect_false (nv == (float)nv))
565 root 1.1 {
566     uint32_t fp = ecb_float_to_binary32 (nv);
567    
568 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
569 root 1.1
570     if (!ecb_big_endian ())
571     fp = ecb_bswap32 (fp);
572    
573     memcpy (enc->cur, &fp, 4);
574     enc->cur += 4;
575     }
576     else
577     {
578     uint64_t fp = ecb_double_to_binary64 (nv);
579    
580 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
581 root 1.1
582     if (!ecb_big_endian ())
583     fp = ecb_bswap64 (fp);
584    
585     memcpy (enc->cur, &fp, 8);
586     enc->cur += 8;
587     }
588     }
589    
590     static void
591     encode_sv (enc_t *enc, SV *sv)
592     {
593     SvGETMAGIC (sv);
594    
595     if (SvPOKp (sv))
596     {
597     STRLEN len;
598     char *str = SvPV (sv, len);
599 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
600 root 1.1 }
601     else if (SvNOKp (sv))
602     encode_nv (enc, sv);
603     else if (SvIOKp (sv))
604     {
605     if (SvIsUV (sv))
606 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
607 root 1.1 else if (SvIVX (sv) >= 0)
608 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
609 root 1.1 else
610 root 1.35 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
611 root 1.1 }
612     else if (SvROK (sv))
613     encode_rv (enc, SvRV (sv));
614     else if (!SvOK (sv))
615 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
616 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
617 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
618 root 1.1 else
619     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
620     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
621     }
622    
623     static SV *
624     encode_cbor (SV *scalar, CBOR *cbor)
625     {
626 root 1.48 enc_t enc = { 0 };
627 root 1.1
628     enc.cbor = *cbor;
629     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
630     enc.cur = SvPVX (enc.sv);
631     enc.end = SvEND (enc.sv);
632    
633     SvPOK_only (enc.sv);
634 root 1.20
635 root 1.32 if (cbor->flags & F_PACK_STRINGS)
636 root 1.20 {
637     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
638     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
639     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
640     }
641    
642 root 1.1 encode_sv (&enc, scalar);
643    
644     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
645     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
646    
647     if (enc.cbor.flags & F_SHRINK)
648     shrink (enc.sv);
649    
650     return enc.sv;
651     }
652    
653     /////////////////////////////////////////////////////////////////////////////
654     // decoder
655    
656     // structure used for decoding CBOR
657     typedef struct
658     {
659     U8 *cur; // current parser pointer
660     U8 *end; // end of input string
661     const char *err; // parse error, if != 0
662     CBOR cbor;
663     U32 depth; // recursion depth
664     U32 maxdepth; // recursion depth limit
665 root 1.19 AV *shareable;
666 root 1.20 AV *stringref;
667 root 1.27 SV *decode_tagged;
668 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
669 root 1.1 } dec_t;
670    
671 root 1.59 // set dec->err to ERRSV
672     ecb_cold static void
673     err_errsv (dec_t *dec)
674     {
675     if (!dec->err)
676     {
677     dec->err_sv = newSVsv (ERRSV);
678    
679     // chop off the trailing \n
680     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
681     *SvEND (dec->err_sv) = 0;
682    
683     dec->err = SvPVutf8_nolen (dec->err_sv);
684     }
685     }
686 root 1.1
687 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
688     ecb_cold static void
689     err_set (dec_t *dec, const char *reason)
690     {
691     if (!dec->err)
692     dec->err = reason;
693     }
694    
695     ecb_cold static void
696     err_unexpected_end (dec_t *dec)
697     {
698     err_set (dec, "unexpected end of CBOR data");
699     }
700    
701     ecb_cold static void
702     err_nesting_exceeded (dec_t *dec)
703     {
704     err_set (dec, ERR_NESTING_EXCEEDED);
705     }
706    
707     #define ERR_DO(do) SB do; goto fail; SE
708     #define ERR(reason) ERR_DO (err_set (dec, reason))
709     #define ERR_ERRSV ERR_DO (err_errsv (dec))
710    
711 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
712 root 1.1
713 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
714 root 1.1 #define DEC_DEC_DEPTH --dec->depth
715    
716     static UV
717     decode_uint (dec_t *dec)
718     {
719 root 1.35 U8 m = *dec->cur & MINOR_MASK;
720     ++dec->cur;
721    
722     if (ecb_expect_true (m < LENGTH_EXT1))
723     return m;
724 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
725     {
726     WANT (1);
727     dec->cur += 1;
728     return dec->cur[-1];
729     }
730     else if (ecb_expect_true (m == LENGTH_EXT2))
731     {
732     WANT (2);
733     dec->cur += 2;
734     return (((UV)dec->cur[-2]) << 8)
735     | ((UV)dec->cur[-1]);
736     }
737     else if (ecb_expect_true (m == LENGTH_EXT4))
738     {
739     WANT (4);
740     dec->cur += 4;
741     return (((UV)dec->cur[-4]) << 24)
742     | (((UV)dec->cur[-3]) << 16)
743     | (((UV)dec->cur[-2]) << 8)
744     | ((UV)dec->cur[-1]);
745     }
746     else if (ecb_expect_true (m == LENGTH_EXT8))
747 root 1.1 {
748 root 1.36 WANT (8);
749     dec->cur += 8;
750 root 1.34
751 root 1.36 return
752 root 1.34 #if UVSIZE < 8
753 root 1.36 0
754 root 1.34 #else
755 root 1.36 (((UV)dec->cur[-8]) << 56)
756     | (((UV)dec->cur[-7]) << 48)
757     | (((UV)dec->cur[-6]) << 40)
758     | (((UV)dec->cur[-5]) << 32)
759 root 1.34 #endif
760 root 1.36 | (((UV)dec->cur[-4]) << 24)
761     | (((UV)dec->cur[-3]) << 16)
762     | (((UV)dec->cur[-2]) << 8)
763     | ((UV)dec->cur[-1]);
764 root 1.1 }
765 root 1.36 else
766     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
767 root 1.1
768     fail:
769     return 0;
770     }
771    
772     static SV *decode_sv (dec_t *dec);
773    
774     static SV *
775     decode_av (dec_t *dec)
776     {
777     AV *av = newAV ();
778    
779     DEC_INC_DEPTH;
780    
781 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
782 root 1.1 {
783     ++dec->cur;
784    
785     for (;;)
786     {
787     WANT (1);
788    
789 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
790 root 1.1 {
791     ++dec->cur;
792     break;
793     }
794    
795     av_push (av, decode_sv (dec));
796     }
797     }
798     else
799     {
800 root 1.57 UV i, len = decode_uint (dec);
801 root 1.1
802 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
803 root 1.1 av_fill (av, len - 1);
804    
805     for (i = 0; i < len; ++i)
806     AvARRAY (av)[i] = decode_sv (dec);
807     }
808    
809     DEC_DEC_DEPTH;
810     return newRV_noinc ((SV *)av);
811    
812     fail:
813     SvREFCNT_dec (av);
814     DEC_DEC_DEPTH;
815     return &PL_sv_undef;
816     }
817    
818 root 1.16 static void
819     decode_he (dec_t *dec, HV *hv)
820     {
821     // for speed reasons, we specialcase single-string
822 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
823    
824 root 1.23 if (ecb_expect_true (!dec->stringref))
825 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
826 root 1.21 {
827 root 1.56 STRLEN len = decode_uint (dec);
828 root 1.21 char *key = (char *)dec->cur;
829 root 1.16
830 root 1.49 WANT (len);
831 root 1.21 dec->cur += len;
832    
833 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
834 root 1.16
835 root 1.21 return;
836     }
837 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
838 root 1.21 {
839 root 1.56 STRLEN len = decode_uint (dec);
840 root 1.21 char *key = (char *)dec->cur;
841 root 1.16
842 root 1.49 WANT (len);
843 root 1.21 dec->cur += len;
844 root 1.20
845 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
846     if (!is_utf8_string (key, len))
847     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
848 root 1.16
849 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
850 root 1.16
851 root 1.21 return;
852     }
853 root 1.20
854 root 1.21 SV *k = decode_sv (dec);
855     SV *v = decode_sv (dec);
856 root 1.16
857 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
858     // methods, and this is hopefully the only place where it can happen,
859     // so if there is a chance of an exception, take the very slow path.
860     // since catching exceptions is "undocumented/internal/forbidden" by
861     // the new p5p powers, we need to call out to a perl function :/
862     if (ecb_expect_false (SvAMAGIC (k)))
863     {
864     dSP;
865    
866     ENTER; SAVETMPS;
867     PUSHMARK (SP);
868     EXTEND (SP, 3);
869     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
870     PUSHs (sv_2mortal (k));
871     PUSHs (sv_2mortal (v));
872    
873     PUTBACK;
874     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
875     SPAGAIN;
876    
877     FREETMPS; LEAVE;
878    
879     if (SvTRUE (ERRSV))
880     ERR_ERRSV;
881    
882     return;
883     }
884    
885 root 1.21 hv_store_ent (hv, k, v, 0);
886     SvREFCNT_dec (k);
887 root 1.38
888     fail:
889     ;
890 root 1.16 }
891    
892 root 1.1 static SV *
893     decode_hv (dec_t *dec)
894     {
895     HV *hv = newHV ();
896    
897     DEC_INC_DEPTH;
898    
899 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
900 root 1.1 {
901     ++dec->cur;
902    
903     for (;;)
904     {
905     WANT (1);
906    
907 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
908 root 1.1 {
909     ++dec->cur;
910     break;
911     }
912    
913 root 1.16 decode_he (dec, hv);
914 root 1.1 }
915     }
916     else
917     {
918 root 1.57 UV pairs = decode_uint (dec);
919    
920     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
921 root 1.1
922 root 1.16 while (pairs--)
923     decode_he (dec, hv);
924 root 1.1 }
925    
926     DEC_DEC_DEPTH;
927     return newRV_noinc ((SV *)hv);
928    
929     fail:
930     SvREFCNT_dec (hv);
931     DEC_DEC_DEPTH;
932     return &PL_sv_undef;
933     }
934    
935     static SV *
936     decode_str (dec_t *dec, int utf8)
937     {
938 root 1.6 SV *sv = 0;
939 root 1.1
940 root 1.35 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF)
941 root 1.1 {
942 root 1.33 // indefinite length strings
943 root 1.1 ++dec->cur;
944    
945 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
946 root 1.33
947 root 1.1 sv = newSVpvn ("", 0);
948    
949     for (;;)
950     {
951     WANT (1);
952    
953 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
954     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
955 root 1.33 {
956     ++dec->cur;
957     break;
958     }
959     else
960     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
961    
962     STRLEN len = decode_uint (dec);
963 root 1.1
964 root 1.33 WANT (len);
965     sv_catpvn (sv, dec->cur, len);
966     dec->cur += len;
967 root 1.1 }
968     }
969     else
970     {
971     STRLEN len = decode_uint (dec);
972    
973     WANT (len);
974     sv = newSVpvn (dec->cur, len);
975     dec->cur += len;
976 root 1.25
977     if (ecb_expect_false (dec->stringref)
978     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
979     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
980 root 1.1 }
981    
982     if (utf8)
983 root 1.38 {
984     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
985     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
986     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
987    
988     SvUTF8_on (sv);
989     }
990 root 1.1
991     return sv;
992    
993     fail:
994 root 1.6 SvREFCNT_dec (sv);
995 root 1.1 return &PL_sv_undef;
996     }
997    
998     static SV *
999 root 1.3 decode_tagged (dec_t *dec)
1000     {
1001 root 1.19 SV *sv = 0;
1002 root 1.3 UV tag = decode_uint (dec);
1003 root 1.19
1004     WANT (1);
1005 root 1.3
1006 root 1.18 switch (tag)
1007     {
1008     case CBOR_TAG_MAGIC:
1009 root 1.20 sv = decode_sv (dec);
1010     break;
1011 root 1.18
1012     case CBOR_TAG_INDIRECTION:
1013 root 1.20 sv = newRV_noinc (decode_sv (dec));
1014     break;
1015    
1016     case CBOR_TAG_STRINGREF_NAMESPACE:
1017     {
1018 root 1.55 // do nmot use SAVETMPS/FREETMPS, as these will
1019     // erase mortalised caches, e.g. "shareable"
1020     ENTER;
1021 root 1.20
1022     SAVESPTR (dec->stringref);
1023     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1024    
1025     sv = decode_sv (dec);
1026    
1027 root 1.55 LEAVE;
1028 root 1.20 }
1029     break;
1030    
1031     case CBOR_TAG_STRINGREF:
1032     {
1033 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1034 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1035    
1036     UV idx = decode_uint (dec);
1037    
1038     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
1039     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1040    
1041     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1042     }
1043     break;
1044 root 1.11
1045 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1046     {
1047     if (ecb_expect_false (!dec->shareable))
1048     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1049    
1050 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1051     {
1052     sv = newSV (0);
1053     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1054 root 1.18
1055 root 1.37 SV *osv = decode_sv (dec);
1056     sv_setsv (sv, osv);
1057     SvREFCNT_dec_NN (osv);
1058     }
1059     else
1060     {
1061     av_push (dec->shareable, &PL_sv_undef);
1062     int idx = AvFILLp (dec->shareable);
1063     sv = decode_sv (dec);
1064     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1065     }
1066 root 1.19 }
1067 root 1.20 break;
1068 root 1.18
1069     case CBOR_TAG_VALUE_SHAREDREF:
1070 root 1.17 {
1071 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1072 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1073 root 1.18
1074 root 1.19 UV idx = decode_uint (dec);
1075    
1076 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
1077 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1078    
1079 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1080 root 1.37
1081     if (sv == &PL_sv_undef)
1082     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1083 root 1.17 }
1084 root 1.20 break;
1085 root 1.17
1086 root 1.18 case CBOR_TAG_PERL_OBJECT:
1087     {
1088 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1089     goto filter;
1090    
1091 root 1.19 sv = decode_sv (dec);
1092    
1093 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1094     ERR ("corrupted CBOR data (non-array perl object)");
1095    
1096     AV *av = (AV *)SvRV (sv);
1097     int len = av_len (av) + 1;
1098     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1099    
1100     if (!stash)
1101     ERR ("cannot decode perl-object (package does not exist)");
1102    
1103     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1104    
1105     if (!method)
1106     ERR ("cannot decode perl-object (package does not have a THAW method)");
1107    
1108     dSP;
1109    
1110 root 1.50 ENTER; SAVETMPS;
1111     PUSHMARK (SP);
1112 root 1.18 EXTEND (SP, len + 1);
1113     // we re-bless the reference to get overload and other niceties right
1114     PUSHs (*av_fetch (av, 0, 1));
1115     PUSHs (sv_cbor);
1116    
1117     int i;
1118    
1119     for (i = 1; i < len; ++i)
1120     PUSHs (*av_fetch (av, i, 1));
1121    
1122     PUTBACK;
1123     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1124     SPAGAIN;
1125    
1126     if (SvTRUE (ERRSV))
1127     {
1128     FREETMPS; LEAVE;
1129 root 1.59 ERR_ERRSV;
1130 root 1.18 }
1131    
1132     SvREFCNT_dec (sv);
1133     sv = SvREFCNT_inc (POPs);
1134 root 1.11
1135 root 1.18 PUTBACK;
1136 root 1.11
1137 root 1.18 FREETMPS; LEAVE;
1138     }
1139 root 1.20 break;
1140 root 1.9
1141 root 1.18 default:
1142 root 1.60 filter:
1143 root 1.18 {
1144 root 1.58 SV *tag_sv = newSVuv (tag);
1145    
1146 root 1.19 sv = decode_sv (dec);
1147    
1148 root 1.27 dSP;
1149 root 1.50 ENTER; SAVETMPS;
1150     SAVESTACK_POS ();
1151     PUSHMARK (SP);
1152 root 1.27 EXTEND (SP, 2);
1153 root 1.58 PUSHs (tag_sv);
1154 root 1.27 PUSHs (sv);
1155    
1156     PUTBACK;
1157     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1158     SPAGAIN;
1159    
1160     if (SvTRUE (ERRSV))
1161     {
1162 root 1.58 SvREFCNT_dec (tag_sv);
1163 root 1.27 FREETMPS; LEAVE;
1164 root 1.59 ERR_ERRSV;
1165 root 1.27 }
1166    
1167     if (count)
1168     {
1169 root 1.58 SvREFCNT_dec (tag_sv);
1170 root 1.27 SvREFCNT_dec (sv);
1171     sv = SvREFCNT_inc (POPs);
1172     }
1173     else
1174     {
1175     AV *av = newAV ();
1176 root 1.58 av_push (av, tag_sv);
1177 root 1.27 av_push (av, sv);
1178    
1179     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1180     ? cbor_tagged_stash
1181     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1182     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1183     }
1184 root 1.7
1185 root 1.27 PUTBACK;
1186    
1187     FREETMPS; LEAVE;
1188 root 1.18 }
1189 root 1.20 break;
1190 root 1.11 }
1191 root 1.9
1192 root 1.20 return sv;
1193    
1194 root 1.9 fail:
1195     SvREFCNT_dec (sv);
1196     return &PL_sv_undef;
1197 root 1.3 }
1198    
1199     static SV *
1200 root 1.1 decode_sv (dec_t *dec)
1201     {
1202     WANT (1);
1203    
1204 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1205 root 1.1 {
1206 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1207     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1208     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1209     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1210     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1211     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1212     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1213    
1214     case MAJOR_MISC >> MAJOR_SHIFT:
1215     switch (*dec->cur++ & MINOR_MASK)
1216 root 1.1 {
1217 root 1.35 case SIMPLE_FALSE:
1218 root 1.1 #if CBOR_SLOW
1219 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1220 root 1.1 #endif
1221 root 1.10 return newSVsv (types_false);
1222 root 1.35 case SIMPLE_TRUE:
1223 root 1.1 #if CBOR_SLOW
1224 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1225 root 1.1 #endif
1226 root 1.10 return newSVsv (types_true);
1227 root 1.35 case SIMPLE_NULL:
1228 root 1.1 return newSVsv (&PL_sv_undef);
1229 root 1.35 case SIMPLE_UNDEF:
1230 root 1.10 #if CBOR_SLOW
1231     types_error = get_bool ("Types::Serialiser::error");
1232     #endif
1233     return newSVsv (types_error);
1234 root 1.1
1235 root 1.35 case MISC_FLOAT16:
1236 root 1.2 {
1237     WANT (2);
1238    
1239     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1240     dec->cur += 2;
1241    
1242     return newSVnv (ecb_binary16_to_float (fp));
1243     }
1244 root 1.1
1245 root 1.35 case MISC_FLOAT32:
1246 root 1.1 {
1247     uint32_t fp;
1248     WANT (4);
1249     memcpy (&fp, dec->cur, 4);
1250     dec->cur += 4;
1251    
1252     if (!ecb_big_endian ())
1253     fp = ecb_bswap32 (fp);
1254    
1255     return newSVnv (ecb_binary32_to_float (fp));
1256     }
1257    
1258 root 1.35 case MISC_FLOAT64:
1259 root 1.1 {
1260     uint64_t fp;
1261     WANT (8);
1262     memcpy (&fp, dec->cur, 8);
1263     dec->cur += 8;
1264    
1265     if (!ecb_big_endian ())
1266     fp = ecb_bswap64 (fp);
1267    
1268     return newSVnv (ecb_binary64_to_double (fp));
1269     }
1270    
1271 root 1.35 // 0..19 unassigned simple
1272 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1273     // 28-30 unassigned misc
1274     // 31 break code
1275 root 1.1 default:
1276 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1277 root 1.1 }
1278    
1279     break;
1280     }
1281    
1282     fail:
1283     return &PL_sv_undef;
1284     }
1285    
1286     static SV *
1287     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1288     {
1289 root 1.48 dec_t dec = { 0 };
1290 root 1.1 SV *sv;
1291 root 1.16 STRLEN len;
1292     char *data = SvPVbyte (string, len);
1293 root 1.1
1294 root 1.16 if (len > cbor->max_size && cbor->max_size)
1295     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1296     (unsigned long)len, (unsigned long)cbor->max_size);
1297 root 1.1
1298     dec.cbor = *cbor;
1299 root 1.16 dec.cur = (U8 *)data;
1300     dec.end = (U8 *)data + len;
1301 root 1.1
1302     sv = decode_sv (&dec);
1303    
1304     if (offset_return)
1305     *offset_return = dec.cur;
1306    
1307     if (!(offset_return || !sv))
1308 root 1.2 if (dec.cur != dec.end && !dec.err)
1309     dec.err = "garbage after CBOR object";
1310    
1311     if (dec.err)
1312 root 1.1 {
1313 root 1.39 if (dec.shareable)
1314     {
1315 root 1.58 // need to break cyclic links, which would all be in shareable
1316 root 1.39 int i;
1317     SV **svp;
1318    
1319     for (i = av_len (dec.shareable) + 1; i--; )
1320     if ((svp = av_fetch (dec.shareable, i, 0)))
1321     sv_setsv (*svp, &PL_sv_undef);
1322     }
1323    
1324 root 1.2 SvREFCNT_dec (sv);
1325 root 1.59
1326     if (dec.err_sv)
1327     sv_2mortal (dec.err_sv);
1328    
1329 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1330 root 1.1 }
1331    
1332     sv = sv_2mortal (sv);
1333    
1334     return sv;
1335     }
1336    
1337     /////////////////////////////////////////////////////////////////////////////
1338 root 1.40 // incremental parser
1339    
1340     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1341    
1342     // returns 0 for notyet, 1 for success or error
1343     static int
1344     incr_parse (CBOR *self, SV *cborstr)
1345     {
1346     STRLEN cur;
1347     SvPV (cborstr, cur);
1348    
1349     while (ecb_expect_true (self->incr_need <= cur))
1350     {
1351     // table of integer count bytes
1352     static I8 incr_len[MINOR_MASK + 1] = {
1353     0, 0, 0, 0, 0, 0, 0, 0,
1354     0, 0, 0, 0, 0, 0, 0, 0,
1355     0, 0, 0, 0, 0, 0, 0, 0,
1356     1, 2, 4, 8,-1,-1,-1,-2
1357     };
1358    
1359     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1360     U8 m = *p & MINOR_MASK;
1361     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1362     I8 ilen = incr_len[m];
1363    
1364     self->incr_need = self->incr_pos + 1;
1365    
1366     if (ecb_expect_false (ilen < 0))
1367     {
1368     if (m != MINOR_INDEF)
1369     return 1; // error
1370    
1371     if (*p == (MAJOR_MISC | MINOR_INDEF))
1372     {
1373     if (count >= 0)
1374     return 1; // error
1375    
1376     count = 1;
1377     }
1378     else
1379     {
1380     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1381     count = -1;
1382     }
1383     }
1384     else
1385     {
1386     self->incr_need += ilen;
1387     if (ecb_expect_false (self->incr_need > cur))
1388     return 0;
1389    
1390     int major = *p >> MAJOR_SHIFT;
1391    
1392     switch (major)
1393     {
1394 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1395     ++count; // tags merely prefix another value
1396     break;
1397    
1398 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1399     case MAJOR_TEXT >> MAJOR_SHIFT:
1400     case MAJOR_ARRAY >> MAJOR_SHIFT:
1401     case MAJOR_MAP >> MAJOR_SHIFT:
1402     {
1403     UV len;
1404    
1405     if (ecb_expect_false (ilen))
1406     {
1407     len = 0;
1408    
1409     do {
1410     len = (len << 8) | *++p;
1411     } while (--ilen);
1412     }
1413     else
1414     len = m;
1415    
1416     switch (major)
1417     {
1418     case MAJOR_BYTES >> MAJOR_SHIFT:
1419     case MAJOR_TEXT >> MAJOR_SHIFT:
1420     self->incr_need += len;
1421     if (ecb_expect_false (self->incr_need > cur))
1422     return 0;
1423    
1424     break;
1425    
1426     case MAJOR_MAP >> MAJOR_SHIFT:
1427     len <<= 1;
1428     case MAJOR_ARRAY >> MAJOR_SHIFT:
1429     if (len)
1430     {
1431     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1432     count = len + 1;
1433     }
1434     break;
1435     }
1436     }
1437     }
1438     }
1439    
1440     self->incr_pos = self->incr_need;
1441    
1442     if (count > 0)
1443     {
1444     while (!--count)
1445     {
1446     if (!AvFILLp (self->incr_count))
1447     return 1; // done
1448    
1449     SvREFCNT_dec_NN (av_pop (self->incr_count));
1450     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1451     }
1452    
1453     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1454     }
1455     }
1456    
1457     return 0;
1458     }
1459    
1460    
1461     /////////////////////////////////////////////////////////////////////////////
1462 root 1.1 // XS interface functions
1463    
1464     MODULE = CBOR::XS PACKAGE = CBOR::XS
1465    
1466     BOOT:
1467     {
1468     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1469 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1470 root 1.1
1471 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1472     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1473    
1474     types_true = get_bool ("Types::Serialiser::true" );
1475     types_false = get_bool ("Types::Serialiser::false");
1476     types_error = get_bool ("Types::Serialiser::error");
1477 root 1.11
1478 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1479    
1480 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1481     SvREADONLY_on (sv_cbor);
1482 root 1.56
1483     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1484 root 1.1 }
1485    
1486     PROTOTYPES: DISABLE
1487    
1488     void CLONE (...)
1489     CODE:
1490 root 1.10 cbor_stash = 0;
1491     cbor_tagged_stash = 0;
1492     types_error_stash = 0;
1493     types_boolean_stash = 0;
1494 root 1.1
1495     void new (char *klass)
1496     PPCODE:
1497     {
1498     SV *pv = NEWSV (0, sizeof (CBOR));
1499     SvPOK_only (pv);
1500     cbor_init ((CBOR *)SvPVX (pv));
1501     XPUSHs (sv_2mortal (sv_bless (
1502     newRV_noinc (pv),
1503     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1504     )));
1505     }
1506    
1507     void shrink (CBOR *self, int enable = 1)
1508     ALIAS:
1509     shrink = F_SHRINK
1510     allow_unknown = F_ALLOW_UNKNOWN
1511 root 1.18 allow_sharing = F_ALLOW_SHARING
1512 root 1.37 allow_cycles = F_ALLOW_CYCLES
1513 root 1.60 forbid_objects = F_FORBID_OBJECTS
1514 root 1.32 pack_strings = F_PACK_STRINGS
1515 root 1.54 text_keys = F_TEXT_KEYS
1516     text_strings = F_TEXT_STRINGS
1517 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1518 root 1.1 PPCODE:
1519     {
1520     if (enable)
1521     self->flags |= ix;
1522     else
1523     self->flags &= ~ix;
1524    
1525     XPUSHs (ST (0));
1526     }
1527    
1528     void get_shrink (CBOR *self)
1529     ALIAS:
1530     get_shrink = F_SHRINK
1531     get_allow_unknown = F_ALLOW_UNKNOWN
1532 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1533 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1534 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1535 root 1.32 get_pack_strings = F_PACK_STRINGS
1536 root 1.54 get_text_keys = F_TEXT_KEYS
1537     get_text_strings = F_TEXT_STRINGS
1538 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1539 root 1.1 PPCODE:
1540     XPUSHs (boolSV (self->flags & ix));
1541    
1542     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1543     PPCODE:
1544     self->max_depth = max_depth;
1545     XPUSHs (ST (0));
1546    
1547     U32 get_max_depth (CBOR *self)
1548     CODE:
1549     RETVAL = self->max_depth;
1550     OUTPUT:
1551     RETVAL
1552    
1553     void max_size (CBOR *self, U32 max_size = 0)
1554     PPCODE:
1555     self->max_size = max_size;
1556     XPUSHs (ST (0));
1557    
1558     int get_max_size (CBOR *self)
1559     CODE:
1560     RETVAL = self->max_size;
1561     OUTPUT:
1562     RETVAL
1563    
1564 root 1.27 void filter (CBOR *self, SV *filter = 0)
1565     PPCODE:
1566     SvREFCNT_dec (self->filter);
1567     self->filter = filter ? newSVsv (filter) : filter;
1568     XPUSHs (ST (0));
1569    
1570     SV *get_filter (CBOR *self)
1571     CODE:
1572     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1573     OUTPUT:
1574     RETVAL
1575    
1576 root 1.1 void encode (CBOR *self, SV *scalar)
1577     PPCODE:
1578     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1579     XPUSHs (scalar);
1580    
1581     void decode (CBOR *self, SV *cborstr)
1582     PPCODE:
1583     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1584     XPUSHs (cborstr);
1585    
1586     void decode_prefix (CBOR *self, SV *cborstr)
1587     PPCODE:
1588     {
1589     SV *sv;
1590     char *offset;
1591     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1592     EXTEND (SP, 2);
1593     PUSHs (sv);
1594     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1595     }
1596    
1597 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1598 root 1.42 ALIAS:
1599     incr_parse_multiple = 1
1600 root 1.40 PPCODE:
1601     {
1602     if (SvUTF8 (cborstr))
1603     sv_utf8_downgrade (cborstr, 0);
1604    
1605     if (!self->incr_count)
1606     {
1607     self->incr_count = newAV ();
1608     self->incr_pos = 0;
1609     self->incr_need = 1;
1610    
1611     av_push (self->incr_count, newSViv (1));
1612     }
1613    
1614 root 1.41 do
1615 root 1.40 {
1616     if (!incr_parse (self, cborstr))
1617     {
1618     if (self->incr_need > self->max_size && self->max_size)
1619     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1620     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1621    
1622     break;
1623     }
1624    
1625 root 1.41 SV *sv;
1626     char *offset;
1627 root 1.40
1628 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1629     XPUSHs (sv);
1630 root 1.40
1631 root 1.41 sv_chop (cborstr, offset);
1632 root 1.40
1633 root 1.41 av_clear (self->incr_count);
1634     av_push (self->incr_count, newSViv (1));
1635 root 1.40
1636 root 1.41 self->incr_pos = 0;
1637     self->incr_need = self->incr_pos + 1;
1638 root 1.40 }
1639 root 1.42 while (ix);
1640 root 1.40 }
1641    
1642     void incr_reset (CBOR *self)
1643     CODE:
1644     {
1645     SvREFCNT_dec (self->incr_count);
1646     self->incr_count = 0;
1647     }
1648    
1649 root 1.27 void DESTROY (CBOR *self)
1650     PPCODE:
1651     cbor_free (self);
1652    
1653 root 1.1 PROTOTYPES: ENABLE
1654    
1655     void encode_cbor (SV *scalar)
1656 root 1.36 ALIAS:
1657     encode_cbor = 0
1658     encode_cbor_sharing = F_ALLOW_SHARING
1659 root 1.1 PPCODE:
1660     {
1661     CBOR cbor;
1662     cbor_init (&cbor);
1663 root 1.36 cbor.flags |= ix;
1664 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1665     XPUSHs (scalar);
1666     }
1667    
1668     void decode_cbor (SV *cborstr)
1669     PPCODE:
1670     {
1671     CBOR cbor;
1672     cbor_init (&cbor);
1673     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1674     XPUSHs (cborstr);
1675     }
1676