ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.60
Committed: Sat Nov 26 00:47:02 2016 UTC (7 years, 5 months ago) by root
Branch: MAIN
Changes since 1.59: +15 -5 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12 root 1.44 #define ECB_NO_THREADS 1
13 root 1.1 #include "ecb.h"
14    
15 root 1.14 // compatibility with perl <5.18
16     #ifndef HvNAMELEN_get
17     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
18     #endif
19     #ifndef HvNAMELEN
20     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
21     #endif
22     #ifndef HvNAMEUTF8
23     # define HvNAMEUTF8(hv) 0
24     #endif
25 root 1.28 #ifndef SvREFCNT_dec_NN
26     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
27     #endif
28 root 1.14
29 root 1.35 // known major and minor types
30     enum cbor_type
31     {
32     MAJOR_SHIFT = 5,
33     MINOR_MASK = 0x1f,
34    
35     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
36     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
37     MAJOR_BYTES = 2 << MAJOR_SHIFT,
38     MAJOR_TEXT = 3 << MAJOR_SHIFT,
39     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
40     MAJOR_MAP = 5 << MAJOR_SHIFT,
41     MAJOR_TAG = 6 << MAJOR_SHIFT,
42     MAJOR_MISC = 7 << MAJOR_SHIFT,
43    
44     // INT/STRING/ARRAY/MAP subtypes
45     LENGTH_EXT1 = 24,
46     LENGTH_EXT2 = 25,
47     LENGTH_EXT4 = 26,
48     LENGTH_EXT8 = 27,
49    
50     // SIMPLE types (effectively MISC subtypes)
51     SIMPLE_FALSE = 20,
52     SIMPLE_TRUE = 21,
53     SIMPLE_NULL = 22,
54     SIMPLE_UNDEF = 23,
55    
56     // MISC subtype (unused)
57     MISC_EXT1 = 24,
58     MISC_FLOAT16 = 25,
59     MISC_FLOAT32 = 26,
60     MISC_FLOAT64 = 27,
61    
62     // BYTES/TEXT/ARRAY/MAP
63     MINOR_INDEF = 31,
64     };
65    
66 root 1.9 // known tags
67 root 1.8 enum cbor_tag
68     {
69 root 1.35 // extensions
70     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
71     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
72     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
73     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
74     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
75     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
76     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
77    
78     // rfc7049
79     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
80     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
81     CBOR_TAG_POS_BIGNUM = 2, // byte string
82     CBOR_TAG_NEG_BIGNUM = 3, // byte string
83     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
84     CBOR_TAG_BIGFLOAT = 5, // array
85    
86     CBOR_TAG_CONV_B64U = 21, // base64url, any
87     CBOR_TAG_CONV_B64 = 22, // base64, any
88     CBOR_TAG_CONV_HEX = 23, // base16, any
89     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
90    
91     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
92     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
93     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
94     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
95     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
96 root 1.8
97 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
98 root 1.8 };
99    
100 root 1.24 #define F_SHRINK 0x00000001UL
101     #define F_ALLOW_UNKNOWN 0x00000002UL
102 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
103 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
104 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
105     #define F_PACK_STRINGS 0x00000020UL
106     #define F_TEXT_KEYS 0x00000040UL
107     #define F_TEXT_STRINGS 0x00000080UL
108     #define F_VALIDATE_UTF8 0x00000100UL
109 root 1.1
110     #define INIT_SIZE 32 // initial scalar size to be allocated
111    
112     #define SB do {
113     #define SE } while (0)
114    
115     #define IN_RANGE_INC(type,val,beg,end) \
116     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
117     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
118    
119     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
120    
121     #ifdef USE_ITHREADS
122     # define CBOR_SLOW 1
123     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
124     #else
125     # define CBOR_SLOW 0
126     # define CBOR_STASH cbor_stash
127     #endif
128    
129 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
130 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
131 root 1.1
132     typedef struct {
133     U32 flags;
134     U32 max_depth;
135     STRLEN max_size;
136 root 1.27 SV *filter;
137 root 1.40
138     // for the incremental parser
139     STRLEN incr_pos; // the current offset into the text
140     STRLEN incr_need; // minimum bytes needed to decode
141     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
142 root 1.1 } CBOR;
143    
144 root 1.5 ecb_inline void
145 root 1.1 cbor_init (CBOR *cbor)
146     {
147     Zero (cbor, 1, CBOR);
148     cbor->max_depth = 512;
149     }
150    
151 root 1.27 ecb_inline void
152     cbor_free (CBOR *cbor)
153     {
154     SvREFCNT_dec (cbor->filter);
155 root 1.40 SvREFCNT_dec (cbor->incr_count);
156 root 1.27 }
157    
158 root 1.1 /////////////////////////////////////////////////////////////////////////////
159     // utility functions
160    
161 root 1.5 ecb_inline SV *
162 root 1.1 get_bool (const char *name)
163     {
164     SV *sv = get_sv (name, 1);
165    
166     SvREADONLY_on (sv);
167     SvREADONLY_on (SvRV (sv));
168    
169     return sv;
170     }
171    
172 root 1.5 ecb_inline void
173 root 1.1 shrink (SV *sv)
174     {
175     sv_utf8_downgrade (sv, 1);
176    
177     if (SvLEN (sv) > SvCUR (sv) + 1)
178     {
179     #ifdef SvPV_shrink_to_cur
180     SvPV_shrink_to_cur (sv);
181     #elif defined (SvPV_renew)
182     SvPV_renew (sv, SvCUR (sv) + 1);
183     #endif
184     }
185     }
186    
187 root 1.21 // minimum length of a string to be registered for stringref
188     ecb_inline int
189     minimum_string_length (UV idx)
190     {
191     return idx > 23
192     ? idx > 0xffU
193     ? idx > 0xffffU
194     ? idx > 0xffffffffU
195 root 1.29 ? 11
196     : 7
197 root 1.21 : 5
198     : 4
199     : 3;
200     }
201    
202 root 1.1 /////////////////////////////////////////////////////////////////////////////
203     // encoder
204    
205     // structure used for encoding CBOR
206     typedef struct
207     {
208     char *cur; // SvPVX (sv) + current output position
209     char *end; // SvEND (sv)
210     SV *sv; // result scalar
211     CBOR cbor;
212     U32 depth; // recursion level
213 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
214     UV stringref_idx;
215 root 1.19 HV *shareable; // ptr => index, or 0
216     UV shareable_idx;
217 root 1.1 } enc_t;
218    
219 root 1.5 ecb_inline void
220 root 1.1 need (enc_t *enc, STRLEN len)
221     {
222 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
223 root 1.1 {
224     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
225     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
226     enc->cur = SvPVX (enc->sv) + cur;
227     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
228     }
229     }
230    
231 root 1.5 ecb_inline void
232 root 1.1 encode_ch (enc_t *enc, char ch)
233     {
234     need (enc, 1);
235     *enc->cur++ = ch;
236     }
237    
238     static void
239     encode_uint (enc_t *enc, int major, UV len)
240     {
241     need (enc, 9);
242    
243 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
244 root 1.1 *enc->cur++ = major | len;
245 root 1.36 else if (ecb_expect_true (len <= 0xffU))
246 root 1.1 {
247 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
248 root 1.1 *enc->cur++ = len;
249     }
250 root 1.36 else if (len <= 0xffffU)
251 root 1.1 {
252 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
253 root 1.1 *enc->cur++ = len >> 8;
254     *enc->cur++ = len;
255     }
256 root 1.36 else if (len <= 0xffffffffU)
257 root 1.1 {
258 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
259 root 1.1 *enc->cur++ = len >> 24;
260     *enc->cur++ = len >> 16;
261     *enc->cur++ = len >> 8;
262     *enc->cur++ = len;
263     }
264 root 1.4 else
265 root 1.1 {
266 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
267 root 1.1 *enc->cur++ = len >> 56;
268     *enc->cur++ = len >> 48;
269     *enc->cur++ = len >> 40;
270     *enc->cur++ = len >> 32;
271     *enc->cur++ = len >> 24;
272     *enc->cur++ = len >> 16;
273     *enc->cur++ = len >> 8;
274     *enc->cur++ = len;
275     }
276     }
277    
278 root 1.21 ecb_inline void
279     encode_tag (enc_t *enc, UV tag)
280     {
281 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
282 root 1.21 }
283    
284 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
285     ecb_noinline static void
286     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
287     {
288     STRLEN ulen = len;
289     U8 *p, *pend = (U8 *)str + len;
290    
291     for (p = (U8 *)str; p < pend; ++p)
292     ulen += *p >> 7; // count set high bits
293    
294     encode_uint (enc, MAJOR_TEXT, ulen);
295    
296     need (enc, ulen);
297     for (p = (U8 *)str; p < pend; ++p)
298     if (*p < 0x80)
299     *enc->cur++ = *p;
300     else
301     {
302     *enc->cur++ = 0xc0 + (*p >> 6);
303     *enc->cur++ = 0x80 + (*p & 63);
304     }
305     }
306    
307 root 1.30 ecb_inline void
308 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
309 root 1.30 {
310 root 1.53 if (ecb_expect_false (upgrade_utf8))
311 root 1.51 if (!utf8)
312     {
313 root 1.53 encode_str_utf8 (enc, utf8, str, len);
314 root 1.51 return;
315     }
316    
317 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
318 root 1.30 need (enc, len);
319     memcpy (enc->cur, str, len);
320     enc->cur += len;
321     }
322    
323 root 1.54 ecb_inline void
324 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
325 root 1.1 {
326 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
327 root 1.21 {
328 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
329 root 1.21
330     if (SvOK (*svp))
331     {
332     // already registered, use stringref
333     encode_tag (enc, CBOR_TAG_STRINGREF);
334 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
335 root 1.21 return;
336     }
337     else if (len >= minimum_string_length (enc->stringref_idx))
338     {
339     // register only
340     sv_setuv (*svp, enc->stringref_idx);
341     ++enc->stringref_idx;
342     }
343     }
344    
345 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
346 root 1.1 }
347    
348     static void encode_sv (enc_t *enc, SV *sv);
349    
350     static void
351     encode_av (enc_t *enc, AV *av)
352     {
353     int i, len = av_len (av);
354    
355     if (enc->depth >= enc->cbor.max_depth)
356     croak (ERR_NESTING_EXCEEDED);
357    
358     ++enc->depth;
359    
360 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
361 root 1.1
362 root 1.45 if (SvMAGICAL (av))
363     for (i = 0; i <= len; ++i)
364     {
365     SV **svp = av_fetch (av, i, 0);
366     encode_sv (enc, svp ? *svp : &PL_sv_undef);
367     }
368     else
369     for (i = 0; i <= len; ++i)
370 root 1.46 {
371     SV *sv = AvARRAY (av)[i];
372     encode_sv (enc, sv ? sv : &PL_sv_undef);
373     }
374 root 1.1
375     --enc->depth;
376     }
377    
378     static void
379     encode_hv (enc_t *enc, HV *hv)
380     {
381     HE *he;
382    
383     if (enc->depth >= enc->cbor.max_depth)
384     croak (ERR_NESTING_EXCEEDED);
385    
386     ++enc->depth;
387    
388     int pairs = hv_iterinit (hv);
389     int mg = SvMAGICAL (hv);
390    
391     if (mg)
392 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
393 root 1.1 else
394 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
395 root 1.1
396     while ((he = hv_iternext (hv)))
397     {
398 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
399     encode_sv (enc, HeSVKEY (he));
400 root 1.1 else
401 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
402 root 1.1
403 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
404 root 1.1 }
405    
406     if (mg)
407 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
408 root 1.1
409     --enc->depth;
410     }
411    
412     // encode objects, arrays and special \0=false and \1=true values.
413     static void
414     encode_rv (enc_t *enc, SV *sv)
415     {
416 root 1.19 SvGETMAGIC (sv);
417 root 1.1
418 root 1.19 svtype svt = SvTYPE (sv);
419 root 1.18
420 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
421 root 1.1 {
422 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
423     ? types_boolean_stash
424     : gv_stashpv ("Types::Serialiser::Boolean", 1);
425     HV *error_stash = !CBOR_SLOW || types_error_stash
426     ? types_error_stash
427     : gv_stashpv ("Types::Serialiser::Error", 1);
428 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
429     ? cbor_tagged_stash
430     : gv_stashpv ("CBOR::XS::Tagged" , 1);
431 root 1.1
432 root 1.11 HV *stash = SvSTASH (sv);
433    
434     if (stash == boolean_stash)
435 root 1.34 {
436 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
437 root 1.34 return;
438     }
439 root 1.11 else if (stash == error_stash)
440 root 1.34 {
441 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
442 root 1.34 return;
443     }
444 root 1.11 else if (stash == tagged_stash)
445 root 1.6 {
446     if (svt != SVt_PVAV)
447     croak ("encountered CBOR::XS::Tagged object that isn't an array");
448    
449 root 1.35 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
450 root 1.6 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
451 root 1.34
452     return;
453 root 1.6 }
454 root 1.34 }
455    
456     if (ecb_expect_false (SvREFCNT (sv) > 1)
457     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
458     {
459     if (!enc->shareable)
460     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
461    
462     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
463    
464     if (SvOK (*svp))
465     {
466     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
467 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
468 root 1.34 return;
469     }
470     else
471     {
472     sv_setuv (*svp, enc->shareable_idx);
473     ++enc->shareable_idx;
474     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
475     }
476     }
477    
478     if (ecb_expect_false (SvOBJECT (sv)))
479     {
480     HV *stash = SvSTASH (sv);
481     GV *method;
482    
483 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
484     croak ("encountered object '%s', but forbid_objects is enabled",
485     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
486     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
487 root 1.1 {
488 root 1.11 dSP;
489    
490 root 1.50 ENTER; SAVETMPS;
491     PUSHMARK (SP);
492 root 1.6 // we re-bless the reference to get overload and other niceties right
493 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
494    
495     PUTBACK;
496     // G_SCALAR ensures that return value is 1
497     call_sv ((SV *)GvCV (method), G_SCALAR);
498     SPAGAIN;
499    
500     // catch this surprisingly common error
501     if (SvROK (TOPs) && SvRV (TOPs) == sv)
502     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
503    
504     encode_sv (enc, POPs);
505    
506     PUTBACK;
507    
508     FREETMPS; LEAVE;
509     }
510     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
511     {
512     dSP;
513 root 1.6
514 root 1.50 ENTER; SAVETMPS;
515     SAVESTACK_POS ();
516     PUSHMARK (SP);
517 root 1.11 EXTEND (SP, 2);
518     // we re-bless the reference to get overload and other niceties right
519     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
520     PUSHs (sv_cbor);
521 root 1.1
522 root 1.11 PUTBACK;
523     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
524     SPAGAIN;
525 root 1.6
526 root 1.11 // catch this surprisingly common error
527     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
528     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
529 root 1.6
530 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
531 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
532 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
533 root 1.6
534 root 1.11 while (count)
535     encode_sv (enc, SP[1 - count--]);
536 root 1.6
537 root 1.11 PUTBACK;
538 root 1.6
539 root 1.11 FREETMPS; LEAVE;
540 root 1.1 }
541 root 1.11 else
542     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
543     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
544 root 1.1 }
545     else if (svt == SVt_PVHV)
546     encode_hv (enc, (HV *)sv);
547     else if (svt == SVt_PVAV)
548     encode_av (enc, (AV *)sv);
549 root 1.18 else
550 root 1.1 {
551 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
552     encode_sv (enc, sv);
553 root 1.1 }
554     }
555    
556     static void
557     encode_nv (enc_t *enc, SV *sv)
558     {
559     double nv = SvNVX (sv);
560    
561     need (enc, 9);
562    
563 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
564     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
565 root 1.1 //TODO: maybe I32?
566 root 1.5 else if (ecb_expect_false (nv == (float)nv))
567 root 1.1 {
568     uint32_t fp = ecb_float_to_binary32 (nv);
569    
570 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
571 root 1.1
572     if (!ecb_big_endian ())
573     fp = ecb_bswap32 (fp);
574    
575     memcpy (enc->cur, &fp, 4);
576     enc->cur += 4;
577     }
578     else
579     {
580     uint64_t fp = ecb_double_to_binary64 (nv);
581    
582 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
583 root 1.1
584     if (!ecb_big_endian ())
585     fp = ecb_bswap64 (fp);
586    
587     memcpy (enc->cur, &fp, 8);
588     enc->cur += 8;
589     }
590     }
591    
592     static void
593     encode_sv (enc_t *enc, SV *sv)
594     {
595     SvGETMAGIC (sv);
596    
597     if (SvPOKp (sv))
598     {
599     STRLEN len;
600     char *str = SvPV (sv, len);
601 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
602 root 1.1 }
603     else if (SvNOKp (sv))
604     encode_nv (enc, sv);
605     else if (SvIOKp (sv))
606     {
607     if (SvIsUV (sv))
608 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
609 root 1.1 else if (SvIVX (sv) >= 0)
610 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
611 root 1.1 else
612 root 1.35 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
613 root 1.1 }
614     else if (SvROK (sv))
615     encode_rv (enc, SvRV (sv));
616     else if (!SvOK (sv))
617 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
618 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
619 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
620 root 1.1 else
621     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
622     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
623     }
624    
625     static SV *
626     encode_cbor (SV *scalar, CBOR *cbor)
627     {
628 root 1.48 enc_t enc = { 0 };
629 root 1.1
630     enc.cbor = *cbor;
631     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
632     enc.cur = SvPVX (enc.sv);
633     enc.end = SvEND (enc.sv);
634    
635     SvPOK_only (enc.sv);
636 root 1.20
637 root 1.32 if (cbor->flags & F_PACK_STRINGS)
638 root 1.20 {
639     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
640     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
641     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
642     }
643    
644 root 1.1 encode_sv (&enc, scalar);
645    
646     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
647     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
648    
649     if (enc.cbor.flags & F_SHRINK)
650     shrink (enc.sv);
651    
652     return enc.sv;
653     }
654    
655     /////////////////////////////////////////////////////////////////////////////
656     // decoder
657    
658     // structure used for decoding CBOR
659     typedef struct
660     {
661     U8 *cur; // current parser pointer
662     U8 *end; // end of input string
663     const char *err; // parse error, if != 0
664     CBOR cbor;
665     U32 depth; // recursion depth
666     U32 maxdepth; // recursion depth limit
667 root 1.19 AV *shareable;
668 root 1.20 AV *stringref;
669 root 1.27 SV *decode_tagged;
670 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
671 root 1.1 } dec_t;
672    
673 root 1.59 // set dec->err to ERRSV
674     ecb_cold static void
675     err_errsv (dec_t *dec)
676     {
677     if (!dec->err)
678     {
679     dec->err_sv = newSVsv (ERRSV);
680    
681     // chop off the trailing \n
682     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
683     *SvEND (dec->err_sv) = 0;
684    
685     dec->err = SvPVutf8_nolen (dec->err_sv);
686     }
687     }
688 root 1.1
689 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
690     ecb_cold static void
691     err_set (dec_t *dec, const char *reason)
692     {
693     if (!dec->err)
694     dec->err = reason;
695     }
696    
697     ecb_cold static void
698     err_unexpected_end (dec_t *dec)
699     {
700     err_set (dec, "unexpected end of CBOR data");
701     }
702    
703     ecb_cold static void
704     err_nesting_exceeded (dec_t *dec)
705     {
706     err_set (dec, ERR_NESTING_EXCEEDED);
707     }
708    
709     #define ERR_DO(do) SB do; goto fail; SE
710     #define ERR(reason) ERR_DO (err_set (dec, reason))
711     #define ERR_ERRSV ERR_DO (err_errsv (dec))
712    
713     #define WANT(len) if (ecb_expect_false ((UV)(dec->end - dec->cur) < (UV)len)) ERR_DO (err_unexpected_end (dec))
714 root 1.1
715 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
716 root 1.1 #define DEC_DEC_DEPTH --dec->depth
717    
718     static UV
719     decode_uint (dec_t *dec)
720     {
721 root 1.35 U8 m = *dec->cur & MINOR_MASK;
722     ++dec->cur;
723    
724     if (ecb_expect_true (m < LENGTH_EXT1))
725     return m;
726 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
727     {
728     WANT (1);
729     dec->cur += 1;
730     return dec->cur[-1];
731     }
732     else if (ecb_expect_true (m == LENGTH_EXT2))
733     {
734     WANT (2);
735     dec->cur += 2;
736     return (((UV)dec->cur[-2]) << 8)
737     | ((UV)dec->cur[-1]);
738     }
739     else if (ecb_expect_true (m == LENGTH_EXT4))
740     {
741     WANT (4);
742     dec->cur += 4;
743     return (((UV)dec->cur[-4]) << 24)
744     | (((UV)dec->cur[-3]) << 16)
745     | (((UV)dec->cur[-2]) << 8)
746     | ((UV)dec->cur[-1]);
747     }
748     else if (ecb_expect_true (m == LENGTH_EXT8))
749 root 1.1 {
750 root 1.36 WANT (8);
751     dec->cur += 8;
752 root 1.34
753 root 1.36 return
754 root 1.34 #if UVSIZE < 8
755 root 1.36 0
756 root 1.34 #else
757 root 1.36 (((UV)dec->cur[-8]) << 56)
758     | (((UV)dec->cur[-7]) << 48)
759     | (((UV)dec->cur[-6]) << 40)
760     | (((UV)dec->cur[-5]) << 32)
761 root 1.34 #endif
762 root 1.36 | (((UV)dec->cur[-4]) << 24)
763     | (((UV)dec->cur[-3]) << 16)
764     | (((UV)dec->cur[-2]) << 8)
765     | ((UV)dec->cur[-1]);
766 root 1.1 }
767 root 1.36 else
768     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
769 root 1.1
770     fail:
771     return 0;
772     }
773    
774     static SV *decode_sv (dec_t *dec);
775    
776     static SV *
777     decode_av (dec_t *dec)
778     {
779     AV *av = newAV ();
780    
781     DEC_INC_DEPTH;
782    
783 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
784 root 1.1 {
785     ++dec->cur;
786    
787     for (;;)
788     {
789     WANT (1);
790    
791 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
792 root 1.1 {
793     ++dec->cur;
794     break;
795     }
796    
797     av_push (av, decode_sv (dec));
798     }
799     }
800     else
801     {
802 root 1.57 UV i, len = decode_uint (dec);
803 root 1.1
804 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
805 root 1.1 av_fill (av, len - 1);
806    
807     for (i = 0; i < len; ++i)
808     AvARRAY (av)[i] = decode_sv (dec);
809     }
810    
811     DEC_DEC_DEPTH;
812     return newRV_noinc ((SV *)av);
813    
814     fail:
815     SvREFCNT_dec (av);
816     DEC_DEC_DEPTH;
817     return &PL_sv_undef;
818     }
819    
820 root 1.16 static void
821     decode_he (dec_t *dec, HV *hv)
822     {
823     // for speed reasons, we specialcase single-string
824 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
825    
826 root 1.23 if (ecb_expect_true (!dec->stringref))
827 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
828 root 1.21 {
829 root 1.56 STRLEN len = decode_uint (dec);
830 root 1.21 char *key = (char *)dec->cur;
831 root 1.16
832 root 1.49 WANT (len);
833 root 1.21 dec->cur += len;
834    
835 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
836 root 1.16
837 root 1.21 return;
838     }
839 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
840 root 1.21 {
841 root 1.56 STRLEN len = decode_uint (dec);
842 root 1.21 char *key = (char *)dec->cur;
843 root 1.16
844 root 1.49 WANT (len);
845 root 1.21 dec->cur += len;
846 root 1.20
847 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
848     if (!is_utf8_string (key, len))
849     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
850 root 1.16
851 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
852 root 1.16
853 root 1.21 return;
854     }
855 root 1.20
856 root 1.21 SV *k = decode_sv (dec);
857     SV *v = decode_sv (dec);
858 root 1.16
859 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
860     // methods, and this is hopefully the only place where it can happen,
861     // so if there is a chance of an exception, take the very slow path.
862     // since catching exceptions is "undocumented/internal/forbidden" by
863     // the new p5p powers, we need to call out to a perl function :/
864     if (ecb_expect_false (SvAMAGIC (k)))
865     {
866     dSP;
867    
868     ENTER; SAVETMPS;
869     PUSHMARK (SP);
870     EXTEND (SP, 3);
871     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
872     PUSHs (sv_2mortal (k));
873     PUSHs (sv_2mortal (v));
874    
875     PUTBACK;
876     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
877     SPAGAIN;
878    
879     FREETMPS; LEAVE;
880    
881     if (SvTRUE (ERRSV))
882     ERR_ERRSV;
883    
884     return;
885     }
886    
887 root 1.21 hv_store_ent (hv, k, v, 0);
888     SvREFCNT_dec (k);
889 root 1.38
890     fail:
891     ;
892 root 1.16 }
893    
894 root 1.1 static SV *
895     decode_hv (dec_t *dec)
896     {
897     HV *hv = newHV ();
898    
899     DEC_INC_DEPTH;
900    
901 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
902 root 1.1 {
903     ++dec->cur;
904    
905     for (;;)
906     {
907     WANT (1);
908    
909 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
910 root 1.1 {
911     ++dec->cur;
912     break;
913     }
914    
915 root 1.16 decode_he (dec, hv);
916 root 1.1 }
917     }
918     else
919     {
920 root 1.57 UV pairs = decode_uint (dec);
921    
922     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
923 root 1.1
924 root 1.16 while (pairs--)
925     decode_he (dec, hv);
926 root 1.1 }
927    
928     DEC_DEC_DEPTH;
929     return newRV_noinc ((SV *)hv);
930    
931     fail:
932     SvREFCNT_dec (hv);
933     DEC_DEC_DEPTH;
934     return &PL_sv_undef;
935     }
936    
937     static SV *
938     decode_str (dec_t *dec, int utf8)
939     {
940 root 1.6 SV *sv = 0;
941 root 1.1
942 root 1.35 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF)
943 root 1.1 {
944 root 1.33 // indefinite length strings
945 root 1.1 ++dec->cur;
946    
947 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
948 root 1.33
949 root 1.1 sv = newSVpvn ("", 0);
950    
951     for (;;)
952     {
953     WANT (1);
954    
955 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
956     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
957 root 1.33 {
958     ++dec->cur;
959     break;
960     }
961     else
962     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
963    
964     STRLEN len = decode_uint (dec);
965 root 1.1
966 root 1.33 WANT (len);
967     sv_catpvn (sv, dec->cur, len);
968     dec->cur += len;
969 root 1.1 }
970     }
971     else
972     {
973     STRLEN len = decode_uint (dec);
974    
975     WANT (len);
976     sv = newSVpvn (dec->cur, len);
977     dec->cur += len;
978 root 1.25
979     if (ecb_expect_false (dec->stringref)
980     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
981     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
982 root 1.1 }
983    
984     if (utf8)
985 root 1.38 {
986     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
987     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
988     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
989    
990     SvUTF8_on (sv);
991     }
992 root 1.1
993     return sv;
994    
995     fail:
996 root 1.6 SvREFCNT_dec (sv);
997 root 1.1 return &PL_sv_undef;
998     }
999    
1000     static SV *
1001 root 1.3 decode_tagged (dec_t *dec)
1002     {
1003 root 1.19 SV *sv = 0;
1004 root 1.3 UV tag = decode_uint (dec);
1005 root 1.19
1006     WANT (1);
1007 root 1.3
1008 root 1.18 switch (tag)
1009     {
1010     case CBOR_TAG_MAGIC:
1011 root 1.20 sv = decode_sv (dec);
1012     break;
1013 root 1.18
1014     case CBOR_TAG_INDIRECTION:
1015 root 1.20 sv = newRV_noinc (decode_sv (dec));
1016     break;
1017    
1018     case CBOR_TAG_STRINGREF_NAMESPACE:
1019     {
1020 root 1.55 // do nmot use SAVETMPS/FREETMPS, as these will
1021     // erase mortalised caches, e.g. "shareable"
1022     ENTER;
1023 root 1.20
1024     SAVESPTR (dec->stringref);
1025     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1026    
1027     sv = decode_sv (dec);
1028    
1029 root 1.55 LEAVE;
1030 root 1.20 }
1031     break;
1032    
1033     case CBOR_TAG_STRINGREF:
1034     {
1035 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1036 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1037    
1038     UV idx = decode_uint (dec);
1039    
1040     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
1041     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1042    
1043     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1044     }
1045     break;
1046 root 1.11
1047 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1048     {
1049     if (ecb_expect_false (!dec->shareable))
1050     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1051    
1052 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1053     {
1054     sv = newSV (0);
1055     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1056 root 1.18
1057 root 1.37 SV *osv = decode_sv (dec);
1058     sv_setsv (sv, osv);
1059     SvREFCNT_dec_NN (osv);
1060     }
1061     else
1062     {
1063     av_push (dec->shareable, &PL_sv_undef);
1064     int idx = AvFILLp (dec->shareable);
1065     sv = decode_sv (dec);
1066     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1067     }
1068 root 1.19 }
1069 root 1.20 break;
1070 root 1.18
1071     case CBOR_TAG_VALUE_SHAREDREF:
1072 root 1.17 {
1073 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1074 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1075 root 1.18
1076 root 1.19 UV idx = decode_uint (dec);
1077    
1078 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
1079 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1080    
1081 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1082 root 1.37
1083     if (sv == &PL_sv_undef)
1084     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1085 root 1.17 }
1086 root 1.20 break;
1087 root 1.17
1088 root 1.18 case CBOR_TAG_PERL_OBJECT:
1089     {
1090 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1091     goto filter;
1092    
1093 root 1.19 sv = decode_sv (dec);
1094    
1095 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1096     ERR ("corrupted CBOR data (non-array perl object)");
1097    
1098     AV *av = (AV *)SvRV (sv);
1099     int len = av_len (av) + 1;
1100     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1101    
1102     if (!stash)
1103     ERR ("cannot decode perl-object (package does not exist)");
1104    
1105     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1106    
1107     if (!method)
1108     ERR ("cannot decode perl-object (package does not have a THAW method)");
1109    
1110     dSP;
1111    
1112 root 1.50 ENTER; SAVETMPS;
1113     PUSHMARK (SP);
1114 root 1.18 EXTEND (SP, len + 1);
1115     // we re-bless the reference to get overload and other niceties right
1116     PUSHs (*av_fetch (av, 0, 1));
1117     PUSHs (sv_cbor);
1118    
1119     int i;
1120    
1121     for (i = 1; i < len; ++i)
1122     PUSHs (*av_fetch (av, i, 1));
1123    
1124     PUTBACK;
1125     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1126     SPAGAIN;
1127    
1128     if (SvTRUE (ERRSV))
1129     {
1130     FREETMPS; LEAVE;
1131 root 1.59 ERR_ERRSV;
1132 root 1.18 }
1133    
1134     SvREFCNT_dec (sv);
1135     sv = SvREFCNT_inc (POPs);
1136 root 1.11
1137 root 1.18 PUTBACK;
1138 root 1.11
1139 root 1.18 FREETMPS; LEAVE;
1140     }
1141 root 1.20 break;
1142 root 1.9
1143 root 1.18 default:
1144 root 1.60 filter:
1145 root 1.18 {
1146 root 1.58 SV *tag_sv = newSVuv (tag);
1147    
1148 root 1.19 sv = decode_sv (dec);
1149    
1150 root 1.27 dSP;
1151 root 1.50 ENTER; SAVETMPS;
1152     SAVESTACK_POS ();
1153     PUSHMARK (SP);
1154 root 1.27 EXTEND (SP, 2);
1155 root 1.58 PUSHs (tag_sv);
1156 root 1.27 PUSHs (sv);
1157    
1158     PUTBACK;
1159     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1160     SPAGAIN;
1161    
1162     if (SvTRUE (ERRSV))
1163     {
1164 root 1.58 SvREFCNT_dec (tag_sv);
1165 root 1.27 FREETMPS; LEAVE;
1166 root 1.59 ERR_ERRSV;
1167 root 1.27 }
1168    
1169     if (count)
1170     {
1171 root 1.58 SvREFCNT_dec (tag_sv);
1172 root 1.27 SvREFCNT_dec (sv);
1173     sv = SvREFCNT_inc (POPs);
1174     }
1175     else
1176     {
1177     AV *av = newAV ();
1178 root 1.58 av_push (av, tag_sv);
1179 root 1.27 av_push (av, sv);
1180    
1181     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1182     ? cbor_tagged_stash
1183     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1184     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1185     }
1186 root 1.7
1187 root 1.27 PUTBACK;
1188    
1189     FREETMPS; LEAVE;
1190 root 1.18 }
1191 root 1.20 break;
1192 root 1.11 }
1193 root 1.9
1194 root 1.20 return sv;
1195    
1196 root 1.9 fail:
1197     SvREFCNT_dec (sv);
1198     return &PL_sv_undef;
1199 root 1.3 }
1200    
1201     static SV *
1202 root 1.1 decode_sv (dec_t *dec)
1203     {
1204     WANT (1);
1205    
1206 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1207 root 1.1 {
1208 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1209     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1210     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1211     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1212     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1213     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1214     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1215    
1216     case MAJOR_MISC >> MAJOR_SHIFT:
1217     switch (*dec->cur++ & MINOR_MASK)
1218 root 1.1 {
1219 root 1.35 case SIMPLE_FALSE:
1220 root 1.1 #if CBOR_SLOW
1221 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1222 root 1.1 #endif
1223 root 1.10 return newSVsv (types_false);
1224 root 1.35 case SIMPLE_TRUE:
1225 root 1.1 #if CBOR_SLOW
1226 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1227 root 1.1 #endif
1228 root 1.10 return newSVsv (types_true);
1229 root 1.35 case SIMPLE_NULL:
1230 root 1.1 return newSVsv (&PL_sv_undef);
1231 root 1.35 case SIMPLE_UNDEF:
1232 root 1.10 #if CBOR_SLOW
1233     types_error = get_bool ("Types::Serialiser::error");
1234     #endif
1235     return newSVsv (types_error);
1236 root 1.1
1237 root 1.35 case MISC_FLOAT16:
1238 root 1.2 {
1239     WANT (2);
1240    
1241     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1242     dec->cur += 2;
1243    
1244     return newSVnv (ecb_binary16_to_float (fp));
1245     }
1246 root 1.1
1247 root 1.35 case MISC_FLOAT32:
1248 root 1.1 {
1249     uint32_t fp;
1250     WANT (4);
1251     memcpy (&fp, dec->cur, 4);
1252     dec->cur += 4;
1253    
1254     if (!ecb_big_endian ())
1255     fp = ecb_bswap32 (fp);
1256    
1257     return newSVnv (ecb_binary32_to_float (fp));
1258     }
1259    
1260 root 1.35 case MISC_FLOAT64:
1261 root 1.1 {
1262     uint64_t fp;
1263     WANT (8);
1264     memcpy (&fp, dec->cur, 8);
1265     dec->cur += 8;
1266    
1267     if (!ecb_big_endian ())
1268     fp = ecb_bswap64 (fp);
1269    
1270     return newSVnv (ecb_binary64_to_double (fp));
1271     }
1272    
1273 root 1.35 // 0..19 unassigned simple
1274 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1275     // 28-30 unassigned misc
1276     // 31 break code
1277 root 1.1 default:
1278 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1279 root 1.1 }
1280    
1281     break;
1282     }
1283    
1284     fail:
1285     return &PL_sv_undef;
1286     }
1287    
1288     static SV *
1289     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1290     {
1291 root 1.48 dec_t dec = { 0 };
1292 root 1.1 SV *sv;
1293 root 1.16 STRLEN len;
1294     char *data = SvPVbyte (string, len);
1295 root 1.1
1296 root 1.16 if (len > cbor->max_size && cbor->max_size)
1297     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1298     (unsigned long)len, (unsigned long)cbor->max_size);
1299 root 1.1
1300     dec.cbor = *cbor;
1301 root 1.16 dec.cur = (U8 *)data;
1302     dec.end = (U8 *)data + len;
1303 root 1.1
1304     sv = decode_sv (&dec);
1305    
1306     if (offset_return)
1307     *offset_return = dec.cur;
1308    
1309     if (!(offset_return || !sv))
1310 root 1.2 if (dec.cur != dec.end && !dec.err)
1311     dec.err = "garbage after CBOR object";
1312    
1313     if (dec.err)
1314 root 1.1 {
1315 root 1.39 if (dec.shareable)
1316     {
1317 root 1.58 // need to break cyclic links, which would all be in shareable
1318 root 1.39 int i;
1319     SV **svp;
1320    
1321     for (i = av_len (dec.shareable) + 1; i--; )
1322     if ((svp = av_fetch (dec.shareable, i, 0)))
1323     sv_setsv (*svp, &PL_sv_undef);
1324     }
1325    
1326 root 1.2 SvREFCNT_dec (sv);
1327 root 1.59
1328     if (dec.err_sv)
1329     sv_2mortal (dec.err_sv);
1330    
1331 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1332 root 1.1 }
1333    
1334     sv = sv_2mortal (sv);
1335    
1336     return sv;
1337     }
1338    
1339     /////////////////////////////////////////////////////////////////////////////
1340 root 1.40 // incremental parser
1341    
1342     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1343    
1344     // returns 0 for notyet, 1 for success or error
1345     static int
1346     incr_parse (CBOR *self, SV *cborstr)
1347     {
1348     STRLEN cur;
1349     SvPV (cborstr, cur);
1350    
1351     while (ecb_expect_true (self->incr_need <= cur))
1352     {
1353     // table of integer count bytes
1354     static I8 incr_len[MINOR_MASK + 1] = {
1355     0, 0, 0, 0, 0, 0, 0, 0,
1356     0, 0, 0, 0, 0, 0, 0, 0,
1357     0, 0, 0, 0, 0, 0, 0, 0,
1358     1, 2, 4, 8,-1,-1,-1,-2
1359     };
1360    
1361     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1362     U8 m = *p & MINOR_MASK;
1363     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1364     I8 ilen = incr_len[m];
1365    
1366     self->incr_need = self->incr_pos + 1;
1367    
1368     if (ecb_expect_false (ilen < 0))
1369     {
1370     if (m != MINOR_INDEF)
1371     return 1; // error
1372    
1373     if (*p == (MAJOR_MISC | MINOR_INDEF))
1374     {
1375     if (count >= 0)
1376     return 1; // error
1377    
1378     count = 1;
1379     }
1380     else
1381     {
1382     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1383     count = -1;
1384     }
1385     }
1386     else
1387     {
1388     self->incr_need += ilen;
1389     if (ecb_expect_false (self->incr_need > cur))
1390     return 0;
1391    
1392     int major = *p >> MAJOR_SHIFT;
1393    
1394     switch (major)
1395     {
1396 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1397     ++count; // tags merely prefix another value
1398     break;
1399    
1400 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1401     case MAJOR_TEXT >> MAJOR_SHIFT:
1402     case MAJOR_ARRAY >> MAJOR_SHIFT:
1403     case MAJOR_MAP >> MAJOR_SHIFT:
1404     {
1405     UV len;
1406    
1407     if (ecb_expect_false (ilen))
1408     {
1409     len = 0;
1410    
1411     do {
1412     len = (len << 8) | *++p;
1413     } while (--ilen);
1414     }
1415     else
1416     len = m;
1417    
1418     switch (major)
1419     {
1420     case MAJOR_BYTES >> MAJOR_SHIFT:
1421     case MAJOR_TEXT >> MAJOR_SHIFT:
1422     self->incr_need += len;
1423     if (ecb_expect_false (self->incr_need > cur))
1424     return 0;
1425    
1426     break;
1427    
1428     case MAJOR_MAP >> MAJOR_SHIFT:
1429     len <<= 1;
1430     case MAJOR_ARRAY >> MAJOR_SHIFT:
1431     if (len)
1432     {
1433     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1434     count = len + 1;
1435     }
1436     break;
1437     }
1438     }
1439     }
1440     }
1441    
1442     self->incr_pos = self->incr_need;
1443    
1444     if (count > 0)
1445     {
1446     while (!--count)
1447     {
1448     if (!AvFILLp (self->incr_count))
1449     return 1; // done
1450    
1451     SvREFCNT_dec_NN (av_pop (self->incr_count));
1452     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1453     }
1454    
1455     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1456     }
1457     }
1458    
1459     return 0;
1460     }
1461    
1462    
1463     /////////////////////////////////////////////////////////////////////////////
1464 root 1.1 // XS interface functions
1465    
1466     MODULE = CBOR::XS PACKAGE = CBOR::XS
1467    
1468     BOOT:
1469     {
1470     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1471 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1472 root 1.1
1473 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1474     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1475    
1476     types_true = get_bool ("Types::Serialiser::true" );
1477     types_false = get_bool ("Types::Serialiser::false");
1478     types_error = get_bool ("Types::Serialiser::error");
1479 root 1.11
1480 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1481    
1482 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1483     SvREADONLY_on (sv_cbor);
1484 root 1.56
1485     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1486 root 1.1 }
1487    
1488     PROTOTYPES: DISABLE
1489    
1490     void CLONE (...)
1491     CODE:
1492 root 1.10 cbor_stash = 0;
1493     cbor_tagged_stash = 0;
1494     types_error_stash = 0;
1495     types_boolean_stash = 0;
1496 root 1.1
1497     void new (char *klass)
1498     PPCODE:
1499     {
1500     SV *pv = NEWSV (0, sizeof (CBOR));
1501     SvPOK_only (pv);
1502     cbor_init ((CBOR *)SvPVX (pv));
1503     XPUSHs (sv_2mortal (sv_bless (
1504     newRV_noinc (pv),
1505     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1506     )));
1507     }
1508    
1509     void shrink (CBOR *self, int enable = 1)
1510     ALIAS:
1511     shrink = F_SHRINK
1512     allow_unknown = F_ALLOW_UNKNOWN
1513 root 1.18 allow_sharing = F_ALLOW_SHARING
1514 root 1.37 allow_cycles = F_ALLOW_CYCLES
1515 root 1.60 forbid_objects = F_FORBID_OBJECTS
1516 root 1.32 pack_strings = F_PACK_STRINGS
1517 root 1.54 text_keys = F_TEXT_KEYS
1518     text_strings = F_TEXT_STRINGS
1519 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1520 root 1.1 PPCODE:
1521     {
1522     if (enable)
1523     self->flags |= ix;
1524     else
1525     self->flags &= ~ix;
1526    
1527     XPUSHs (ST (0));
1528     }
1529    
1530     void get_shrink (CBOR *self)
1531     ALIAS:
1532     get_shrink = F_SHRINK
1533     get_allow_unknown = F_ALLOW_UNKNOWN
1534 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1535 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1536 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1537 root 1.32 get_pack_strings = F_PACK_STRINGS
1538 root 1.54 get_text_keys = F_TEXT_KEYS
1539     get_text_strings = F_TEXT_STRINGS
1540 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1541 root 1.1 PPCODE:
1542     XPUSHs (boolSV (self->flags & ix));
1543    
1544     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1545     PPCODE:
1546     self->max_depth = max_depth;
1547     XPUSHs (ST (0));
1548    
1549     U32 get_max_depth (CBOR *self)
1550     CODE:
1551     RETVAL = self->max_depth;
1552     OUTPUT:
1553     RETVAL
1554    
1555     void max_size (CBOR *self, U32 max_size = 0)
1556     PPCODE:
1557     self->max_size = max_size;
1558     XPUSHs (ST (0));
1559    
1560     int get_max_size (CBOR *self)
1561     CODE:
1562     RETVAL = self->max_size;
1563     OUTPUT:
1564     RETVAL
1565    
1566 root 1.27 void filter (CBOR *self, SV *filter = 0)
1567     PPCODE:
1568     SvREFCNT_dec (self->filter);
1569     self->filter = filter ? newSVsv (filter) : filter;
1570     XPUSHs (ST (0));
1571    
1572     SV *get_filter (CBOR *self)
1573     CODE:
1574     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1575     OUTPUT:
1576     RETVAL
1577    
1578 root 1.1 void encode (CBOR *self, SV *scalar)
1579     PPCODE:
1580     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1581     XPUSHs (scalar);
1582    
1583     void decode (CBOR *self, SV *cborstr)
1584     PPCODE:
1585     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1586     XPUSHs (cborstr);
1587    
1588     void decode_prefix (CBOR *self, SV *cborstr)
1589     PPCODE:
1590     {
1591     SV *sv;
1592     char *offset;
1593     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1594     EXTEND (SP, 2);
1595     PUSHs (sv);
1596     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1597     }
1598    
1599 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1600 root 1.42 ALIAS:
1601     incr_parse_multiple = 1
1602 root 1.40 PPCODE:
1603     {
1604     if (SvUTF8 (cborstr))
1605     sv_utf8_downgrade (cborstr, 0);
1606    
1607     if (!self->incr_count)
1608     {
1609     self->incr_count = newAV ();
1610     self->incr_pos = 0;
1611     self->incr_need = 1;
1612    
1613     av_push (self->incr_count, newSViv (1));
1614     }
1615    
1616 root 1.41 do
1617 root 1.40 {
1618     if (!incr_parse (self, cborstr))
1619     {
1620     if (self->incr_need > self->max_size && self->max_size)
1621     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1622     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1623    
1624     break;
1625     }
1626    
1627 root 1.41 SV *sv;
1628     char *offset;
1629 root 1.40
1630 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1631     XPUSHs (sv);
1632 root 1.40
1633 root 1.41 sv_chop (cborstr, offset);
1634 root 1.40
1635 root 1.41 av_clear (self->incr_count);
1636     av_push (self->incr_count, newSViv (1));
1637 root 1.40
1638 root 1.41 self->incr_pos = 0;
1639     self->incr_need = self->incr_pos + 1;
1640 root 1.40 }
1641 root 1.42 while (ix);
1642 root 1.40 }
1643    
1644     void incr_reset (CBOR *self)
1645     CODE:
1646     {
1647     SvREFCNT_dec (self->incr_count);
1648     self->incr_count = 0;
1649     }
1650    
1651 root 1.27 void DESTROY (CBOR *self)
1652     PPCODE:
1653     cbor_free (self);
1654    
1655 root 1.1 PROTOTYPES: ENABLE
1656    
1657     void encode_cbor (SV *scalar)
1658 root 1.36 ALIAS:
1659     encode_cbor = 0
1660     encode_cbor_sharing = F_ALLOW_SHARING
1661 root 1.1 PPCODE:
1662     {
1663     CBOR cbor;
1664     cbor_init (&cbor);
1665 root 1.36 cbor.flags |= ix;
1666 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1667     XPUSHs (scalar);
1668     }
1669    
1670     void decode_cbor (SV *cborstr)
1671     PPCODE:
1672     {
1673     CBOR cbor;
1674     cbor_init (&cbor);
1675     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1676     XPUSHs (cborstr);
1677     }
1678