ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.63
Committed: Sat Nov 26 04:50:58 2016 UTC (7 years, 5 months ago) by root
Branch: MAIN
CVS Tags: rel-1_6
Changes since 1.62: +26 -29 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11 root 1.61 #include <inttypes.h>
12 root 1.1
13 root 1.44 #define ECB_NO_THREADS 1
14 root 1.1 #include "ecb.h"
15    
16 root 1.14 // compatibility with perl <5.18
17     #ifndef HvNAMELEN_get
18     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
19     #endif
20     #ifndef HvNAMELEN
21     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
22     #endif
23     #ifndef HvNAMEUTF8
24     # define HvNAMEUTF8(hv) 0
25     #endif
26 root 1.63 #ifndef SvREFCNT_inc_NN
27     # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv)
28     #endif
29 root 1.28 #ifndef SvREFCNT_dec_NN
30     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
31     #endif
32 root 1.14
33 root 1.35 // known major and minor types
34     enum cbor_type
35     {
36     MAJOR_SHIFT = 5,
37     MINOR_MASK = 0x1f,
38    
39     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
40     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
41     MAJOR_BYTES = 2 << MAJOR_SHIFT,
42     MAJOR_TEXT = 3 << MAJOR_SHIFT,
43     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
44     MAJOR_MAP = 5 << MAJOR_SHIFT,
45     MAJOR_TAG = 6 << MAJOR_SHIFT,
46     MAJOR_MISC = 7 << MAJOR_SHIFT,
47    
48     // INT/STRING/ARRAY/MAP subtypes
49     LENGTH_EXT1 = 24,
50     LENGTH_EXT2 = 25,
51     LENGTH_EXT4 = 26,
52     LENGTH_EXT8 = 27,
53    
54     // SIMPLE types (effectively MISC subtypes)
55     SIMPLE_FALSE = 20,
56     SIMPLE_TRUE = 21,
57     SIMPLE_NULL = 22,
58     SIMPLE_UNDEF = 23,
59    
60     // MISC subtype (unused)
61     MISC_EXT1 = 24,
62     MISC_FLOAT16 = 25,
63     MISC_FLOAT32 = 26,
64     MISC_FLOAT64 = 27,
65    
66     // BYTES/TEXT/ARRAY/MAP
67     MINOR_INDEF = 31,
68     };
69    
70 root 1.9 // known tags
71 root 1.8 enum cbor_tag
72     {
73 root 1.35 // extensions
74     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
75     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
76     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
77     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
78     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
79     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
80     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
81    
82     // rfc7049
83     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
84     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
85     CBOR_TAG_POS_BIGNUM = 2, // byte string
86     CBOR_TAG_NEG_BIGNUM = 3, // byte string
87     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
88     CBOR_TAG_BIGFLOAT = 5, // array
89    
90     CBOR_TAG_CONV_B64U = 21, // base64url, any
91     CBOR_TAG_CONV_B64 = 22, // base64, any
92     CBOR_TAG_CONV_HEX = 23, // base16, any
93     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
94    
95     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
96     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
97     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
98     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
99     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
100 root 1.8
101 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
102 root 1.8 };
103    
104 root 1.24 #define F_SHRINK 0x00000001UL
105     #define F_ALLOW_UNKNOWN 0x00000002UL
106 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
107 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
108 root 1.60 #define F_FORBID_OBJECTS 0x00000010UL
109     #define F_PACK_STRINGS 0x00000020UL
110     #define F_TEXT_KEYS 0x00000040UL
111     #define F_TEXT_STRINGS 0x00000080UL
112     #define F_VALIDATE_UTF8 0x00000100UL
113 root 1.1
114     #define INIT_SIZE 32 // initial scalar size to be allocated
115    
116     #define SB do {
117     #define SE } while (0)
118    
119     #define IN_RANGE_INC(type,val,beg,end) \
120     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
121     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
122    
123     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
124    
125     #ifdef USE_ITHREADS
126     # define CBOR_SLOW 1
127     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
128     #else
129     # define CBOR_SLOW 0
130     # define CBOR_STASH cbor_stash
131     #endif
132    
133 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
134 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
135 root 1.1
136     typedef struct {
137     U32 flags;
138     U32 max_depth;
139     STRLEN max_size;
140 root 1.27 SV *filter;
141 root 1.40
142     // for the incremental parser
143     STRLEN incr_pos; // the current offset into the text
144     STRLEN incr_need; // minimum bytes needed to decode
145     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
146 root 1.1 } CBOR;
147    
148 root 1.5 ecb_inline void
149 root 1.1 cbor_init (CBOR *cbor)
150     {
151     Zero (cbor, 1, CBOR);
152     cbor->max_depth = 512;
153     }
154    
155 root 1.27 ecb_inline void
156     cbor_free (CBOR *cbor)
157     {
158     SvREFCNT_dec (cbor->filter);
159 root 1.40 SvREFCNT_dec (cbor->incr_count);
160 root 1.27 }
161    
162 root 1.1 /////////////////////////////////////////////////////////////////////////////
163     // utility functions
164    
165 root 1.5 ecb_inline SV *
166 root 1.1 get_bool (const char *name)
167     {
168     SV *sv = get_sv (name, 1);
169    
170     SvREADONLY_on (sv);
171     SvREADONLY_on (SvRV (sv));
172    
173     return sv;
174     }
175    
176 root 1.5 ecb_inline void
177 root 1.1 shrink (SV *sv)
178     {
179     sv_utf8_downgrade (sv, 1);
180    
181     if (SvLEN (sv) > SvCUR (sv) + 1)
182     {
183     #ifdef SvPV_shrink_to_cur
184     SvPV_shrink_to_cur (sv);
185     #elif defined (SvPV_renew)
186     SvPV_renew (sv, SvCUR (sv) + 1);
187     #endif
188     }
189     }
190    
191 root 1.21 // minimum length of a string to be registered for stringref
192     ecb_inline int
193     minimum_string_length (UV idx)
194     {
195 root 1.61 return idx <= 23 ? 3
196     : idx <= 0xffU ? 4
197     : idx <= 0xffffU ? 5
198     : idx <= 0xffffffffU ? 7
199     : 11;
200 root 1.21 }
201    
202 root 1.1 /////////////////////////////////////////////////////////////////////////////
203     // encoder
204    
205     // structure used for encoding CBOR
206     typedef struct
207     {
208     char *cur; // SvPVX (sv) + current output position
209     char *end; // SvEND (sv)
210     SV *sv; // result scalar
211     CBOR cbor;
212     U32 depth; // recursion level
213 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
214     UV stringref_idx;
215 root 1.19 HV *shareable; // ptr => index, or 0
216     UV shareable_idx;
217 root 1.1 } enc_t;
218    
219 root 1.5 ecb_inline void
220 root 1.1 need (enc_t *enc, STRLEN len)
221     {
222 root 1.61 if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len))
223 root 1.1 {
224     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
225     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
226     enc->cur = SvPVX (enc->sv) + cur;
227     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
228     }
229     }
230    
231 root 1.5 ecb_inline void
232 root 1.1 encode_ch (enc_t *enc, char ch)
233     {
234     need (enc, 1);
235     *enc->cur++ = ch;
236     }
237    
238     static void
239     encode_uint (enc_t *enc, int major, UV len)
240     {
241     need (enc, 9);
242    
243 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
244 root 1.1 *enc->cur++ = major | len;
245 root 1.36 else if (ecb_expect_true (len <= 0xffU))
246 root 1.1 {
247 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
248 root 1.1 *enc->cur++ = len;
249     }
250 root 1.36 else if (len <= 0xffffU)
251 root 1.1 {
252 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
253 root 1.1 *enc->cur++ = len >> 8;
254     *enc->cur++ = len;
255     }
256 root 1.36 else if (len <= 0xffffffffU)
257 root 1.1 {
258 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
259 root 1.1 *enc->cur++ = len >> 24;
260     *enc->cur++ = len >> 16;
261     *enc->cur++ = len >> 8;
262     *enc->cur++ = len;
263     }
264 root 1.4 else
265 root 1.1 {
266 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
267 root 1.1 *enc->cur++ = len >> 56;
268     *enc->cur++ = len >> 48;
269     *enc->cur++ = len >> 40;
270     *enc->cur++ = len >> 32;
271     *enc->cur++ = len >> 24;
272     *enc->cur++ = len >> 16;
273     *enc->cur++ = len >> 8;
274     *enc->cur++ = len;
275     }
276     }
277    
278 root 1.21 ecb_inline void
279     encode_tag (enc_t *enc, UV tag)
280     {
281 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
282 root 1.21 }
283    
284 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
285     ecb_noinline static void
286     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
287     {
288     STRLEN ulen = len;
289     U8 *p, *pend = (U8 *)str + len;
290    
291     for (p = (U8 *)str; p < pend; ++p)
292     ulen += *p >> 7; // count set high bits
293    
294     encode_uint (enc, MAJOR_TEXT, ulen);
295    
296     need (enc, ulen);
297     for (p = (U8 *)str; p < pend; ++p)
298     if (*p < 0x80)
299     *enc->cur++ = *p;
300     else
301     {
302     *enc->cur++ = 0xc0 + (*p >> 6);
303     *enc->cur++ = 0x80 + (*p & 63);
304     }
305     }
306    
307 root 1.30 ecb_inline void
308 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
309 root 1.30 {
310 root 1.53 if (ecb_expect_false (upgrade_utf8))
311 root 1.51 if (!utf8)
312     {
313 root 1.53 encode_str_utf8 (enc, utf8, str, len);
314 root 1.51 return;
315     }
316    
317 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
318 root 1.30 need (enc, len);
319     memcpy (enc->cur, str, len);
320     enc->cur += len;
321     }
322    
323 root 1.54 ecb_inline void
324 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
325 root 1.1 {
326 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
327 root 1.21 {
328 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
329 root 1.21
330     if (SvOK (*svp))
331     {
332     // already registered, use stringref
333     encode_tag (enc, CBOR_TAG_STRINGREF);
334 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
335 root 1.21 return;
336     }
337     else if (len >= minimum_string_length (enc->stringref_idx))
338     {
339     // register only
340     sv_setuv (*svp, enc->stringref_idx);
341     ++enc->stringref_idx;
342     }
343     }
344    
345 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
346 root 1.1 }
347    
348     static void encode_sv (enc_t *enc, SV *sv);
349    
350     static void
351     encode_av (enc_t *enc, AV *av)
352     {
353     int i, len = av_len (av);
354    
355     if (enc->depth >= enc->cbor.max_depth)
356     croak (ERR_NESTING_EXCEEDED);
357    
358     ++enc->depth;
359    
360 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
361 root 1.1
362 root 1.63 if (ecb_expect_false (SvMAGICAL (av)))
363 root 1.45 for (i = 0; i <= len; ++i)
364     {
365     SV **svp = av_fetch (av, i, 0);
366     encode_sv (enc, svp ? *svp : &PL_sv_undef);
367     }
368     else
369     for (i = 0; i <= len; ++i)
370 root 1.46 {
371     SV *sv = AvARRAY (av)[i];
372     encode_sv (enc, sv ? sv : &PL_sv_undef);
373     }
374 root 1.1
375     --enc->depth;
376     }
377    
378     static void
379     encode_hv (enc_t *enc, HV *hv)
380     {
381     HE *he;
382    
383     if (enc->depth >= enc->cbor.max_depth)
384     croak (ERR_NESTING_EXCEEDED);
385    
386     ++enc->depth;
387    
388     int pairs = hv_iterinit (hv);
389     int mg = SvMAGICAL (hv);
390    
391 root 1.63 if (ecb_expect_false (mg))
392 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
393 root 1.1 else
394 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
395 root 1.1
396     while ((he = hv_iternext (hv)))
397     {
398 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
399     encode_sv (enc, HeSVKEY (he));
400 root 1.1 else
401 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
402 root 1.1
403 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
404 root 1.1 }
405    
406 root 1.63 if (ecb_expect_false (mg))
407 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
408 root 1.1
409     --enc->depth;
410     }
411    
412     // encode objects, arrays and special \0=false and \1=true values.
413     static void
414     encode_rv (enc_t *enc, SV *sv)
415     {
416 root 1.19 SvGETMAGIC (sv);
417 root 1.1
418 root 1.19 svtype svt = SvTYPE (sv);
419 root 1.18
420 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
421 root 1.1 {
422 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
423     ? types_boolean_stash
424     : gv_stashpv ("Types::Serialiser::Boolean", 1);
425     HV *error_stash = !CBOR_SLOW || types_error_stash
426     ? types_error_stash
427     : gv_stashpv ("Types::Serialiser::Error", 1);
428 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
429     ? cbor_tagged_stash
430     : gv_stashpv ("CBOR::XS::Tagged" , 1);
431 root 1.1
432 root 1.11 HV *stash = SvSTASH (sv);
433    
434     if (stash == boolean_stash)
435 root 1.34 {
436 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
437 root 1.34 return;
438     }
439 root 1.11 else if (stash == error_stash)
440 root 1.34 {
441 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
442 root 1.34 return;
443     }
444 root 1.11 else if (stash == tagged_stash)
445 root 1.6 {
446     if (svt != SVt_PVAV)
447     croak ("encountered CBOR::XS::Tagged object that isn't an array");
448    
449 root 1.35 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
450 root 1.6 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
451 root 1.34
452     return;
453 root 1.6 }
454 root 1.34 }
455    
456     if (ecb_expect_false (SvREFCNT (sv) > 1)
457     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
458     {
459 root 1.63 if (ecb_expect_false (!enc->shareable))
460 root 1.34 enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
461    
462     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
463    
464     if (SvOK (*svp))
465     {
466     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
467 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
468 root 1.34 return;
469     }
470     else
471     {
472     sv_setuv (*svp, enc->shareable_idx);
473     ++enc->shareable_idx;
474     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
475     }
476     }
477    
478     if (ecb_expect_false (SvOBJECT (sv)))
479     {
480     HV *stash = SvSTASH (sv);
481     GV *method;
482    
483 root 1.60 if (enc->cbor.flags & F_FORBID_OBJECTS)
484     croak ("encountered object '%s', but forbid_objects is enabled",
485     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
486     else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
487 root 1.1 {
488 root 1.11 dSP;
489    
490 root 1.50 ENTER; SAVETMPS;
491     PUSHMARK (SP);
492 root 1.6 // we re-bless the reference to get overload and other niceties right
493 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
494    
495     PUTBACK;
496     // G_SCALAR ensures that return value is 1
497     call_sv ((SV *)GvCV (method), G_SCALAR);
498     SPAGAIN;
499    
500     // catch this surprisingly common error
501     if (SvROK (TOPs) && SvRV (TOPs) == sv)
502     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
503    
504     encode_sv (enc, POPs);
505    
506     PUTBACK;
507    
508     FREETMPS; LEAVE;
509     }
510     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
511     {
512     dSP;
513 root 1.6
514 root 1.50 ENTER; SAVETMPS;
515     SAVESTACK_POS ();
516     PUSHMARK (SP);
517 root 1.11 EXTEND (SP, 2);
518     // we re-bless the reference to get overload and other niceties right
519     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
520     PUSHs (sv_cbor);
521 root 1.1
522 root 1.11 PUTBACK;
523     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
524     SPAGAIN;
525 root 1.6
526 root 1.11 // catch this surprisingly common error
527     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
528     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
529 root 1.6
530 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
531 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
532 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
533 root 1.6
534 root 1.11 while (count)
535     encode_sv (enc, SP[1 - count--]);
536 root 1.6
537 root 1.11 PUTBACK;
538 root 1.6
539 root 1.11 FREETMPS; LEAVE;
540 root 1.1 }
541 root 1.11 else
542     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
543     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
544 root 1.1 }
545     else if (svt == SVt_PVHV)
546     encode_hv (enc, (HV *)sv);
547     else if (svt == SVt_PVAV)
548     encode_av (enc, (AV *)sv);
549 root 1.18 else
550 root 1.1 {
551 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
552     encode_sv (enc, sv);
553 root 1.1 }
554     }
555    
556     static void
557     encode_nv (enc_t *enc, SV *sv)
558     {
559     double nv = SvNVX (sv);
560    
561     need (enc, 9);
562    
563 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
564     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
565 root 1.1 //TODO: maybe I32?
566 root 1.5 else if (ecb_expect_false (nv == (float)nv))
567 root 1.1 {
568 root 1.63 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
569    
570 root 1.1 uint32_t fp = ecb_float_to_binary32 (nv);
571    
572     if (!ecb_big_endian ())
573     fp = ecb_bswap32 (fp);
574    
575     memcpy (enc->cur, &fp, 4);
576     enc->cur += 4;
577     }
578     else
579     {
580 root 1.63 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
581    
582 root 1.1 uint64_t fp = ecb_double_to_binary64 (nv);
583    
584     if (!ecb_big_endian ())
585     fp = ecb_bswap64 (fp);
586    
587     memcpy (enc->cur, &fp, 8);
588     enc->cur += 8;
589     }
590     }
591    
592     static void
593     encode_sv (enc_t *enc, SV *sv)
594     {
595     SvGETMAGIC (sv);
596    
597     if (SvPOKp (sv))
598     {
599     STRLEN len;
600     char *str = SvPV (sv, len);
601 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
602 root 1.1 }
603     else if (SvNOKp (sv))
604     encode_nv (enc, sv);
605     else if (SvIOKp (sv))
606     {
607     if (SvIsUV (sv))
608 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
609 root 1.1 else if (SvIVX (sv) >= 0)
610 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
611 root 1.1 else
612 root 1.35 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
613 root 1.1 }
614     else if (SvROK (sv))
615     encode_rv (enc, SvRV (sv));
616     else if (!SvOK (sv))
617 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
618 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
619 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
620 root 1.1 else
621     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
622     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
623     }
624    
625     static SV *
626     encode_cbor (SV *scalar, CBOR *cbor)
627     {
628 root 1.48 enc_t enc = { 0 };
629 root 1.1
630 root 1.63 enc.cbor = *cbor;
631     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
632     enc.cur = SvPVX (enc.sv);
633     enc.end = SvEND (enc.sv);
634 root 1.1
635     SvPOK_only (enc.sv);
636 root 1.20
637 root 1.32 if (cbor->flags & F_PACK_STRINGS)
638 root 1.20 {
639     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
640     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
641     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
642     }
643    
644 root 1.1 encode_sv (&enc, scalar);
645    
646     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
647     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
648    
649     if (enc.cbor.flags & F_SHRINK)
650     shrink (enc.sv);
651    
652     return enc.sv;
653     }
654    
655     /////////////////////////////////////////////////////////////////////////////
656     // decoder
657    
658     // structure used for decoding CBOR
659     typedef struct
660     {
661     U8 *cur; // current parser pointer
662     U8 *end; // end of input string
663     const char *err; // parse error, if != 0
664     CBOR cbor;
665     U32 depth; // recursion depth
666     U32 maxdepth; // recursion depth limit
667 root 1.19 AV *shareable;
668 root 1.20 AV *stringref;
669 root 1.27 SV *decode_tagged;
670 root 1.59 SV *err_sv; // optional sv for error, needs to be freed
671 root 1.1 } dec_t;
672    
673 root 1.59 // set dec->err to ERRSV
674     ecb_cold static void
675     err_errsv (dec_t *dec)
676     {
677     if (!dec->err)
678     {
679     dec->err_sv = newSVsv (ERRSV);
680    
681     // chop off the trailing \n
682     SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1);
683     *SvEND (dec->err_sv) = 0;
684    
685     dec->err = SvPVutf8_nolen (dec->err_sv);
686     }
687     }
688 root 1.1
689 root 1.59 // the following functions are used to reduce code size and help the compiler to optimise
690     ecb_cold static void
691     err_set (dec_t *dec, const char *reason)
692     {
693     if (!dec->err)
694     dec->err = reason;
695     }
696    
697     ecb_cold static void
698     err_unexpected_end (dec_t *dec)
699     {
700     err_set (dec, "unexpected end of CBOR data");
701     }
702    
703     #define ERR_DO(do) SB do; goto fail; SE
704     #define ERR(reason) ERR_DO (err_set (dec, reason))
705     #define ERR_ERRSV ERR_DO (err_errsv (dec))
706    
707 root 1.61 #define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec))
708 root 1.1
709 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
710 root 1.1 #define DEC_DEC_DEPTH --dec->depth
711    
712     static UV
713     decode_uint (dec_t *dec)
714     {
715 root 1.35 U8 m = *dec->cur & MINOR_MASK;
716     ++dec->cur;
717    
718     if (ecb_expect_true (m < LENGTH_EXT1))
719     return m;
720 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
721     {
722     WANT (1);
723     dec->cur += 1;
724     return dec->cur[-1];
725     }
726     else if (ecb_expect_true (m == LENGTH_EXT2))
727     {
728     WANT (2);
729     dec->cur += 2;
730     return (((UV)dec->cur[-2]) << 8)
731     | ((UV)dec->cur[-1]);
732     }
733     else if (ecb_expect_true (m == LENGTH_EXT4))
734     {
735     WANT (4);
736     dec->cur += 4;
737     return (((UV)dec->cur[-4]) << 24)
738     | (((UV)dec->cur[-3]) << 16)
739     | (((UV)dec->cur[-2]) << 8)
740     | ((UV)dec->cur[-1]);
741     }
742     else if (ecb_expect_true (m == LENGTH_EXT8))
743 root 1.1 {
744 root 1.36 WANT (8);
745     dec->cur += 8;
746 root 1.34
747 root 1.36 return
748 root 1.34 #if UVSIZE < 8
749 root 1.36 0
750 root 1.34 #else
751 root 1.36 (((UV)dec->cur[-8]) << 56)
752     | (((UV)dec->cur[-7]) << 48)
753     | (((UV)dec->cur[-6]) << 40)
754     | (((UV)dec->cur[-5]) << 32)
755 root 1.34 #endif
756 root 1.36 | (((UV)dec->cur[-4]) << 24)
757     | (((UV)dec->cur[-3]) << 16)
758     | (((UV)dec->cur[-2]) << 8)
759     | ((UV)dec->cur[-1]);
760 root 1.1 }
761 root 1.36 else
762     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
763 root 1.1
764     fail:
765     return 0;
766     }
767    
768     static SV *decode_sv (dec_t *dec);
769    
770     static SV *
771     decode_av (dec_t *dec)
772     {
773     AV *av = newAV ();
774    
775     DEC_INC_DEPTH;
776    
777 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
778 root 1.1 {
779     ++dec->cur;
780    
781     for (;;)
782     {
783     WANT (1);
784    
785 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
786 root 1.1 {
787     ++dec->cur;
788     break;
789     }
790    
791     av_push (av, decode_sv (dec));
792     }
793     }
794     else
795     {
796 root 1.57 UV i, len = decode_uint (dec);
797 root 1.1
798 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
799 root 1.1 av_fill (av, len - 1);
800    
801     for (i = 0; i < len; ++i)
802     AvARRAY (av)[i] = decode_sv (dec);
803     }
804    
805     DEC_DEC_DEPTH;
806     return newRV_noinc ((SV *)av);
807    
808     fail:
809 root 1.63 SvREFCNT_dec_NN (av);
810 root 1.1 DEC_DEC_DEPTH;
811     return &PL_sv_undef;
812     }
813    
814 root 1.16 static void
815     decode_he (dec_t *dec, HV *hv)
816     {
817     // for speed reasons, we specialcase single-string
818 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
819    
820 root 1.23 if (ecb_expect_true (!dec->stringref))
821 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
822 root 1.21 {
823 root 1.56 STRLEN len = decode_uint (dec);
824 root 1.21 char *key = (char *)dec->cur;
825 root 1.16
826 root 1.49 WANT (len);
827 root 1.21 dec->cur += len;
828    
829 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
830 root 1.16
831 root 1.21 return;
832     }
833 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
834 root 1.21 {
835 root 1.56 STRLEN len = decode_uint (dec);
836 root 1.21 char *key = (char *)dec->cur;
837 root 1.16
838 root 1.49 WANT (len);
839 root 1.21 dec->cur += len;
840 root 1.20
841 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
842     if (!is_utf8_string (key, len))
843     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
844 root 1.16
845 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
846 root 1.16
847 root 1.21 return;
848     }
849 root 1.20
850 root 1.21 SV *k = decode_sv (dec);
851     SV *v = decode_sv (dec);
852 root 1.16
853 root 1.59 // we leak memory if uncaught exceptions are thrown by random magical
854     // methods, and this is hopefully the only place where it can happen,
855     // so if there is a chance of an exception, take the very slow path.
856     // since catching exceptions is "undocumented/internal/forbidden" by
857     // the new p5p powers, we need to call out to a perl function :/
858     if (ecb_expect_false (SvAMAGIC (k)))
859     {
860     dSP;
861    
862     ENTER; SAVETMPS;
863     PUSHMARK (SP);
864     EXTEND (SP, 3);
865     PUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
866     PUSHs (sv_2mortal (k));
867     PUSHs (sv_2mortal (v));
868    
869     PUTBACK;
870     call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL);
871     SPAGAIN;
872    
873     FREETMPS; LEAVE;
874    
875     if (SvTRUE (ERRSV))
876     ERR_ERRSV;
877    
878     return;
879     }
880    
881 root 1.21 hv_store_ent (hv, k, v, 0);
882 root 1.63 SvREFCNT_dec_NN (k);
883 root 1.38
884     fail:
885     ;
886 root 1.16 }
887    
888 root 1.1 static SV *
889     decode_hv (dec_t *dec)
890     {
891     HV *hv = newHV ();
892    
893     DEC_INC_DEPTH;
894    
895 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
896 root 1.1 {
897     ++dec->cur;
898    
899     for (;;)
900     {
901     WANT (1);
902    
903 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
904 root 1.1 {
905     ++dec->cur;
906     break;
907     }
908    
909 root 1.16 decode_he (dec, hv);
910 root 1.1 }
911     }
912     else
913     {
914 root 1.57 UV pairs = decode_uint (dec);
915    
916     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
917 root 1.1
918 root 1.16 while (pairs--)
919     decode_he (dec, hv);
920 root 1.1 }
921    
922     DEC_DEC_DEPTH;
923     return newRV_noinc ((SV *)hv);
924    
925     fail:
926 root 1.63 SvREFCNT_dec_NN (hv);
927 root 1.1 DEC_DEC_DEPTH;
928     return &PL_sv_undef;
929     }
930    
931     static SV *
932     decode_str (dec_t *dec, int utf8)
933     {
934 root 1.6 SV *sv = 0;
935 root 1.1
936 root 1.63 if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF))
937 root 1.1 {
938 root 1.33 // indefinite length strings
939 root 1.1 ++dec->cur;
940    
941 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
942 root 1.33
943 root 1.1 sv = newSVpvn ("", 0);
944    
945     for (;;)
946     {
947     WANT (1);
948    
949 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
950     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
951 root 1.33 {
952     ++dec->cur;
953     break;
954     }
955     else
956     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
957    
958     STRLEN len = decode_uint (dec);
959 root 1.1
960 root 1.33 WANT (len);
961     sv_catpvn (sv, dec->cur, len);
962     dec->cur += len;
963 root 1.1 }
964     }
965     else
966     {
967     STRLEN len = decode_uint (dec);
968    
969     WANT (len);
970     sv = newSVpvn (dec->cur, len);
971     dec->cur += len;
972 root 1.25
973     if (ecb_expect_false (dec->stringref)
974     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
975     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
976 root 1.1 }
977    
978     if (utf8)
979 root 1.38 {
980     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
981     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
982     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
983    
984     SvUTF8_on (sv);
985     }
986 root 1.1
987     return sv;
988    
989     fail:
990 root 1.6 SvREFCNT_dec (sv);
991 root 1.1 return &PL_sv_undef;
992     }
993    
994     static SV *
995 root 1.3 decode_tagged (dec_t *dec)
996     {
997 root 1.19 SV *sv = 0;
998 root 1.3 UV tag = decode_uint (dec);
999 root 1.19
1000     WANT (1);
1001 root 1.3
1002 root 1.18 switch (tag)
1003     {
1004     case CBOR_TAG_MAGIC:
1005 root 1.20 sv = decode_sv (dec);
1006     break;
1007 root 1.18
1008     case CBOR_TAG_INDIRECTION:
1009 root 1.20 sv = newRV_noinc (decode_sv (dec));
1010     break;
1011    
1012     case CBOR_TAG_STRINGREF_NAMESPACE:
1013     {
1014 root 1.63 // do not use SAVETMPS/FREETMPS, as these will
1015 root 1.55 // erase mortalised caches, e.g. "shareable"
1016     ENTER;
1017 root 1.20
1018     SAVESPTR (dec->stringref);
1019     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
1020    
1021     sv = decode_sv (dec);
1022    
1023 root 1.55 LEAVE;
1024 root 1.20 }
1025     break;
1026    
1027     case CBOR_TAG_STRINGREF:
1028     {
1029 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1030 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
1031    
1032     UV idx = decode_uint (dec);
1033    
1034     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
1035     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
1036    
1037     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
1038     }
1039     break;
1040 root 1.11
1041 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
1042     {
1043     if (ecb_expect_false (!dec->shareable))
1044     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
1045    
1046 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
1047     {
1048     sv = newSV (0);
1049     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
1050 root 1.18
1051 root 1.37 SV *osv = decode_sv (dec);
1052     sv_setsv (sv, osv);
1053     SvREFCNT_dec_NN (osv);
1054     }
1055     else
1056     {
1057     av_push (dec->shareable, &PL_sv_undef);
1058     int idx = AvFILLp (dec->shareable);
1059     sv = decode_sv (dec);
1060     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
1061     }
1062 root 1.19 }
1063 root 1.20 break;
1064 root 1.18
1065     case CBOR_TAG_VALUE_SHAREDREF:
1066 root 1.17 {
1067 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1068 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1069 root 1.18
1070 root 1.19 UV idx = decode_uint (dec);
1071    
1072 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
1073 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1074    
1075 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1076 root 1.37
1077     if (sv == &PL_sv_undef)
1078     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1079 root 1.17 }
1080 root 1.20 break;
1081 root 1.17
1082 root 1.18 case CBOR_TAG_PERL_OBJECT:
1083     {
1084 root 1.60 if (dec->cbor.flags & F_FORBID_OBJECTS)
1085     goto filter;
1086    
1087 root 1.19 sv = decode_sv (dec);
1088    
1089 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1090     ERR ("corrupted CBOR data (non-array perl object)");
1091    
1092     AV *av = (AV *)SvRV (sv);
1093     int len = av_len (av) + 1;
1094     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1095    
1096     if (!stash)
1097     ERR ("cannot decode perl-object (package does not exist)");
1098    
1099     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1100    
1101     if (!method)
1102     ERR ("cannot decode perl-object (package does not have a THAW method)");
1103    
1104     dSP;
1105    
1106 root 1.50 ENTER; SAVETMPS;
1107     PUSHMARK (SP);
1108 root 1.18 EXTEND (SP, len + 1);
1109     // we re-bless the reference to get overload and other niceties right
1110     PUSHs (*av_fetch (av, 0, 1));
1111     PUSHs (sv_cbor);
1112    
1113     int i;
1114    
1115     for (i = 1; i < len; ++i)
1116     PUSHs (*av_fetch (av, i, 1));
1117    
1118     PUTBACK;
1119     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1120     SPAGAIN;
1121    
1122     if (SvTRUE (ERRSV))
1123     {
1124     FREETMPS; LEAVE;
1125 root 1.59 ERR_ERRSV;
1126 root 1.18 }
1127    
1128 root 1.63 SvREFCNT_dec_NN (sv);
1129 root 1.18 sv = SvREFCNT_inc (POPs);
1130 root 1.11
1131 root 1.18 PUTBACK;
1132 root 1.11
1133 root 1.18 FREETMPS; LEAVE;
1134     }
1135 root 1.20 break;
1136 root 1.9
1137 root 1.18 default:
1138 root 1.60 filter:
1139 root 1.18 {
1140 root 1.58 SV *tag_sv = newSVuv (tag);
1141    
1142 root 1.19 sv = decode_sv (dec);
1143    
1144 root 1.27 dSP;
1145 root 1.50 ENTER; SAVETMPS;
1146     SAVESTACK_POS ();
1147     PUSHMARK (SP);
1148 root 1.27 EXTEND (SP, 2);
1149 root 1.58 PUSHs (tag_sv);
1150 root 1.27 PUSHs (sv);
1151    
1152     PUTBACK;
1153     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1154     SPAGAIN;
1155    
1156     if (SvTRUE (ERRSV))
1157     {
1158 root 1.63 SvREFCNT_dec_NN (tag_sv);
1159 root 1.27 FREETMPS; LEAVE;
1160 root 1.59 ERR_ERRSV;
1161 root 1.27 }
1162    
1163     if (count)
1164     {
1165 root 1.63 SvREFCNT_dec_NN (tag_sv);
1166     SvREFCNT_dec_NN (sv);
1167     sv = SvREFCNT_inc_NN (POPs);
1168 root 1.27 }
1169     else
1170     {
1171     AV *av = newAV ();
1172 root 1.58 av_push (av, tag_sv);
1173 root 1.27 av_push (av, sv);
1174    
1175     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1176     ? cbor_tagged_stash
1177     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1178     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1179     }
1180 root 1.7
1181 root 1.27 PUTBACK;
1182    
1183     FREETMPS; LEAVE;
1184 root 1.18 }
1185 root 1.20 break;
1186 root 1.11 }
1187 root 1.9
1188 root 1.20 return sv;
1189    
1190 root 1.9 fail:
1191     SvREFCNT_dec (sv);
1192     return &PL_sv_undef;
1193 root 1.3 }
1194    
1195     static SV *
1196 root 1.1 decode_sv (dec_t *dec)
1197     {
1198     WANT (1);
1199    
1200 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1201 root 1.1 {
1202 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1203     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1204     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1205     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1206     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1207     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1208     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1209    
1210     case MAJOR_MISC >> MAJOR_SHIFT:
1211     switch (*dec->cur++ & MINOR_MASK)
1212 root 1.1 {
1213 root 1.35 case SIMPLE_FALSE:
1214 root 1.1 #if CBOR_SLOW
1215 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1216 root 1.1 #endif
1217 root 1.10 return newSVsv (types_false);
1218 root 1.35 case SIMPLE_TRUE:
1219 root 1.1 #if CBOR_SLOW
1220 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1221 root 1.1 #endif
1222 root 1.10 return newSVsv (types_true);
1223 root 1.35 case SIMPLE_NULL:
1224 root 1.1 return newSVsv (&PL_sv_undef);
1225 root 1.35 case SIMPLE_UNDEF:
1226 root 1.10 #if CBOR_SLOW
1227     types_error = get_bool ("Types::Serialiser::error");
1228     #endif
1229     return newSVsv (types_error);
1230 root 1.1
1231 root 1.35 case MISC_FLOAT16:
1232 root 1.2 {
1233     WANT (2);
1234    
1235     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1236     dec->cur += 2;
1237    
1238     return newSVnv (ecb_binary16_to_float (fp));
1239     }
1240 root 1.1
1241 root 1.35 case MISC_FLOAT32:
1242 root 1.1 {
1243     uint32_t fp;
1244     WANT (4);
1245     memcpy (&fp, dec->cur, 4);
1246     dec->cur += 4;
1247    
1248     if (!ecb_big_endian ())
1249     fp = ecb_bswap32 (fp);
1250    
1251     return newSVnv (ecb_binary32_to_float (fp));
1252     }
1253    
1254 root 1.35 case MISC_FLOAT64:
1255 root 1.1 {
1256     uint64_t fp;
1257     WANT (8);
1258     memcpy (&fp, dec->cur, 8);
1259     dec->cur += 8;
1260    
1261     if (!ecb_big_endian ())
1262     fp = ecb_bswap64 (fp);
1263    
1264     return newSVnv (ecb_binary64_to_double (fp));
1265     }
1266    
1267 root 1.35 // 0..19 unassigned simple
1268 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1269     // 28-30 unassigned misc
1270     // 31 break code
1271 root 1.1 default:
1272 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1273 root 1.1 }
1274    
1275     break;
1276     }
1277    
1278     fail:
1279     return &PL_sv_undef;
1280     }
1281    
1282     static SV *
1283     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1284     {
1285 root 1.48 dec_t dec = { 0 };
1286 root 1.1 SV *sv;
1287 root 1.16 STRLEN len;
1288     char *data = SvPVbyte (string, len);
1289 root 1.1
1290 root 1.16 if (len > cbor->max_size && cbor->max_size)
1291     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1292     (unsigned long)len, (unsigned long)cbor->max_size);
1293 root 1.1
1294     dec.cbor = *cbor;
1295 root 1.16 dec.cur = (U8 *)data;
1296     dec.end = (U8 *)data + len;
1297 root 1.1
1298     sv = decode_sv (&dec);
1299    
1300     if (offset_return)
1301     *offset_return = dec.cur;
1302    
1303     if (!(offset_return || !sv))
1304 root 1.2 if (dec.cur != dec.end && !dec.err)
1305     dec.err = "garbage after CBOR object";
1306    
1307     if (dec.err)
1308 root 1.1 {
1309 root 1.39 if (dec.shareable)
1310     {
1311 root 1.58 // need to break cyclic links, which would all be in shareable
1312 root 1.39 int i;
1313     SV **svp;
1314    
1315     for (i = av_len (dec.shareable) + 1; i--; )
1316     if ((svp = av_fetch (dec.shareable, i, 0)))
1317     sv_setsv (*svp, &PL_sv_undef);
1318     }
1319    
1320 root 1.63 SvREFCNT_dec_NN (sv);
1321 root 1.59
1322     if (dec.err_sv)
1323     sv_2mortal (dec.err_sv);
1324    
1325 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1326 root 1.1 }
1327    
1328     sv = sv_2mortal (sv);
1329    
1330     return sv;
1331     }
1332    
1333     /////////////////////////////////////////////////////////////////////////////
1334 root 1.40 // incremental parser
1335    
1336     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1337    
1338     // returns 0 for notyet, 1 for success or error
1339     static int
1340     incr_parse (CBOR *self, SV *cborstr)
1341     {
1342     STRLEN cur;
1343     SvPV (cborstr, cur);
1344    
1345     while (ecb_expect_true (self->incr_need <= cur))
1346     {
1347     // table of integer count bytes
1348     static I8 incr_len[MINOR_MASK + 1] = {
1349     0, 0, 0, 0, 0, 0, 0, 0,
1350     0, 0, 0, 0, 0, 0, 0, 0,
1351     0, 0, 0, 0, 0, 0, 0, 0,
1352     1, 2, 4, 8,-1,-1,-1,-2
1353     };
1354    
1355     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1356     U8 m = *p & MINOR_MASK;
1357     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1358     I8 ilen = incr_len[m];
1359    
1360     self->incr_need = self->incr_pos + 1;
1361    
1362     if (ecb_expect_false (ilen < 0))
1363     {
1364     if (m != MINOR_INDEF)
1365     return 1; // error
1366    
1367     if (*p == (MAJOR_MISC | MINOR_INDEF))
1368     {
1369     if (count >= 0)
1370     return 1; // error
1371    
1372     count = 1;
1373     }
1374     else
1375     {
1376     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1377     count = -1;
1378     }
1379     }
1380     else
1381     {
1382     self->incr_need += ilen;
1383     if (ecb_expect_false (self->incr_need > cur))
1384     return 0;
1385    
1386     int major = *p >> MAJOR_SHIFT;
1387    
1388     switch (major)
1389     {
1390 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1391     ++count; // tags merely prefix another value
1392     break;
1393    
1394 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1395     case MAJOR_TEXT >> MAJOR_SHIFT:
1396     case MAJOR_ARRAY >> MAJOR_SHIFT:
1397     case MAJOR_MAP >> MAJOR_SHIFT:
1398     {
1399     UV len;
1400    
1401     if (ecb_expect_false (ilen))
1402     {
1403     len = 0;
1404    
1405     do {
1406     len = (len << 8) | *++p;
1407     } while (--ilen);
1408     }
1409     else
1410     len = m;
1411    
1412     switch (major)
1413     {
1414     case MAJOR_BYTES >> MAJOR_SHIFT:
1415     case MAJOR_TEXT >> MAJOR_SHIFT:
1416     self->incr_need += len;
1417     if (ecb_expect_false (self->incr_need > cur))
1418     return 0;
1419    
1420     break;
1421    
1422     case MAJOR_MAP >> MAJOR_SHIFT:
1423     len <<= 1;
1424     case MAJOR_ARRAY >> MAJOR_SHIFT:
1425     if (len)
1426     {
1427     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1428     count = len + 1;
1429     }
1430     break;
1431     }
1432     }
1433     }
1434     }
1435    
1436     self->incr_pos = self->incr_need;
1437    
1438     if (count > 0)
1439     {
1440     while (!--count)
1441     {
1442     if (!AvFILLp (self->incr_count))
1443     return 1; // done
1444    
1445     SvREFCNT_dec_NN (av_pop (self->incr_count));
1446     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1447     }
1448    
1449     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1450     }
1451     }
1452    
1453     return 0;
1454     }
1455    
1456    
1457     /////////////////////////////////////////////////////////////////////////////
1458 root 1.1 // XS interface functions
1459    
1460     MODULE = CBOR::XS PACKAGE = CBOR::XS
1461    
1462     BOOT:
1463     {
1464     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1465 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1466 root 1.1
1467 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1468     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1469    
1470     types_true = get_bool ("Types::Serialiser::true" );
1471     types_false = get_bool ("Types::Serialiser::false");
1472     types_error = get_bool ("Types::Serialiser::error");
1473 root 1.11
1474 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1475    
1476 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1477     SvREADONLY_on (sv_cbor);
1478 root 1.56
1479     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1480 root 1.1 }
1481    
1482     PROTOTYPES: DISABLE
1483    
1484     void CLONE (...)
1485     CODE:
1486 root 1.10 cbor_stash = 0;
1487     cbor_tagged_stash = 0;
1488     types_error_stash = 0;
1489     types_boolean_stash = 0;
1490 root 1.1
1491     void new (char *klass)
1492     PPCODE:
1493     {
1494     SV *pv = NEWSV (0, sizeof (CBOR));
1495     SvPOK_only (pv);
1496     cbor_init ((CBOR *)SvPVX (pv));
1497     XPUSHs (sv_2mortal (sv_bless (
1498     newRV_noinc (pv),
1499     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1500     )));
1501     }
1502    
1503     void shrink (CBOR *self, int enable = 1)
1504     ALIAS:
1505     shrink = F_SHRINK
1506     allow_unknown = F_ALLOW_UNKNOWN
1507 root 1.18 allow_sharing = F_ALLOW_SHARING
1508 root 1.37 allow_cycles = F_ALLOW_CYCLES
1509 root 1.60 forbid_objects = F_FORBID_OBJECTS
1510 root 1.32 pack_strings = F_PACK_STRINGS
1511 root 1.54 text_keys = F_TEXT_KEYS
1512     text_strings = F_TEXT_STRINGS
1513 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1514 root 1.1 PPCODE:
1515     {
1516     if (enable)
1517     self->flags |= ix;
1518     else
1519     self->flags &= ~ix;
1520    
1521     XPUSHs (ST (0));
1522     }
1523    
1524     void get_shrink (CBOR *self)
1525     ALIAS:
1526     get_shrink = F_SHRINK
1527     get_allow_unknown = F_ALLOW_UNKNOWN
1528 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1529 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1530 root 1.60 get_forbid_objects = F_FORBID_OBJECTS
1531 root 1.32 get_pack_strings = F_PACK_STRINGS
1532 root 1.54 get_text_keys = F_TEXT_KEYS
1533     get_text_strings = F_TEXT_STRINGS
1534 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1535 root 1.1 PPCODE:
1536     XPUSHs (boolSV (self->flags & ix));
1537    
1538     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1539     PPCODE:
1540     self->max_depth = max_depth;
1541     XPUSHs (ST (0));
1542    
1543     U32 get_max_depth (CBOR *self)
1544     CODE:
1545     RETVAL = self->max_depth;
1546     OUTPUT:
1547     RETVAL
1548    
1549     void max_size (CBOR *self, U32 max_size = 0)
1550     PPCODE:
1551     self->max_size = max_size;
1552     XPUSHs (ST (0));
1553    
1554     int get_max_size (CBOR *self)
1555     CODE:
1556     RETVAL = self->max_size;
1557     OUTPUT:
1558     RETVAL
1559    
1560 root 1.27 void filter (CBOR *self, SV *filter = 0)
1561     PPCODE:
1562     SvREFCNT_dec (self->filter);
1563     self->filter = filter ? newSVsv (filter) : filter;
1564     XPUSHs (ST (0));
1565    
1566     SV *get_filter (CBOR *self)
1567     CODE:
1568     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1569     OUTPUT:
1570     RETVAL
1571    
1572 root 1.1 void encode (CBOR *self, SV *scalar)
1573     PPCODE:
1574     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1575     XPUSHs (scalar);
1576    
1577     void decode (CBOR *self, SV *cborstr)
1578     PPCODE:
1579     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1580     XPUSHs (cborstr);
1581    
1582     void decode_prefix (CBOR *self, SV *cborstr)
1583     PPCODE:
1584     {
1585     SV *sv;
1586     char *offset;
1587     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1588     EXTEND (SP, 2);
1589     PUSHs (sv);
1590     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1591     }
1592    
1593 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1594 root 1.42 ALIAS:
1595     incr_parse_multiple = 1
1596 root 1.40 PPCODE:
1597     {
1598     if (SvUTF8 (cborstr))
1599     sv_utf8_downgrade (cborstr, 0);
1600    
1601     if (!self->incr_count)
1602     {
1603     self->incr_count = newAV ();
1604     self->incr_pos = 0;
1605     self->incr_need = 1;
1606    
1607     av_push (self->incr_count, newSViv (1));
1608     }
1609    
1610 root 1.41 do
1611 root 1.40 {
1612     if (!incr_parse (self, cborstr))
1613     {
1614     if (self->incr_need > self->max_size && self->max_size)
1615     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1616     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1617    
1618     break;
1619     }
1620    
1621 root 1.41 SV *sv;
1622     char *offset;
1623 root 1.40
1624 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1625     XPUSHs (sv);
1626 root 1.40
1627 root 1.41 sv_chop (cborstr, offset);
1628 root 1.40
1629 root 1.41 av_clear (self->incr_count);
1630     av_push (self->incr_count, newSViv (1));
1631 root 1.40
1632 root 1.41 self->incr_pos = 0;
1633     self->incr_need = self->incr_pos + 1;
1634 root 1.40 }
1635 root 1.42 while (ix);
1636 root 1.40 }
1637    
1638     void incr_reset (CBOR *self)
1639     CODE:
1640     {
1641     SvREFCNT_dec (self->incr_count);
1642     self->incr_count = 0;
1643     }
1644    
1645 root 1.27 void DESTROY (CBOR *self)
1646     PPCODE:
1647     cbor_free (self);
1648    
1649 root 1.1 PROTOTYPES: ENABLE
1650    
1651     void encode_cbor (SV *scalar)
1652 root 1.36 ALIAS:
1653     encode_cbor = 0
1654     encode_cbor_sharing = F_ALLOW_SHARING
1655 root 1.1 PPCODE:
1656     {
1657     CBOR cbor;
1658     cbor_init (&cbor);
1659 root 1.36 cbor.flags |= ix;
1660 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1661     XPUSHs (scalar);
1662     }
1663    
1664     void decode_cbor (SV *cborstr)
1665     PPCODE:
1666     {
1667     CBOR cbor;
1668     cbor_init (&cbor);
1669     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1670     XPUSHs (cborstr);
1671     }
1672