ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.58
Committed: Fri Nov 25 13:27:29 2016 UTC (7 years, 5 months ago) by root
Branch: MAIN
Changes since 1.57: +7 -3 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12 root 1.44 #define ECB_NO_THREADS 1
13 root 1.1 #include "ecb.h"
14    
15 root 1.14 // compatibility with perl <5.18
16     #ifndef HvNAMELEN_get
17     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
18     #endif
19     #ifndef HvNAMELEN
20     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
21     #endif
22     #ifndef HvNAMEUTF8
23     # define HvNAMEUTF8(hv) 0
24     #endif
25 root 1.28 #ifndef SvREFCNT_dec_NN
26     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
27     #endif
28 root 1.14
29 root 1.35 // known major and minor types
30     enum cbor_type
31     {
32     MAJOR_SHIFT = 5,
33     MINOR_MASK = 0x1f,
34    
35     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
36     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
37     MAJOR_BYTES = 2 << MAJOR_SHIFT,
38     MAJOR_TEXT = 3 << MAJOR_SHIFT,
39     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
40     MAJOR_MAP = 5 << MAJOR_SHIFT,
41     MAJOR_TAG = 6 << MAJOR_SHIFT,
42     MAJOR_MISC = 7 << MAJOR_SHIFT,
43    
44     // INT/STRING/ARRAY/MAP subtypes
45     LENGTH_EXT1 = 24,
46     LENGTH_EXT2 = 25,
47     LENGTH_EXT4 = 26,
48     LENGTH_EXT8 = 27,
49    
50     // SIMPLE types (effectively MISC subtypes)
51     SIMPLE_FALSE = 20,
52     SIMPLE_TRUE = 21,
53     SIMPLE_NULL = 22,
54     SIMPLE_UNDEF = 23,
55    
56     // MISC subtype (unused)
57     MISC_EXT1 = 24,
58     MISC_FLOAT16 = 25,
59     MISC_FLOAT32 = 26,
60     MISC_FLOAT64 = 27,
61    
62     // BYTES/TEXT/ARRAY/MAP
63     MINOR_INDEF = 31,
64     };
65    
66 root 1.9 // known tags
67 root 1.8 enum cbor_tag
68     {
69 root 1.35 // extensions
70     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
71     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
72     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
73     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
74     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
75     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
76     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
77    
78     // rfc7049
79     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
80     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
81     CBOR_TAG_POS_BIGNUM = 2, // byte string
82     CBOR_TAG_NEG_BIGNUM = 3, // byte string
83     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
84     CBOR_TAG_BIGFLOAT = 5, // array
85    
86     CBOR_TAG_CONV_B64U = 21, // base64url, any
87     CBOR_TAG_CONV_B64 = 22, // base64, any
88     CBOR_TAG_CONV_HEX = 23, // base16, any
89     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
90    
91     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
92     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
93     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
94     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
95     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
96 root 1.8
97 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
98 root 1.8 };
99    
100 root 1.24 #define F_SHRINK 0x00000001UL
101     #define F_ALLOW_UNKNOWN 0x00000002UL
102 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
103 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
104     #define F_PACK_STRINGS 0x00000010UL
105 root 1.54 #define F_TEXT_KEYS 0x00000020UL
106     #define F_TEXT_STRINGS 0x00000040UL
107 root 1.53 #define F_VALIDATE_UTF8 0x00000080UL
108 root 1.1
109     #define INIT_SIZE 32 // initial scalar size to be allocated
110    
111     #define SB do {
112     #define SE } while (0)
113    
114     #define IN_RANGE_INC(type,val,beg,end) \
115     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
116     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
117    
118     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
119    
120     #ifdef USE_ITHREADS
121     # define CBOR_SLOW 1
122     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
123     #else
124     # define CBOR_SLOW 0
125     # define CBOR_STASH cbor_stash
126     #endif
127    
128 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
129 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
130 root 1.1
131     typedef struct {
132     U32 flags;
133     U32 max_depth;
134     STRLEN max_size;
135 root 1.27 SV *filter;
136 root 1.40
137     // for the incremental parser
138     STRLEN incr_pos; // the current offset into the text
139     STRLEN incr_need; // minimum bytes needed to decode
140     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
141 root 1.1 } CBOR;
142    
143 root 1.5 ecb_inline void
144 root 1.1 cbor_init (CBOR *cbor)
145     {
146     Zero (cbor, 1, CBOR);
147     cbor->max_depth = 512;
148     }
149    
150 root 1.27 ecb_inline void
151     cbor_free (CBOR *cbor)
152     {
153     SvREFCNT_dec (cbor->filter);
154 root 1.40 SvREFCNT_dec (cbor->incr_count);
155 root 1.27 }
156    
157 root 1.1 /////////////////////////////////////////////////////////////////////////////
158     // utility functions
159    
160 root 1.5 ecb_inline SV *
161 root 1.1 get_bool (const char *name)
162     {
163     SV *sv = get_sv (name, 1);
164    
165     SvREADONLY_on (sv);
166     SvREADONLY_on (SvRV (sv));
167    
168     return sv;
169     }
170    
171 root 1.5 ecb_inline void
172 root 1.1 shrink (SV *sv)
173     {
174     sv_utf8_downgrade (sv, 1);
175    
176     if (SvLEN (sv) > SvCUR (sv) + 1)
177     {
178     #ifdef SvPV_shrink_to_cur
179     SvPV_shrink_to_cur (sv);
180     #elif defined (SvPV_renew)
181     SvPV_renew (sv, SvCUR (sv) + 1);
182     #endif
183     }
184     }
185    
186 root 1.21 // minimum length of a string to be registered for stringref
187     ecb_inline int
188     minimum_string_length (UV idx)
189     {
190     return idx > 23
191     ? idx > 0xffU
192     ? idx > 0xffffU
193     ? idx > 0xffffffffU
194 root 1.29 ? 11
195     : 7
196 root 1.21 : 5
197     : 4
198     : 3;
199     }
200    
201 root 1.1 /////////////////////////////////////////////////////////////////////////////
202     // encoder
203    
204     // structure used for encoding CBOR
205     typedef struct
206     {
207     char *cur; // SvPVX (sv) + current output position
208     char *end; // SvEND (sv)
209     SV *sv; // result scalar
210     CBOR cbor;
211     U32 depth; // recursion level
212 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
213     UV stringref_idx;
214 root 1.19 HV *shareable; // ptr => index, or 0
215     UV shareable_idx;
216 root 1.1 } enc_t;
217    
218 root 1.5 ecb_inline void
219 root 1.1 need (enc_t *enc, STRLEN len)
220     {
221 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
222 root 1.1 {
223     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
224     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
225     enc->cur = SvPVX (enc->sv) + cur;
226     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
227     }
228     }
229    
230 root 1.5 ecb_inline void
231 root 1.1 encode_ch (enc_t *enc, char ch)
232     {
233     need (enc, 1);
234     *enc->cur++ = ch;
235     }
236    
237     static void
238     encode_uint (enc_t *enc, int major, UV len)
239     {
240     need (enc, 9);
241    
242 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
243 root 1.1 *enc->cur++ = major | len;
244 root 1.36 else if (ecb_expect_true (len <= 0xffU))
245 root 1.1 {
246 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
247 root 1.1 *enc->cur++ = len;
248     }
249 root 1.36 else if (len <= 0xffffU)
250 root 1.1 {
251 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
252 root 1.1 *enc->cur++ = len >> 8;
253     *enc->cur++ = len;
254     }
255 root 1.36 else if (len <= 0xffffffffU)
256 root 1.1 {
257 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
258 root 1.1 *enc->cur++ = len >> 24;
259     *enc->cur++ = len >> 16;
260     *enc->cur++ = len >> 8;
261     *enc->cur++ = len;
262     }
263 root 1.4 else
264 root 1.1 {
265 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
266 root 1.1 *enc->cur++ = len >> 56;
267     *enc->cur++ = len >> 48;
268     *enc->cur++ = len >> 40;
269     *enc->cur++ = len >> 32;
270     *enc->cur++ = len >> 24;
271     *enc->cur++ = len >> 16;
272     *enc->cur++ = len >> 8;
273     *enc->cur++ = len;
274     }
275     }
276    
277 root 1.21 ecb_inline void
278     encode_tag (enc_t *enc, UV tag)
279     {
280 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
281 root 1.21 }
282    
283 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
284     ecb_noinline static void
285     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
286     {
287     STRLEN ulen = len;
288     U8 *p, *pend = (U8 *)str + len;
289    
290     for (p = (U8 *)str; p < pend; ++p)
291     ulen += *p >> 7; // count set high bits
292    
293     encode_uint (enc, MAJOR_TEXT, ulen);
294    
295     need (enc, ulen);
296     for (p = (U8 *)str; p < pend; ++p)
297     if (*p < 0x80)
298     *enc->cur++ = *p;
299     else
300     {
301     *enc->cur++ = 0xc0 + (*p >> 6);
302     *enc->cur++ = 0x80 + (*p & 63);
303     }
304     }
305    
306 root 1.30 ecb_inline void
307 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
308 root 1.30 {
309 root 1.53 if (ecb_expect_false (upgrade_utf8))
310 root 1.51 if (!utf8)
311     {
312 root 1.53 encode_str_utf8 (enc, utf8, str, len);
313 root 1.51 return;
314     }
315    
316 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
317 root 1.30 need (enc, len);
318     memcpy (enc->cur, str, len);
319     enc->cur += len;
320     }
321    
322 root 1.54 ecb_inline void
323 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
324 root 1.1 {
325 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
326 root 1.21 {
327 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
328 root 1.21
329     if (SvOK (*svp))
330     {
331     // already registered, use stringref
332     encode_tag (enc, CBOR_TAG_STRINGREF);
333 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
334 root 1.21 return;
335     }
336     else if (len >= minimum_string_length (enc->stringref_idx))
337     {
338     // register only
339     sv_setuv (*svp, enc->stringref_idx);
340     ++enc->stringref_idx;
341     }
342     }
343    
344 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
345 root 1.1 }
346    
347     static void encode_sv (enc_t *enc, SV *sv);
348    
349     static void
350     encode_av (enc_t *enc, AV *av)
351     {
352     int i, len = av_len (av);
353    
354     if (enc->depth >= enc->cbor.max_depth)
355     croak (ERR_NESTING_EXCEEDED);
356    
357     ++enc->depth;
358    
359 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
360 root 1.1
361 root 1.45 if (SvMAGICAL (av))
362     for (i = 0; i <= len; ++i)
363     {
364     SV **svp = av_fetch (av, i, 0);
365     encode_sv (enc, svp ? *svp : &PL_sv_undef);
366     }
367     else
368     for (i = 0; i <= len; ++i)
369 root 1.46 {
370     SV *sv = AvARRAY (av)[i];
371     encode_sv (enc, sv ? sv : &PL_sv_undef);
372     }
373 root 1.1
374     --enc->depth;
375     }
376    
377     static void
378     encode_hv (enc_t *enc, HV *hv)
379     {
380     HE *he;
381    
382     if (enc->depth >= enc->cbor.max_depth)
383     croak (ERR_NESTING_EXCEEDED);
384    
385     ++enc->depth;
386    
387     int pairs = hv_iterinit (hv);
388     int mg = SvMAGICAL (hv);
389    
390     if (mg)
391 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
392 root 1.1 else
393 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
394 root 1.1
395     while ((he = hv_iternext (hv)))
396     {
397 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
398     encode_sv (enc, HeSVKEY (he));
399 root 1.1 else
400 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
401 root 1.1
402 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
403 root 1.1 }
404    
405     if (mg)
406 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
407 root 1.1
408     --enc->depth;
409     }
410    
411     // encode objects, arrays and special \0=false and \1=true values.
412     static void
413     encode_rv (enc_t *enc, SV *sv)
414     {
415 root 1.19 SvGETMAGIC (sv);
416 root 1.1
417 root 1.19 svtype svt = SvTYPE (sv);
418 root 1.18
419 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
420 root 1.1 {
421 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
422     ? types_boolean_stash
423     : gv_stashpv ("Types::Serialiser::Boolean", 1);
424     HV *error_stash = !CBOR_SLOW || types_error_stash
425     ? types_error_stash
426     : gv_stashpv ("Types::Serialiser::Error", 1);
427 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
428     ? cbor_tagged_stash
429     : gv_stashpv ("CBOR::XS::Tagged" , 1);
430 root 1.1
431 root 1.11 HV *stash = SvSTASH (sv);
432    
433     if (stash == boolean_stash)
434 root 1.34 {
435 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
436 root 1.34 return;
437     }
438 root 1.11 else if (stash == error_stash)
439 root 1.34 {
440 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
441 root 1.34 return;
442     }
443 root 1.11 else if (stash == tagged_stash)
444 root 1.6 {
445     if (svt != SVt_PVAV)
446     croak ("encountered CBOR::XS::Tagged object that isn't an array");
447    
448 root 1.35 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
449 root 1.6 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
450 root 1.34
451     return;
452 root 1.6 }
453 root 1.34 }
454    
455     if (ecb_expect_false (SvREFCNT (sv) > 1)
456     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
457     {
458     if (!enc->shareable)
459     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
460    
461     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
462    
463     if (SvOK (*svp))
464     {
465     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
466 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
467 root 1.34 return;
468     }
469     else
470     {
471     sv_setuv (*svp, enc->shareable_idx);
472     ++enc->shareable_idx;
473     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
474     }
475     }
476    
477     if (ecb_expect_false (SvOBJECT (sv)))
478     {
479     HV *stash = SvSTASH (sv);
480     GV *method;
481    
482     if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
483 root 1.1 {
484 root 1.11 dSP;
485    
486 root 1.50 ENTER; SAVETMPS;
487     PUSHMARK (SP);
488 root 1.6 // we re-bless the reference to get overload and other niceties right
489 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
490    
491     PUTBACK;
492     // G_SCALAR ensures that return value is 1
493     call_sv ((SV *)GvCV (method), G_SCALAR);
494     SPAGAIN;
495    
496     // catch this surprisingly common error
497     if (SvROK (TOPs) && SvRV (TOPs) == sv)
498     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
499    
500     encode_sv (enc, POPs);
501    
502     PUTBACK;
503    
504     FREETMPS; LEAVE;
505     }
506     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
507     {
508     dSP;
509 root 1.6
510 root 1.50 ENTER; SAVETMPS;
511     SAVESTACK_POS ();
512     PUSHMARK (SP);
513 root 1.11 EXTEND (SP, 2);
514     // we re-bless the reference to get overload and other niceties right
515     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
516     PUSHs (sv_cbor);
517 root 1.1
518 root 1.11 PUTBACK;
519     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
520     SPAGAIN;
521 root 1.6
522 root 1.11 // catch this surprisingly common error
523     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
524     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
525 root 1.6
526 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
527 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
528 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
529 root 1.6
530 root 1.11 while (count)
531     encode_sv (enc, SP[1 - count--]);
532 root 1.6
533 root 1.11 PUTBACK;
534 root 1.6
535 root 1.11 FREETMPS; LEAVE;
536 root 1.1 }
537 root 1.11 else
538     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
539     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
540 root 1.1 }
541     else if (svt == SVt_PVHV)
542     encode_hv (enc, (HV *)sv);
543     else if (svt == SVt_PVAV)
544     encode_av (enc, (AV *)sv);
545 root 1.18 else
546 root 1.1 {
547 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
548     encode_sv (enc, sv);
549 root 1.1 }
550     }
551    
552     static void
553     encode_nv (enc_t *enc, SV *sv)
554     {
555     double nv = SvNVX (sv);
556    
557     need (enc, 9);
558    
559 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
560     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
561 root 1.1 //TODO: maybe I32?
562 root 1.5 else if (ecb_expect_false (nv == (float)nv))
563 root 1.1 {
564     uint32_t fp = ecb_float_to_binary32 (nv);
565    
566 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
567 root 1.1
568     if (!ecb_big_endian ())
569     fp = ecb_bswap32 (fp);
570    
571     memcpy (enc->cur, &fp, 4);
572     enc->cur += 4;
573     }
574     else
575     {
576     uint64_t fp = ecb_double_to_binary64 (nv);
577    
578 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
579 root 1.1
580     if (!ecb_big_endian ())
581     fp = ecb_bswap64 (fp);
582    
583     memcpy (enc->cur, &fp, 8);
584     enc->cur += 8;
585     }
586     }
587    
588     static void
589     encode_sv (enc_t *enc, SV *sv)
590     {
591     SvGETMAGIC (sv);
592    
593     if (SvPOKp (sv))
594     {
595     STRLEN len;
596     char *str = SvPV (sv, len);
597 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
598 root 1.1 }
599     else if (SvNOKp (sv))
600     encode_nv (enc, sv);
601     else if (SvIOKp (sv))
602     {
603     if (SvIsUV (sv))
604 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
605 root 1.1 else if (SvIVX (sv) >= 0)
606 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
607 root 1.1 else
608 root 1.35 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
609 root 1.1 }
610     else if (SvROK (sv))
611     encode_rv (enc, SvRV (sv));
612     else if (!SvOK (sv))
613 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
614 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
615 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
616 root 1.1 else
617     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
618     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
619     }
620    
621     static SV *
622     encode_cbor (SV *scalar, CBOR *cbor)
623     {
624 root 1.48 enc_t enc = { 0 };
625 root 1.1
626     enc.cbor = *cbor;
627     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
628     enc.cur = SvPVX (enc.sv);
629     enc.end = SvEND (enc.sv);
630    
631     SvPOK_only (enc.sv);
632 root 1.20
633 root 1.32 if (cbor->flags & F_PACK_STRINGS)
634 root 1.20 {
635     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
636     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
637     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
638     }
639    
640 root 1.1 encode_sv (&enc, scalar);
641    
642     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
643     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
644    
645     if (enc.cbor.flags & F_SHRINK)
646     shrink (enc.sv);
647    
648     return enc.sv;
649     }
650    
651     /////////////////////////////////////////////////////////////////////////////
652     // decoder
653    
654     // structure used for decoding CBOR
655     typedef struct
656     {
657     U8 *cur; // current parser pointer
658     U8 *end; // end of input string
659     const char *err; // parse error, if != 0
660     CBOR cbor;
661     U32 depth; // recursion depth
662     U32 maxdepth; // recursion depth limit
663 root 1.19 AV *shareable;
664 root 1.20 AV *stringref;
665 root 1.27 SV *decode_tagged;
666 root 1.1 } dec_t;
667    
668     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
669    
670 root 1.57 #define WANT(len) if (ecb_expect_false ((UV)(dec->end - dec->cur) < (UV)len)) ERR ("unexpected end of CBOR data")
671 root 1.1
672 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
673 root 1.1 #define DEC_DEC_DEPTH --dec->depth
674    
675     static UV
676     decode_uint (dec_t *dec)
677     {
678 root 1.35 U8 m = *dec->cur & MINOR_MASK;
679     ++dec->cur;
680    
681     if (ecb_expect_true (m < LENGTH_EXT1))
682     return m;
683 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
684     {
685     WANT (1);
686     dec->cur += 1;
687     return dec->cur[-1];
688     }
689     else if (ecb_expect_true (m == LENGTH_EXT2))
690     {
691     WANT (2);
692     dec->cur += 2;
693     return (((UV)dec->cur[-2]) << 8)
694     | ((UV)dec->cur[-1]);
695     }
696     else if (ecb_expect_true (m == LENGTH_EXT4))
697     {
698     WANT (4);
699     dec->cur += 4;
700     return (((UV)dec->cur[-4]) << 24)
701     | (((UV)dec->cur[-3]) << 16)
702     | (((UV)dec->cur[-2]) << 8)
703     | ((UV)dec->cur[-1]);
704     }
705     else if (ecb_expect_true (m == LENGTH_EXT8))
706 root 1.1 {
707 root 1.36 WANT (8);
708     dec->cur += 8;
709 root 1.34
710 root 1.36 return
711 root 1.34 #if UVSIZE < 8
712 root 1.36 0
713 root 1.34 #else
714 root 1.36 (((UV)dec->cur[-8]) << 56)
715     | (((UV)dec->cur[-7]) << 48)
716     | (((UV)dec->cur[-6]) << 40)
717     | (((UV)dec->cur[-5]) << 32)
718 root 1.34 #endif
719 root 1.36 | (((UV)dec->cur[-4]) << 24)
720     | (((UV)dec->cur[-3]) << 16)
721     | (((UV)dec->cur[-2]) << 8)
722     | ((UV)dec->cur[-1]);
723 root 1.1 }
724 root 1.36 else
725     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
726 root 1.1
727     fail:
728     return 0;
729     }
730    
731     static SV *decode_sv (dec_t *dec);
732    
733     static SV *
734     decode_av (dec_t *dec)
735     {
736     AV *av = newAV ();
737    
738     DEC_INC_DEPTH;
739    
740 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
741 root 1.1 {
742     ++dec->cur;
743    
744     for (;;)
745     {
746     WANT (1);
747    
748 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
749 root 1.1 {
750     ++dec->cur;
751     break;
752     }
753    
754     av_push (av, decode_sv (dec));
755     }
756     }
757     else
758     {
759 root 1.57 UV i, len = decode_uint (dec);
760 root 1.1
761 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
762 root 1.1 av_fill (av, len - 1);
763    
764     for (i = 0; i < len; ++i)
765     AvARRAY (av)[i] = decode_sv (dec);
766     }
767    
768     DEC_DEC_DEPTH;
769     return newRV_noinc ((SV *)av);
770    
771     fail:
772     SvREFCNT_dec (av);
773     DEC_DEC_DEPTH;
774     return &PL_sv_undef;
775     }
776    
777 root 1.16 static void
778     decode_he (dec_t *dec, HV *hv)
779     {
780     // for speed reasons, we specialcase single-string
781 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
782    
783 root 1.23 if (ecb_expect_true (!dec->stringref))
784 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
785 root 1.21 {
786 root 1.56 STRLEN len = decode_uint (dec);
787 root 1.21 char *key = (char *)dec->cur;
788 root 1.16
789 root 1.49 WANT (len);
790 root 1.21 dec->cur += len;
791    
792 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
793 root 1.16
794 root 1.21 return;
795     }
796 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
797 root 1.21 {
798 root 1.56 STRLEN len = decode_uint (dec);
799 root 1.21 char *key = (char *)dec->cur;
800 root 1.16
801 root 1.49 WANT (len);
802 root 1.21 dec->cur += len;
803 root 1.20
804 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
805     if (!is_utf8_string (key, len))
806     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
807 root 1.16
808 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
809 root 1.16
810 root 1.21 return;
811     }
812 root 1.20
813 root 1.21 SV *k = decode_sv (dec);
814     SV *v = decode_sv (dec);
815 root 1.16
816 root 1.21 hv_store_ent (hv, k, v, 0);
817     SvREFCNT_dec (k);
818 root 1.38
819     fail:
820     ;
821 root 1.16 }
822    
823 root 1.1 static SV *
824     decode_hv (dec_t *dec)
825     {
826     HV *hv = newHV ();
827    
828     DEC_INC_DEPTH;
829    
830 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
831 root 1.1 {
832     ++dec->cur;
833    
834     for (;;)
835     {
836     WANT (1);
837    
838 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
839 root 1.1 {
840     ++dec->cur;
841     break;
842     }
843    
844 root 1.16 decode_he (dec, hv);
845 root 1.1 }
846     }
847     else
848     {
849 root 1.57 UV pairs = decode_uint (dec);
850    
851     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
852 root 1.1
853 root 1.16 while (pairs--)
854     decode_he (dec, hv);
855 root 1.1 }
856    
857     DEC_DEC_DEPTH;
858     return newRV_noinc ((SV *)hv);
859    
860     fail:
861     SvREFCNT_dec (hv);
862     DEC_DEC_DEPTH;
863     return &PL_sv_undef;
864     }
865    
866     static SV *
867     decode_str (dec_t *dec, int utf8)
868     {
869 root 1.6 SV *sv = 0;
870 root 1.1
871 root 1.35 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF)
872 root 1.1 {
873 root 1.33 // indefinite length strings
874 root 1.1 ++dec->cur;
875    
876 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
877 root 1.33
878 root 1.1 sv = newSVpvn ("", 0);
879    
880     for (;;)
881     {
882     WANT (1);
883    
884 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
885     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
886 root 1.33 {
887     ++dec->cur;
888     break;
889     }
890     else
891     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
892    
893     STRLEN len = decode_uint (dec);
894 root 1.1
895 root 1.33 WANT (len);
896     sv_catpvn (sv, dec->cur, len);
897     dec->cur += len;
898 root 1.1 }
899     }
900     else
901     {
902     STRLEN len = decode_uint (dec);
903    
904     WANT (len);
905     sv = newSVpvn (dec->cur, len);
906     dec->cur += len;
907 root 1.25
908     if (ecb_expect_false (dec->stringref)
909     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
910     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
911 root 1.1 }
912    
913     if (utf8)
914 root 1.38 {
915     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
916     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
917     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
918    
919     SvUTF8_on (sv);
920     }
921 root 1.1
922     return sv;
923    
924     fail:
925 root 1.6 SvREFCNT_dec (sv);
926 root 1.1 return &PL_sv_undef;
927     }
928    
929     static SV *
930 root 1.3 decode_tagged (dec_t *dec)
931     {
932 root 1.19 SV *sv = 0;
933 root 1.3 UV tag = decode_uint (dec);
934 root 1.19
935     WANT (1);
936 root 1.3
937 root 1.18 switch (tag)
938     {
939     case CBOR_TAG_MAGIC:
940 root 1.20 sv = decode_sv (dec);
941     break;
942 root 1.18
943     case CBOR_TAG_INDIRECTION:
944 root 1.20 sv = newRV_noinc (decode_sv (dec));
945     break;
946    
947     case CBOR_TAG_STRINGREF_NAMESPACE:
948     {
949 root 1.55 // do nmot use SAVETMPS/FREETMPS, as these will
950     // erase mortalised caches, e.g. "shareable"
951     ENTER;
952 root 1.20
953     SAVESPTR (dec->stringref);
954     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
955    
956     sv = decode_sv (dec);
957    
958 root 1.55 LEAVE;
959 root 1.20 }
960     break;
961    
962     case CBOR_TAG_STRINGREF:
963     {
964 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
965 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
966    
967     UV idx = decode_uint (dec);
968    
969     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
970     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
971    
972     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
973     }
974     break;
975 root 1.11
976 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
977     {
978     if (ecb_expect_false (!dec->shareable))
979     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
980    
981 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
982     {
983     sv = newSV (0);
984     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
985 root 1.18
986 root 1.37 SV *osv = decode_sv (dec);
987     sv_setsv (sv, osv);
988     SvREFCNT_dec_NN (osv);
989     }
990     else
991     {
992     av_push (dec->shareable, &PL_sv_undef);
993     int idx = AvFILLp (dec->shareable);
994     sv = decode_sv (dec);
995     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
996     }
997 root 1.19 }
998 root 1.20 break;
999 root 1.18
1000     case CBOR_TAG_VALUE_SHAREDREF:
1001 root 1.17 {
1002 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1003 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1004 root 1.18
1005 root 1.19 UV idx = decode_uint (dec);
1006    
1007 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
1008 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1009    
1010 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1011 root 1.37
1012     if (sv == &PL_sv_undef)
1013     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1014 root 1.17 }
1015 root 1.20 break;
1016 root 1.17
1017 root 1.18 case CBOR_TAG_PERL_OBJECT:
1018     {
1019 root 1.19 sv = decode_sv (dec);
1020    
1021 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1022     ERR ("corrupted CBOR data (non-array perl object)");
1023    
1024     AV *av = (AV *)SvRV (sv);
1025     int len = av_len (av) + 1;
1026     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1027    
1028     if (!stash)
1029     ERR ("cannot decode perl-object (package does not exist)");
1030    
1031     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1032    
1033     if (!method)
1034     ERR ("cannot decode perl-object (package does not have a THAW method)");
1035    
1036     dSP;
1037    
1038 root 1.50 ENTER; SAVETMPS;
1039     PUSHMARK (SP);
1040 root 1.18 EXTEND (SP, len + 1);
1041     // we re-bless the reference to get overload and other niceties right
1042     PUSHs (*av_fetch (av, 0, 1));
1043     PUSHs (sv_cbor);
1044    
1045     int i;
1046    
1047     for (i = 1; i < len; ++i)
1048     PUSHs (*av_fetch (av, i, 1));
1049    
1050     PUTBACK;
1051     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1052     SPAGAIN;
1053    
1054     if (SvTRUE (ERRSV))
1055     {
1056     FREETMPS; LEAVE;
1057     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1058     }
1059    
1060     SvREFCNT_dec (sv);
1061     sv = SvREFCNT_inc (POPs);
1062 root 1.11
1063 root 1.18 PUTBACK;
1064 root 1.11
1065 root 1.18 FREETMPS; LEAVE;
1066     }
1067 root 1.20 break;
1068 root 1.9
1069 root 1.18 default:
1070     {
1071 root 1.58 SV *tag_sv = newSVuv (tag);
1072    
1073 root 1.19 sv = decode_sv (dec);
1074    
1075 root 1.27 dSP;
1076 root 1.50 ENTER; SAVETMPS;
1077     SAVESTACK_POS ();
1078     PUSHMARK (SP);
1079 root 1.27 EXTEND (SP, 2);
1080 root 1.58 PUSHs (tag_sv);
1081 root 1.27 PUSHs (sv);
1082    
1083     PUTBACK;
1084     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1085     SPAGAIN;
1086    
1087     if (SvTRUE (ERRSV))
1088     {
1089 root 1.58 SvREFCNT_dec (tag_sv);
1090 root 1.27 FREETMPS; LEAVE;
1091     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1092     }
1093    
1094     if (count)
1095     {
1096 root 1.58 SvREFCNT_dec (tag_sv);
1097 root 1.27 SvREFCNT_dec (sv);
1098     sv = SvREFCNT_inc (POPs);
1099     }
1100     else
1101     {
1102     AV *av = newAV ();
1103 root 1.58 av_push (av, tag_sv);
1104 root 1.27 av_push (av, sv);
1105    
1106     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1107     ? cbor_tagged_stash
1108     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1109     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1110     }
1111 root 1.7
1112 root 1.27 PUTBACK;
1113    
1114     FREETMPS; LEAVE;
1115 root 1.18 }
1116 root 1.20 break;
1117 root 1.11 }
1118 root 1.9
1119 root 1.20 return sv;
1120    
1121 root 1.9 fail:
1122     SvREFCNT_dec (sv);
1123     return &PL_sv_undef;
1124 root 1.3 }
1125    
1126     static SV *
1127 root 1.1 decode_sv (dec_t *dec)
1128     {
1129     WANT (1);
1130    
1131 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1132 root 1.1 {
1133 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1134     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1135     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1136     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1137     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1138     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1139     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1140    
1141     case MAJOR_MISC >> MAJOR_SHIFT:
1142     switch (*dec->cur++ & MINOR_MASK)
1143 root 1.1 {
1144 root 1.35 case SIMPLE_FALSE:
1145 root 1.1 #if CBOR_SLOW
1146 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1147 root 1.1 #endif
1148 root 1.10 return newSVsv (types_false);
1149 root 1.35 case SIMPLE_TRUE:
1150 root 1.1 #if CBOR_SLOW
1151 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1152 root 1.1 #endif
1153 root 1.10 return newSVsv (types_true);
1154 root 1.35 case SIMPLE_NULL:
1155 root 1.1 return newSVsv (&PL_sv_undef);
1156 root 1.35 case SIMPLE_UNDEF:
1157 root 1.10 #if CBOR_SLOW
1158     types_error = get_bool ("Types::Serialiser::error");
1159     #endif
1160     return newSVsv (types_error);
1161 root 1.1
1162 root 1.35 case MISC_FLOAT16:
1163 root 1.2 {
1164     WANT (2);
1165    
1166     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1167     dec->cur += 2;
1168    
1169     return newSVnv (ecb_binary16_to_float (fp));
1170     }
1171 root 1.1
1172 root 1.35 case MISC_FLOAT32:
1173 root 1.1 {
1174     uint32_t fp;
1175     WANT (4);
1176     memcpy (&fp, dec->cur, 4);
1177     dec->cur += 4;
1178    
1179     if (!ecb_big_endian ())
1180     fp = ecb_bswap32 (fp);
1181    
1182     return newSVnv (ecb_binary32_to_float (fp));
1183     }
1184    
1185 root 1.35 case MISC_FLOAT64:
1186 root 1.1 {
1187     uint64_t fp;
1188     WANT (8);
1189     memcpy (&fp, dec->cur, 8);
1190     dec->cur += 8;
1191    
1192     if (!ecb_big_endian ())
1193     fp = ecb_bswap64 (fp);
1194    
1195     return newSVnv (ecb_binary64_to_double (fp));
1196     }
1197    
1198 root 1.35 // 0..19 unassigned simple
1199 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1200     // 28-30 unassigned misc
1201     // 31 break code
1202 root 1.1 default:
1203 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1204 root 1.1 }
1205    
1206     break;
1207     }
1208    
1209     fail:
1210     return &PL_sv_undef;
1211     }
1212    
1213     static SV *
1214     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1215     {
1216 root 1.48 dec_t dec = { 0 };
1217 root 1.1 SV *sv;
1218 root 1.16 STRLEN len;
1219     char *data = SvPVbyte (string, len);
1220 root 1.1
1221 root 1.16 if (len > cbor->max_size && cbor->max_size)
1222     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1223     (unsigned long)len, (unsigned long)cbor->max_size);
1224 root 1.1
1225     dec.cbor = *cbor;
1226 root 1.16 dec.cur = (U8 *)data;
1227     dec.end = (U8 *)data + len;
1228 root 1.1
1229     sv = decode_sv (&dec);
1230    
1231     if (offset_return)
1232     *offset_return = dec.cur;
1233    
1234     if (!(offset_return || !sv))
1235 root 1.2 if (dec.cur != dec.end && !dec.err)
1236     dec.err = "garbage after CBOR object";
1237    
1238     if (dec.err)
1239 root 1.1 {
1240 root 1.39 if (dec.shareable)
1241     {
1242 root 1.58 // need to break cyclic links, which would all be in shareable
1243 root 1.39 int i;
1244     SV **svp;
1245    
1246     for (i = av_len (dec.shareable) + 1; i--; )
1247     if ((svp = av_fetch (dec.shareable, i, 0)))
1248     sv_setsv (*svp, &PL_sv_undef);
1249     }
1250    
1251 root 1.2 SvREFCNT_dec (sv);
1252 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1253 root 1.1 }
1254    
1255     sv = sv_2mortal (sv);
1256    
1257     return sv;
1258     }
1259    
1260     /////////////////////////////////////////////////////////////////////////////
1261 root 1.40 // incremental parser
1262    
1263     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1264    
1265     // returns 0 for notyet, 1 for success or error
1266     static int
1267     incr_parse (CBOR *self, SV *cborstr)
1268     {
1269     STRLEN cur;
1270     SvPV (cborstr, cur);
1271    
1272     while (ecb_expect_true (self->incr_need <= cur))
1273     {
1274     // table of integer count bytes
1275     static I8 incr_len[MINOR_MASK + 1] = {
1276     0, 0, 0, 0, 0, 0, 0, 0,
1277     0, 0, 0, 0, 0, 0, 0, 0,
1278     0, 0, 0, 0, 0, 0, 0, 0,
1279     1, 2, 4, 8,-1,-1,-1,-2
1280     };
1281    
1282     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1283     U8 m = *p & MINOR_MASK;
1284     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1285     I8 ilen = incr_len[m];
1286    
1287     self->incr_need = self->incr_pos + 1;
1288    
1289     if (ecb_expect_false (ilen < 0))
1290     {
1291     if (m != MINOR_INDEF)
1292     return 1; // error
1293    
1294     if (*p == (MAJOR_MISC | MINOR_INDEF))
1295     {
1296     if (count >= 0)
1297     return 1; // error
1298    
1299     count = 1;
1300     }
1301     else
1302     {
1303     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1304     count = -1;
1305     }
1306     }
1307     else
1308     {
1309     self->incr_need += ilen;
1310     if (ecb_expect_false (self->incr_need > cur))
1311     return 0;
1312    
1313     int major = *p >> MAJOR_SHIFT;
1314    
1315     switch (major)
1316     {
1317 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1318     ++count; // tags merely prefix another value
1319     break;
1320    
1321 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1322     case MAJOR_TEXT >> MAJOR_SHIFT:
1323     case MAJOR_ARRAY >> MAJOR_SHIFT:
1324     case MAJOR_MAP >> MAJOR_SHIFT:
1325     {
1326     UV len;
1327    
1328     if (ecb_expect_false (ilen))
1329     {
1330     len = 0;
1331    
1332     do {
1333     len = (len << 8) | *++p;
1334     } while (--ilen);
1335     }
1336     else
1337     len = m;
1338    
1339     switch (major)
1340     {
1341     case MAJOR_BYTES >> MAJOR_SHIFT:
1342     case MAJOR_TEXT >> MAJOR_SHIFT:
1343     self->incr_need += len;
1344     if (ecb_expect_false (self->incr_need > cur))
1345     return 0;
1346    
1347     break;
1348    
1349     case MAJOR_MAP >> MAJOR_SHIFT:
1350     len <<= 1;
1351     case MAJOR_ARRAY >> MAJOR_SHIFT:
1352     if (len)
1353     {
1354     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1355     count = len + 1;
1356     }
1357     break;
1358     }
1359     }
1360     }
1361     }
1362    
1363     self->incr_pos = self->incr_need;
1364    
1365     if (count > 0)
1366     {
1367     while (!--count)
1368     {
1369     if (!AvFILLp (self->incr_count))
1370     return 1; // done
1371    
1372     SvREFCNT_dec_NN (av_pop (self->incr_count));
1373     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1374     }
1375    
1376     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1377     }
1378     }
1379    
1380     return 0;
1381     }
1382    
1383    
1384     /////////////////////////////////////////////////////////////////////////////
1385 root 1.1 // XS interface functions
1386    
1387     MODULE = CBOR::XS PACKAGE = CBOR::XS
1388    
1389     BOOT:
1390     {
1391     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1392 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1393 root 1.1
1394 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1395     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1396    
1397     types_true = get_bool ("Types::Serialiser::true" );
1398     types_false = get_bool ("Types::Serialiser::false");
1399     types_error = get_bool ("Types::Serialiser::error");
1400 root 1.11
1401 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1402    
1403 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1404     SvREADONLY_on (sv_cbor);
1405 root 1.56
1406     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1407 root 1.1 }
1408    
1409     PROTOTYPES: DISABLE
1410    
1411     void CLONE (...)
1412     CODE:
1413 root 1.10 cbor_stash = 0;
1414     cbor_tagged_stash = 0;
1415     types_error_stash = 0;
1416     types_boolean_stash = 0;
1417 root 1.1
1418     void new (char *klass)
1419     PPCODE:
1420     {
1421     SV *pv = NEWSV (0, sizeof (CBOR));
1422     SvPOK_only (pv);
1423     cbor_init ((CBOR *)SvPVX (pv));
1424     XPUSHs (sv_2mortal (sv_bless (
1425     newRV_noinc (pv),
1426     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1427     )));
1428     }
1429    
1430     void shrink (CBOR *self, int enable = 1)
1431     ALIAS:
1432     shrink = F_SHRINK
1433     allow_unknown = F_ALLOW_UNKNOWN
1434 root 1.18 allow_sharing = F_ALLOW_SHARING
1435 root 1.37 allow_cycles = F_ALLOW_CYCLES
1436 root 1.32 pack_strings = F_PACK_STRINGS
1437 root 1.54 text_keys = F_TEXT_KEYS
1438     text_strings = F_TEXT_STRINGS
1439 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1440 root 1.1 PPCODE:
1441     {
1442     if (enable)
1443     self->flags |= ix;
1444     else
1445     self->flags &= ~ix;
1446    
1447     XPUSHs (ST (0));
1448     }
1449    
1450     void get_shrink (CBOR *self)
1451     ALIAS:
1452     get_shrink = F_SHRINK
1453     get_allow_unknown = F_ALLOW_UNKNOWN
1454 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1455 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1456 root 1.32 get_pack_strings = F_PACK_STRINGS
1457 root 1.54 get_text_keys = F_TEXT_KEYS
1458     get_text_strings = F_TEXT_STRINGS
1459 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1460 root 1.1 PPCODE:
1461     XPUSHs (boolSV (self->flags & ix));
1462    
1463     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1464     PPCODE:
1465     self->max_depth = max_depth;
1466     XPUSHs (ST (0));
1467    
1468     U32 get_max_depth (CBOR *self)
1469     CODE:
1470     RETVAL = self->max_depth;
1471     OUTPUT:
1472     RETVAL
1473    
1474     void max_size (CBOR *self, U32 max_size = 0)
1475     PPCODE:
1476     self->max_size = max_size;
1477     XPUSHs (ST (0));
1478    
1479     int get_max_size (CBOR *self)
1480     CODE:
1481     RETVAL = self->max_size;
1482     OUTPUT:
1483     RETVAL
1484    
1485 root 1.27 void filter (CBOR *self, SV *filter = 0)
1486     PPCODE:
1487     SvREFCNT_dec (self->filter);
1488     self->filter = filter ? newSVsv (filter) : filter;
1489     XPUSHs (ST (0));
1490    
1491     SV *get_filter (CBOR *self)
1492     CODE:
1493     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1494     OUTPUT:
1495     RETVAL
1496    
1497 root 1.1 void encode (CBOR *self, SV *scalar)
1498     PPCODE:
1499     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1500     XPUSHs (scalar);
1501    
1502     void decode (CBOR *self, SV *cborstr)
1503     PPCODE:
1504     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1505     XPUSHs (cborstr);
1506    
1507     void decode_prefix (CBOR *self, SV *cborstr)
1508     PPCODE:
1509     {
1510     SV *sv;
1511     char *offset;
1512     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1513     EXTEND (SP, 2);
1514     PUSHs (sv);
1515     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1516     }
1517    
1518 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1519 root 1.42 ALIAS:
1520     incr_parse_multiple = 1
1521 root 1.40 PPCODE:
1522     {
1523     if (SvUTF8 (cborstr))
1524     sv_utf8_downgrade (cborstr, 0);
1525    
1526     if (!self->incr_count)
1527     {
1528     self->incr_count = newAV ();
1529     self->incr_pos = 0;
1530     self->incr_need = 1;
1531    
1532     av_push (self->incr_count, newSViv (1));
1533     }
1534    
1535 root 1.41 do
1536 root 1.40 {
1537     if (!incr_parse (self, cborstr))
1538     {
1539     if (self->incr_need > self->max_size && self->max_size)
1540     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1541     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1542    
1543     break;
1544     }
1545    
1546 root 1.41 SV *sv;
1547     char *offset;
1548 root 1.40
1549 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1550     XPUSHs (sv);
1551 root 1.40
1552 root 1.41 sv_chop (cborstr, offset);
1553 root 1.40
1554 root 1.41 av_clear (self->incr_count);
1555     av_push (self->incr_count, newSViv (1));
1556 root 1.40
1557 root 1.41 self->incr_pos = 0;
1558     self->incr_need = self->incr_pos + 1;
1559 root 1.40 }
1560 root 1.42 while (ix);
1561 root 1.40 }
1562    
1563     void incr_reset (CBOR *self)
1564     CODE:
1565     {
1566     SvREFCNT_dec (self->incr_count);
1567     self->incr_count = 0;
1568     }
1569    
1570 root 1.27 void DESTROY (CBOR *self)
1571     PPCODE:
1572     cbor_free (self);
1573    
1574 root 1.1 PROTOTYPES: ENABLE
1575    
1576     void encode_cbor (SV *scalar)
1577 root 1.36 ALIAS:
1578     encode_cbor = 0
1579     encode_cbor_sharing = F_ALLOW_SHARING
1580 root 1.1 PPCODE:
1581     {
1582     CBOR cbor;
1583     cbor_init (&cbor);
1584 root 1.36 cbor.flags |= ix;
1585 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1586     XPUSHs (scalar);
1587     }
1588    
1589     void decode_cbor (SV *cborstr)
1590     PPCODE:
1591     {
1592     CBOR cbor;
1593     cbor_init (&cbor);
1594     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1595     XPUSHs (cborstr);
1596     }
1597