ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.57
Committed: Fri Nov 25 12:16:12 2016 UTC (7 years, 5 months ago) by root
Branch: MAIN
Changes since 1.56: +5 -3 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12 root 1.44 #define ECB_NO_THREADS 1
13 root 1.1 #include "ecb.h"
14    
15 root 1.14 // compatibility with perl <5.18
16     #ifndef HvNAMELEN_get
17     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
18     #endif
19     #ifndef HvNAMELEN
20     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
21     #endif
22     #ifndef HvNAMEUTF8
23     # define HvNAMEUTF8(hv) 0
24     #endif
25 root 1.28 #ifndef SvREFCNT_dec_NN
26     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
27     #endif
28 root 1.14
29 root 1.35 // known major and minor types
30     enum cbor_type
31     {
32     MAJOR_SHIFT = 5,
33     MINOR_MASK = 0x1f,
34    
35     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
36     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
37     MAJOR_BYTES = 2 << MAJOR_SHIFT,
38     MAJOR_TEXT = 3 << MAJOR_SHIFT,
39     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
40     MAJOR_MAP = 5 << MAJOR_SHIFT,
41     MAJOR_TAG = 6 << MAJOR_SHIFT,
42     MAJOR_MISC = 7 << MAJOR_SHIFT,
43    
44     // INT/STRING/ARRAY/MAP subtypes
45     LENGTH_EXT1 = 24,
46     LENGTH_EXT2 = 25,
47     LENGTH_EXT4 = 26,
48     LENGTH_EXT8 = 27,
49    
50     // SIMPLE types (effectively MISC subtypes)
51     SIMPLE_FALSE = 20,
52     SIMPLE_TRUE = 21,
53     SIMPLE_NULL = 22,
54     SIMPLE_UNDEF = 23,
55    
56     // MISC subtype (unused)
57     MISC_EXT1 = 24,
58     MISC_FLOAT16 = 25,
59     MISC_FLOAT32 = 26,
60     MISC_FLOAT64 = 27,
61    
62     // BYTES/TEXT/ARRAY/MAP
63     MINOR_INDEF = 31,
64     };
65    
66 root 1.9 // known tags
67 root 1.8 enum cbor_tag
68     {
69 root 1.35 // extensions
70     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
71     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
72     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
73     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
74     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
75     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
76     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
77    
78     // rfc7049
79     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
80     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
81     CBOR_TAG_POS_BIGNUM = 2, // byte string
82     CBOR_TAG_NEG_BIGNUM = 3, // byte string
83     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
84     CBOR_TAG_BIGFLOAT = 5, // array
85    
86     CBOR_TAG_CONV_B64U = 21, // base64url, any
87     CBOR_TAG_CONV_B64 = 22, // base64, any
88     CBOR_TAG_CONV_HEX = 23, // base16, any
89     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
90    
91     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
92     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
93     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
94     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
95     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
96 root 1.8
97 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
98 root 1.8 };
99    
100 root 1.24 #define F_SHRINK 0x00000001UL
101     #define F_ALLOW_UNKNOWN 0x00000002UL
102 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
103 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
104     #define F_PACK_STRINGS 0x00000010UL
105 root 1.54 #define F_TEXT_KEYS 0x00000020UL
106     #define F_TEXT_STRINGS 0x00000040UL
107 root 1.53 #define F_VALIDATE_UTF8 0x00000080UL
108 root 1.1
109     #define INIT_SIZE 32 // initial scalar size to be allocated
110    
111     #define SB do {
112     #define SE } while (0)
113    
114     #define IN_RANGE_INC(type,val,beg,end) \
115     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
116     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
117    
118     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
119    
120     #ifdef USE_ITHREADS
121     # define CBOR_SLOW 1
122     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
123     #else
124     # define CBOR_SLOW 0
125     # define CBOR_STASH cbor_stash
126     #endif
127    
128 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
129 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
130 root 1.1
131     typedef struct {
132     U32 flags;
133     U32 max_depth;
134     STRLEN max_size;
135 root 1.27 SV *filter;
136 root 1.40
137     // for the incremental parser
138     STRLEN incr_pos; // the current offset into the text
139     STRLEN incr_need; // minimum bytes needed to decode
140     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
141 root 1.1 } CBOR;
142    
143 root 1.5 ecb_inline void
144 root 1.1 cbor_init (CBOR *cbor)
145     {
146     Zero (cbor, 1, CBOR);
147     cbor->max_depth = 512;
148     }
149    
150 root 1.27 ecb_inline void
151     cbor_free (CBOR *cbor)
152     {
153     SvREFCNT_dec (cbor->filter);
154 root 1.40 SvREFCNT_dec (cbor->incr_count);
155 root 1.27 }
156    
157 root 1.1 /////////////////////////////////////////////////////////////////////////////
158     // utility functions
159    
160 root 1.5 ecb_inline SV *
161 root 1.1 get_bool (const char *name)
162     {
163     SV *sv = get_sv (name, 1);
164    
165     SvREADONLY_on (sv);
166     SvREADONLY_on (SvRV (sv));
167    
168     return sv;
169     }
170    
171 root 1.5 ecb_inline void
172 root 1.1 shrink (SV *sv)
173     {
174     sv_utf8_downgrade (sv, 1);
175    
176     if (SvLEN (sv) > SvCUR (sv) + 1)
177     {
178     #ifdef SvPV_shrink_to_cur
179     SvPV_shrink_to_cur (sv);
180     #elif defined (SvPV_renew)
181     SvPV_renew (sv, SvCUR (sv) + 1);
182     #endif
183     }
184     }
185    
186 root 1.21 // minimum length of a string to be registered for stringref
187     ecb_inline int
188     minimum_string_length (UV idx)
189     {
190     return idx > 23
191     ? idx > 0xffU
192     ? idx > 0xffffU
193     ? idx > 0xffffffffU
194 root 1.29 ? 11
195     : 7
196 root 1.21 : 5
197     : 4
198     : 3;
199     }
200    
201 root 1.1 /////////////////////////////////////////////////////////////////////////////
202     // encoder
203    
204     // structure used for encoding CBOR
205     typedef struct
206     {
207     char *cur; // SvPVX (sv) + current output position
208     char *end; // SvEND (sv)
209     SV *sv; // result scalar
210     CBOR cbor;
211     U32 depth; // recursion level
212 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
213     UV stringref_idx;
214 root 1.19 HV *shareable; // ptr => index, or 0
215     UV shareable_idx;
216 root 1.1 } enc_t;
217    
218 root 1.5 ecb_inline void
219 root 1.1 need (enc_t *enc, STRLEN len)
220     {
221 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
222 root 1.1 {
223     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
224     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
225     enc->cur = SvPVX (enc->sv) + cur;
226     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
227     }
228     }
229    
230 root 1.5 ecb_inline void
231 root 1.1 encode_ch (enc_t *enc, char ch)
232     {
233     need (enc, 1);
234     *enc->cur++ = ch;
235     }
236    
237     static void
238     encode_uint (enc_t *enc, int major, UV len)
239     {
240     need (enc, 9);
241    
242 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
243 root 1.1 *enc->cur++ = major | len;
244 root 1.36 else if (ecb_expect_true (len <= 0xffU))
245 root 1.1 {
246 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
247 root 1.1 *enc->cur++ = len;
248     }
249 root 1.36 else if (len <= 0xffffU)
250 root 1.1 {
251 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
252 root 1.1 *enc->cur++ = len >> 8;
253     *enc->cur++ = len;
254     }
255 root 1.36 else if (len <= 0xffffffffU)
256 root 1.1 {
257 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
258 root 1.1 *enc->cur++ = len >> 24;
259     *enc->cur++ = len >> 16;
260     *enc->cur++ = len >> 8;
261     *enc->cur++ = len;
262     }
263 root 1.4 else
264 root 1.1 {
265 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
266 root 1.1 *enc->cur++ = len >> 56;
267     *enc->cur++ = len >> 48;
268     *enc->cur++ = len >> 40;
269     *enc->cur++ = len >> 32;
270     *enc->cur++ = len >> 24;
271     *enc->cur++ = len >> 16;
272     *enc->cur++ = len >> 8;
273     *enc->cur++ = len;
274     }
275     }
276    
277 root 1.21 ecb_inline void
278     encode_tag (enc_t *enc, UV tag)
279     {
280 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
281 root 1.21 }
282    
283 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
284     ecb_noinline static void
285     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
286     {
287     STRLEN ulen = len;
288     U8 *p, *pend = (U8 *)str + len;
289    
290     for (p = (U8 *)str; p < pend; ++p)
291     ulen += *p >> 7; // count set high bits
292    
293     encode_uint (enc, MAJOR_TEXT, ulen);
294    
295     need (enc, ulen);
296     for (p = (U8 *)str; p < pend; ++p)
297     if (*p < 0x80)
298     *enc->cur++ = *p;
299     else
300     {
301     *enc->cur++ = 0xc0 + (*p >> 6);
302     *enc->cur++ = 0x80 + (*p & 63);
303     }
304     }
305    
306 root 1.30 ecb_inline void
307 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
308 root 1.30 {
309 root 1.53 if (ecb_expect_false (upgrade_utf8))
310 root 1.51 if (!utf8)
311     {
312 root 1.53 encode_str_utf8 (enc, utf8, str, len);
313 root 1.51 return;
314     }
315    
316 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
317 root 1.30 need (enc, len);
318     memcpy (enc->cur, str, len);
319     enc->cur += len;
320     }
321    
322 root 1.54 ecb_inline void
323 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
324 root 1.1 {
325 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
326 root 1.21 {
327 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
328 root 1.21
329     if (SvOK (*svp))
330     {
331     // already registered, use stringref
332     encode_tag (enc, CBOR_TAG_STRINGREF);
333 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
334 root 1.21 return;
335     }
336     else if (len >= minimum_string_length (enc->stringref_idx))
337     {
338     // register only
339     sv_setuv (*svp, enc->stringref_idx);
340     ++enc->stringref_idx;
341     }
342     }
343    
344 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
345 root 1.1 }
346    
347     static void encode_sv (enc_t *enc, SV *sv);
348    
349     static void
350     encode_av (enc_t *enc, AV *av)
351     {
352     int i, len = av_len (av);
353    
354     if (enc->depth >= enc->cbor.max_depth)
355     croak (ERR_NESTING_EXCEEDED);
356    
357     ++enc->depth;
358    
359 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
360 root 1.1
361 root 1.45 if (SvMAGICAL (av))
362     for (i = 0; i <= len; ++i)
363     {
364     SV **svp = av_fetch (av, i, 0);
365     encode_sv (enc, svp ? *svp : &PL_sv_undef);
366     }
367     else
368     for (i = 0; i <= len; ++i)
369 root 1.46 {
370     SV *sv = AvARRAY (av)[i];
371     encode_sv (enc, sv ? sv : &PL_sv_undef);
372     }
373 root 1.1
374     --enc->depth;
375     }
376    
377     static void
378     encode_hv (enc_t *enc, HV *hv)
379     {
380     HE *he;
381    
382     if (enc->depth >= enc->cbor.max_depth)
383     croak (ERR_NESTING_EXCEEDED);
384    
385     ++enc->depth;
386    
387     int pairs = hv_iterinit (hv);
388     int mg = SvMAGICAL (hv);
389    
390     if (mg)
391 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
392 root 1.1 else
393 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
394 root 1.1
395     while ((he = hv_iternext (hv)))
396     {
397 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
398     encode_sv (enc, HeSVKEY (he));
399 root 1.1 else
400 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
401 root 1.1
402 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
403 root 1.1 }
404    
405     if (mg)
406 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
407 root 1.1
408     --enc->depth;
409     }
410    
411     // encode objects, arrays and special \0=false and \1=true values.
412     static void
413     encode_rv (enc_t *enc, SV *sv)
414     {
415 root 1.19 SvGETMAGIC (sv);
416 root 1.1
417 root 1.19 svtype svt = SvTYPE (sv);
418 root 1.18
419 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
420 root 1.1 {
421 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
422     ? types_boolean_stash
423     : gv_stashpv ("Types::Serialiser::Boolean", 1);
424     HV *error_stash = !CBOR_SLOW || types_error_stash
425     ? types_error_stash
426     : gv_stashpv ("Types::Serialiser::Error", 1);
427 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
428     ? cbor_tagged_stash
429     : gv_stashpv ("CBOR::XS::Tagged" , 1);
430 root 1.1
431 root 1.11 HV *stash = SvSTASH (sv);
432    
433     if (stash == boolean_stash)
434 root 1.34 {
435 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
436 root 1.34 return;
437     }
438 root 1.11 else if (stash == error_stash)
439 root 1.34 {
440 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
441 root 1.34 return;
442     }
443 root 1.11 else if (stash == tagged_stash)
444 root 1.6 {
445     if (svt != SVt_PVAV)
446     croak ("encountered CBOR::XS::Tagged object that isn't an array");
447    
448 root 1.35 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
449 root 1.6 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
450 root 1.34
451     return;
452 root 1.6 }
453 root 1.34 }
454    
455     if (ecb_expect_false (SvREFCNT (sv) > 1)
456     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
457     {
458     if (!enc->shareable)
459     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
460    
461     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
462    
463     if (SvOK (*svp))
464     {
465     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
466 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
467 root 1.34 return;
468     }
469     else
470     {
471     sv_setuv (*svp, enc->shareable_idx);
472     ++enc->shareable_idx;
473     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
474     }
475     }
476    
477     if (ecb_expect_false (SvOBJECT (sv)))
478     {
479     HV *stash = SvSTASH (sv);
480     GV *method;
481    
482     if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
483 root 1.1 {
484 root 1.11 dSP;
485    
486 root 1.50 ENTER; SAVETMPS;
487     PUSHMARK (SP);
488 root 1.6 // we re-bless the reference to get overload and other niceties right
489 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
490    
491     PUTBACK;
492     // G_SCALAR ensures that return value is 1
493     call_sv ((SV *)GvCV (method), G_SCALAR);
494     SPAGAIN;
495    
496     // catch this surprisingly common error
497     if (SvROK (TOPs) && SvRV (TOPs) == sv)
498     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
499    
500     encode_sv (enc, POPs);
501    
502     PUTBACK;
503    
504     FREETMPS; LEAVE;
505     }
506     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
507     {
508     dSP;
509 root 1.6
510 root 1.50 ENTER; SAVETMPS;
511     SAVESTACK_POS ();
512     PUSHMARK (SP);
513 root 1.11 EXTEND (SP, 2);
514     // we re-bless the reference to get overload and other niceties right
515     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
516     PUSHs (sv_cbor);
517 root 1.1
518 root 1.11 PUTBACK;
519     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
520     SPAGAIN;
521 root 1.6
522 root 1.11 // catch this surprisingly common error
523     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
524     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
525 root 1.6
526 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
527 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
528 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
529 root 1.6
530 root 1.11 while (count)
531     encode_sv (enc, SP[1 - count--]);
532 root 1.6
533 root 1.11 PUTBACK;
534 root 1.6
535 root 1.11 FREETMPS; LEAVE;
536 root 1.1 }
537 root 1.11 else
538     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
539     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
540 root 1.1 }
541     else if (svt == SVt_PVHV)
542     encode_hv (enc, (HV *)sv);
543     else if (svt == SVt_PVAV)
544     encode_av (enc, (AV *)sv);
545 root 1.18 else
546 root 1.1 {
547 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
548     encode_sv (enc, sv);
549 root 1.1 }
550     }
551    
552     static void
553     encode_nv (enc_t *enc, SV *sv)
554     {
555     double nv = SvNVX (sv);
556    
557     need (enc, 9);
558    
559 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
560     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
561 root 1.1 //TODO: maybe I32?
562 root 1.5 else if (ecb_expect_false (nv == (float)nv))
563 root 1.1 {
564     uint32_t fp = ecb_float_to_binary32 (nv);
565    
566 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
567 root 1.1
568     if (!ecb_big_endian ())
569     fp = ecb_bswap32 (fp);
570    
571     memcpy (enc->cur, &fp, 4);
572     enc->cur += 4;
573     }
574     else
575     {
576     uint64_t fp = ecb_double_to_binary64 (nv);
577    
578 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
579 root 1.1
580     if (!ecb_big_endian ())
581     fp = ecb_bswap64 (fp);
582    
583     memcpy (enc->cur, &fp, 8);
584     enc->cur += 8;
585     }
586     }
587    
588     static void
589     encode_sv (enc_t *enc, SV *sv)
590     {
591     SvGETMAGIC (sv);
592    
593     if (SvPOKp (sv))
594     {
595     STRLEN len;
596     char *str = SvPV (sv, len);
597 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
598 root 1.1 }
599     else if (SvNOKp (sv))
600     encode_nv (enc, sv);
601     else if (SvIOKp (sv))
602     {
603     if (SvIsUV (sv))
604 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
605 root 1.1 else if (SvIVX (sv) >= 0)
606 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
607 root 1.1 else
608 root 1.35 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
609 root 1.1 }
610     else if (SvROK (sv))
611     encode_rv (enc, SvRV (sv));
612     else if (!SvOK (sv))
613 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
614 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
615 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
616 root 1.1 else
617     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
618     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
619     }
620    
621     static SV *
622     encode_cbor (SV *scalar, CBOR *cbor)
623     {
624 root 1.48 enc_t enc = { 0 };
625 root 1.1
626     enc.cbor = *cbor;
627     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
628     enc.cur = SvPVX (enc.sv);
629     enc.end = SvEND (enc.sv);
630    
631     SvPOK_only (enc.sv);
632 root 1.20
633 root 1.32 if (cbor->flags & F_PACK_STRINGS)
634 root 1.20 {
635     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
636     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
637     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
638     }
639    
640 root 1.1 encode_sv (&enc, scalar);
641    
642     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
643     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
644    
645     if (enc.cbor.flags & F_SHRINK)
646     shrink (enc.sv);
647    
648     return enc.sv;
649     }
650    
651     /////////////////////////////////////////////////////////////////////////////
652     // decoder
653    
654     // structure used for decoding CBOR
655     typedef struct
656     {
657     U8 *cur; // current parser pointer
658     U8 *end; // end of input string
659     const char *err; // parse error, if != 0
660     CBOR cbor;
661     U32 depth; // recursion depth
662     U32 maxdepth; // recursion depth limit
663 root 1.19 AV *shareable;
664 root 1.20 AV *stringref;
665 root 1.27 SV *decode_tagged;
666 root 1.1 } dec_t;
667    
668     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
669    
670 root 1.57 #define WANT(len) if (ecb_expect_false ((UV)(dec->end - dec->cur) < (UV)len)) ERR ("unexpected end of CBOR data")
671 root 1.1
672 root 1.56 #define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED)
673 root 1.1 #define DEC_DEC_DEPTH --dec->depth
674    
675     static UV
676     decode_uint (dec_t *dec)
677     {
678 root 1.35 U8 m = *dec->cur & MINOR_MASK;
679     ++dec->cur;
680    
681     if (ecb_expect_true (m < LENGTH_EXT1))
682     return m;
683 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
684     {
685     WANT (1);
686     dec->cur += 1;
687     return dec->cur[-1];
688     }
689     else if (ecb_expect_true (m == LENGTH_EXT2))
690     {
691     WANT (2);
692     dec->cur += 2;
693     return (((UV)dec->cur[-2]) << 8)
694     | ((UV)dec->cur[-1]);
695     }
696     else if (ecb_expect_true (m == LENGTH_EXT4))
697     {
698     WANT (4);
699     dec->cur += 4;
700     return (((UV)dec->cur[-4]) << 24)
701     | (((UV)dec->cur[-3]) << 16)
702     | (((UV)dec->cur[-2]) << 8)
703     | ((UV)dec->cur[-1]);
704     }
705     else if (ecb_expect_true (m == LENGTH_EXT8))
706 root 1.1 {
707 root 1.36 WANT (8);
708     dec->cur += 8;
709 root 1.34
710 root 1.36 return
711 root 1.34 #if UVSIZE < 8
712 root 1.36 0
713 root 1.34 #else
714 root 1.36 (((UV)dec->cur[-8]) << 56)
715     | (((UV)dec->cur[-7]) << 48)
716     | (((UV)dec->cur[-6]) << 40)
717     | (((UV)dec->cur[-5]) << 32)
718 root 1.34 #endif
719 root 1.36 | (((UV)dec->cur[-4]) << 24)
720     | (((UV)dec->cur[-3]) << 16)
721     | (((UV)dec->cur[-2]) << 8)
722     | ((UV)dec->cur[-1]);
723 root 1.1 }
724 root 1.36 else
725     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
726 root 1.1
727     fail:
728     return 0;
729     }
730    
731     static SV *decode_sv (dec_t *dec);
732    
733     static SV *
734     decode_av (dec_t *dec)
735     {
736     AV *av = newAV ();
737    
738     DEC_INC_DEPTH;
739    
740 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
741 root 1.1 {
742     ++dec->cur;
743    
744     for (;;)
745     {
746     WANT (1);
747    
748 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
749 root 1.1 {
750     ++dec->cur;
751     break;
752     }
753    
754     av_push (av, decode_sv (dec));
755     }
756     }
757     else
758     {
759 root 1.57 UV i, len = decode_uint (dec);
760 root 1.1
761 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
762 root 1.1 av_fill (av, len - 1);
763    
764     for (i = 0; i < len; ++i)
765     AvARRAY (av)[i] = decode_sv (dec);
766     }
767    
768     DEC_DEC_DEPTH;
769     return newRV_noinc ((SV *)av);
770    
771     fail:
772     SvREFCNT_dec (av);
773     DEC_DEC_DEPTH;
774     return &PL_sv_undef;
775     }
776    
777 root 1.16 static void
778     decode_he (dec_t *dec, HV *hv)
779     {
780     // for speed reasons, we specialcase single-string
781 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
782    
783 root 1.23 if (ecb_expect_true (!dec->stringref))
784 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
785 root 1.21 {
786 root 1.56 STRLEN len = decode_uint (dec);
787 root 1.21 char *key = (char *)dec->cur;
788 root 1.16
789 root 1.49 WANT (len);
790 root 1.21 dec->cur += len;
791    
792 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
793 root 1.16
794 root 1.21 return;
795     }
796 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
797 root 1.21 {
798 root 1.56 STRLEN len = decode_uint (dec);
799 root 1.21 char *key = (char *)dec->cur;
800 root 1.16
801 root 1.49 WANT (len);
802 root 1.21 dec->cur += len;
803 root 1.20
804 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
805     if (!is_utf8_string (key, len))
806     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
807 root 1.16
808 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
809 root 1.16
810 root 1.21 return;
811     }
812 root 1.20
813 root 1.21 SV *k = decode_sv (dec);
814     SV *v = decode_sv (dec);
815 root 1.16
816 root 1.21 hv_store_ent (hv, k, v, 0);
817     SvREFCNT_dec (k);
818 root 1.38
819     fail:
820     ;
821 root 1.16 }
822    
823 root 1.1 static SV *
824     decode_hv (dec_t *dec)
825     {
826     HV *hv = newHV ();
827    
828     DEC_INC_DEPTH;
829    
830 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
831 root 1.1 {
832     ++dec->cur;
833    
834     for (;;)
835     {
836     WANT (1);
837    
838 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
839 root 1.1 {
840     ++dec->cur;
841     break;
842     }
843    
844 root 1.16 decode_he (dec, hv);
845 root 1.1 }
846     }
847     else
848     {
849 root 1.57 UV pairs = decode_uint (dec);
850    
851     WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes
852 root 1.1
853 root 1.16 while (pairs--)
854     decode_he (dec, hv);
855 root 1.1 }
856    
857     DEC_DEC_DEPTH;
858     return newRV_noinc ((SV *)hv);
859    
860     fail:
861     SvREFCNT_dec (hv);
862     DEC_DEC_DEPTH;
863     return &PL_sv_undef;
864     }
865    
866     static SV *
867     decode_str (dec_t *dec, int utf8)
868     {
869 root 1.6 SV *sv = 0;
870 root 1.1
871 root 1.35 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF)
872 root 1.1 {
873 root 1.33 // indefinite length strings
874 root 1.1 ++dec->cur;
875    
876 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
877 root 1.33
878 root 1.1 sv = newSVpvn ("", 0);
879    
880     for (;;)
881     {
882     WANT (1);
883    
884 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
885     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
886 root 1.33 {
887     ++dec->cur;
888     break;
889     }
890     else
891     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
892    
893     STRLEN len = decode_uint (dec);
894 root 1.1
895 root 1.33 WANT (len);
896     sv_catpvn (sv, dec->cur, len);
897     dec->cur += len;
898 root 1.1 }
899     }
900     else
901     {
902     STRLEN len = decode_uint (dec);
903    
904     WANT (len);
905     sv = newSVpvn (dec->cur, len);
906     dec->cur += len;
907 root 1.25
908     if (ecb_expect_false (dec->stringref)
909     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
910     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
911 root 1.1 }
912    
913     if (utf8)
914 root 1.38 {
915     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
916     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
917     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
918    
919     SvUTF8_on (sv);
920     }
921 root 1.1
922     return sv;
923    
924     fail:
925 root 1.6 SvREFCNT_dec (sv);
926 root 1.1 return &PL_sv_undef;
927     }
928    
929     static SV *
930 root 1.3 decode_tagged (dec_t *dec)
931     {
932 root 1.19 SV *sv = 0;
933 root 1.3 UV tag = decode_uint (dec);
934 root 1.19
935     WANT (1);
936 root 1.3
937 root 1.18 switch (tag)
938     {
939     case CBOR_TAG_MAGIC:
940 root 1.20 sv = decode_sv (dec);
941     break;
942 root 1.18
943     case CBOR_TAG_INDIRECTION:
944 root 1.20 sv = newRV_noinc (decode_sv (dec));
945     break;
946    
947     case CBOR_TAG_STRINGREF_NAMESPACE:
948     {
949 root 1.55 // do nmot use SAVETMPS/FREETMPS, as these will
950     // erase mortalised caches, e.g. "shareable"
951     ENTER;
952 root 1.20
953     SAVESPTR (dec->stringref);
954     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
955    
956     sv = decode_sv (dec);
957    
958 root 1.55 LEAVE;
959 root 1.20 }
960     break;
961    
962     case CBOR_TAG_STRINGREF:
963     {
964 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
965 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
966    
967     UV idx = decode_uint (dec);
968    
969     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
970     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
971    
972     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
973     }
974     break;
975 root 1.11
976 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
977     {
978     if (ecb_expect_false (!dec->shareable))
979     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
980    
981 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
982     {
983     sv = newSV (0);
984     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
985 root 1.18
986 root 1.37 SV *osv = decode_sv (dec);
987     sv_setsv (sv, osv);
988     SvREFCNT_dec_NN (osv);
989     }
990     else
991     {
992     av_push (dec->shareable, &PL_sv_undef);
993     int idx = AvFILLp (dec->shareable);
994     sv = decode_sv (dec);
995     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
996     }
997 root 1.19 }
998 root 1.20 break;
999 root 1.18
1000     case CBOR_TAG_VALUE_SHAREDREF:
1001 root 1.17 {
1002 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1003 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1004 root 1.18
1005 root 1.19 UV idx = decode_uint (dec);
1006    
1007 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
1008 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1009    
1010 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1011 root 1.37
1012     if (sv == &PL_sv_undef)
1013     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1014 root 1.17 }
1015 root 1.20 break;
1016 root 1.17
1017 root 1.18 case CBOR_TAG_PERL_OBJECT:
1018     {
1019 root 1.19 sv = decode_sv (dec);
1020    
1021 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1022     ERR ("corrupted CBOR data (non-array perl object)");
1023    
1024     AV *av = (AV *)SvRV (sv);
1025     int len = av_len (av) + 1;
1026     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1027    
1028     if (!stash)
1029     ERR ("cannot decode perl-object (package does not exist)");
1030    
1031     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1032    
1033     if (!method)
1034     ERR ("cannot decode perl-object (package does not have a THAW method)");
1035    
1036     dSP;
1037    
1038 root 1.50 ENTER; SAVETMPS;
1039     PUSHMARK (SP);
1040 root 1.18 EXTEND (SP, len + 1);
1041     // we re-bless the reference to get overload and other niceties right
1042     PUSHs (*av_fetch (av, 0, 1));
1043     PUSHs (sv_cbor);
1044    
1045     int i;
1046    
1047     for (i = 1; i < len; ++i)
1048     PUSHs (*av_fetch (av, i, 1));
1049    
1050     PUTBACK;
1051     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1052     SPAGAIN;
1053    
1054     if (SvTRUE (ERRSV))
1055     {
1056     FREETMPS; LEAVE;
1057     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1058     }
1059    
1060     SvREFCNT_dec (sv);
1061     sv = SvREFCNT_inc (POPs);
1062 root 1.11
1063 root 1.18 PUTBACK;
1064 root 1.11
1065 root 1.18 FREETMPS; LEAVE;
1066     }
1067 root 1.20 break;
1068 root 1.9
1069 root 1.18 default:
1070     {
1071 root 1.19 sv = decode_sv (dec);
1072    
1073 root 1.27 dSP;
1074 root 1.50 ENTER; SAVETMPS;
1075     SAVESTACK_POS ();
1076     PUSHMARK (SP);
1077 root 1.27 EXTEND (SP, 2);
1078     PUSHs (newSVuv (tag));
1079     PUSHs (sv);
1080    
1081     PUTBACK;
1082     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1083     SPAGAIN;
1084    
1085     if (SvTRUE (ERRSV))
1086     {
1087     FREETMPS; LEAVE;
1088     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1089     }
1090    
1091     if (count)
1092     {
1093     SvREFCNT_dec (sv);
1094     sv = SvREFCNT_inc (POPs);
1095     }
1096     else
1097     {
1098     AV *av = newAV ();
1099     av_push (av, newSVuv (tag));
1100     av_push (av, sv);
1101    
1102     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1103     ? cbor_tagged_stash
1104     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1105     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1106     }
1107 root 1.7
1108 root 1.27 PUTBACK;
1109    
1110     FREETMPS; LEAVE;
1111 root 1.18 }
1112 root 1.20 break;
1113 root 1.11 }
1114 root 1.9
1115 root 1.20 return sv;
1116    
1117 root 1.9 fail:
1118     SvREFCNT_dec (sv);
1119     return &PL_sv_undef;
1120 root 1.3 }
1121    
1122     static SV *
1123 root 1.1 decode_sv (dec_t *dec)
1124     {
1125     WANT (1);
1126    
1127 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1128 root 1.1 {
1129 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1130     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1131     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1132     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1133     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1134     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1135     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1136    
1137     case MAJOR_MISC >> MAJOR_SHIFT:
1138     switch (*dec->cur++ & MINOR_MASK)
1139 root 1.1 {
1140 root 1.35 case SIMPLE_FALSE:
1141 root 1.1 #if CBOR_SLOW
1142 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1143 root 1.1 #endif
1144 root 1.10 return newSVsv (types_false);
1145 root 1.35 case SIMPLE_TRUE:
1146 root 1.1 #if CBOR_SLOW
1147 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1148 root 1.1 #endif
1149 root 1.10 return newSVsv (types_true);
1150 root 1.35 case SIMPLE_NULL:
1151 root 1.1 return newSVsv (&PL_sv_undef);
1152 root 1.35 case SIMPLE_UNDEF:
1153 root 1.10 #if CBOR_SLOW
1154     types_error = get_bool ("Types::Serialiser::error");
1155     #endif
1156     return newSVsv (types_error);
1157 root 1.1
1158 root 1.35 case MISC_FLOAT16:
1159 root 1.2 {
1160     WANT (2);
1161    
1162     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1163     dec->cur += 2;
1164    
1165     return newSVnv (ecb_binary16_to_float (fp));
1166     }
1167 root 1.1
1168 root 1.35 case MISC_FLOAT32:
1169 root 1.1 {
1170     uint32_t fp;
1171     WANT (4);
1172     memcpy (&fp, dec->cur, 4);
1173     dec->cur += 4;
1174    
1175     if (!ecb_big_endian ())
1176     fp = ecb_bswap32 (fp);
1177    
1178     return newSVnv (ecb_binary32_to_float (fp));
1179     }
1180    
1181 root 1.35 case MISC_FLOAT64:
1182 root 1.1 {
1183     uint64_t fp;
1184     WANT (8);
1185     memcpy (&fp, dec->cur, 8);
1186     dec->cur += 8;
1187    
1188     if (!ecb_big_endian ())
1189     fp = ecb_bswap64 (fp);
1190    
1191     return newSVnv (ecb_binary64_to_double (fp));
1192     }
1193    
1194 root 1.35 // 0..19 unassigned simple
1195 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1196     // 28-30 unassigned misc
1197     // 31 break code
1198 root 1.1 default:
1199 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1200 root 1.1 }
1201    
1202     break;
1203     }
1204    
1205     fail:
1206     return &PL_sv_undef;
1207     }
1208    
1209     static SV *
1210     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1211     {
1212 root 1.48 dec_t dec = { 0 };
1213 root 1.1 SV *sv;
1214 root 1.16 STRLEN len;
1215     char *data = SvPVbyte (string, len);
1216 root 1.1
1217 root 1.16 if (len > cbor->max_size && cbor->max_size)
1218     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1219     (unsigned long)len, (unsigned long)cbor->max_size);
1220 root 1.1
1221     dec.cbor = *cbor;
1222 root 1.16 dec.cur = (U8 *)data;
1223     dec.end = (U8 *)data + len;
1224 root 1.1
1225     sv = decode_sv (&dec);
1226    
1227     if (offset_return)
1228     *offset_return = dec.cur;
1229    
1230     if (!(offset_return || !sv))
1231 root 1.2 if (dec.cur != dec.end && !dec.err)
1232     dec.err = "garbage after CBOR object";
1233    
1234     if (dec.err)
1235 root 1.1 {
1236 root 1.39 if (dec.shareable)
1237     {
1238     // need to break cyclic links, which whould all be in shareable
1239     int i;
1240     SV **svp;
1241    
1242     for (i = av_len (dec.shareable) + 1; i--; )
1243     if ((svp = av_fetch (dec.shareable, i, 0)))
1244     sv_setsv (*svp, &PL_sv_undef);
1245     }
1246    
1247 root 1.2 SvREFCNT_dec (sv);
1248 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1249 root 1.1 }
1250    
1251     sv = sv_2mortal (sv);
1252    
1253     return sv;
1254     }
1255    
1256     /////////////////////////////////////////////////////////////////////////////
1257 root 1.40 // incremental parser
1258    
1259     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1260    
1261     // returns 0 for notyet, 1 for success or error
1262     static int
1263     incr_parse (CBOR *self, SV *cborstr)
1264     {
1265     STRLEN cur;
1266     SvPV (cborstr, cur);
1267    
1268     while (ecb_expect_true (self->incr_need <= cur))
1269     {
1270     // table of integer count bytes
1271     static I8 incr_len[MINOR_MASK + 1] = {
1272     0, 0, 0, 0, 0, 0, 0, 0,
1273     0, 0, 0, 0, 0, 0, 0, 0,
1274     0, 0, 0, 0, 0, 0, 0, 0,
1275     1, 2, 4, 8,-1,-1,-1,-2
1276     };
1277    
1278     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1279     U8 m = *p & MINOR_MASK;
1280     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1281     I8 ilen = incr_len[m];
1282    
1283     self->incr_need = self->incr_pos + 1;
1284    
1285     if (ecb_expect_false (ilen < 0))
1286     {
1287     if (m != MINOR_INDEF)
1288     return 1; // error
1289    
1290     if (*p == (MAJOR_MISC | MINOR_INDEF))
1291     {
1292     if (count >= 0)
1293     return 1; // error
1294    
1295     count = 1;
1296     }
1297     else
1298     {
1299     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1300     count = -1;
1301     }
1302     }
1303     else
1304     {
1305     self->incr_need += ilen;
1306     if (ecb_expect_false (self->incr_need > cur))
1307     return 0;
1308    
1309     int major = *p >> MAJOR_SHIFT;
1310    
1311     switch (major)
1312     {
1313 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1314     ++count; // tags merely prefix another value
1315     break;
1316    
1317 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1318     case MAJOR_TEXT >> MAJOR_SHIFT:
1319     case MAJOR_ARRAY >> MAJOR_SHIFT:
1320     case MAJOR_MAP >> MAJOR_SHIFT:
1321     {
1322     UV len;
1323    
1324     if (ecb_expect_false (ilen))
1325     {
1326     len = 0;
1327    
1328     do {
1329     len = (len << 8) | *++p;
1330     } while (--ilen);
1331     }
1332     else
1333     len = m;
1334    
1335     switch (major)
1336     {
1337     case MAJOR_BYTES >> MAJOR_SHIFT:
1338     case MAJOR_TEXT >> MAJOR_SHIFT:
1339     self->incr_need += len;
1340     if (ecb_expect_false (self->incr_need > cur))
1341     return 0;
1342    
1343     break;
1344    
1345     case MAJOR_MAP >> MAJOR_SHIFT:
1346     len <<= 1;
1347     case MAJOR_ARRAY >> MAJOR_SHIFT:
1348     if (len)
1349     {
1350     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1351     count = len + 1;
1352     }
1353     break;
1354     }
1355     }
1356     }
1357     }
1358    
1359     self->incr_pos = self->incr_need;
1360    
1361     if (count > 0)
1362     {
1363     while (!--count)
1364     {
1365     if (!AvFILLp (self->incr_count))
1366     return 1; // done
1367    
1368     SvREFCNT_dec_NN (av_pop (self->incr_count));
1369     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1370     }
1371    
1372     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1373     }
1374     }
1375    
1376     return 0;
1377     }
1378    
1379    
1380     /////////////////////////////////////////////////////////////////////////////
1381 root 1.1 // XS interface functions
1382    
1383     MODULE = CBOR::XS PACKAGE = CBOR::XS
1384    
1385     BOOT:
1386     {
1387     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1388 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1389 root 1.1
1390 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1391     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1392    
1393     types_true = get_bool ("Types::Serialiser::true" );
1394     types_false = get_bool ("Types::Serialiser::false");
1395     types_error = get_bool ("Types::Serialiser::error");
1396 root 1.11
1397 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1398    
1399 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1400     SvREADONLY_on (sv_cbor);
1401 root 1.56
1402     assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1));
1403 root 1.1 }
1404    
1405     PROTOTYPES: DISABLE
1406    
1407     void CLONE (...)
1408     CODE:
1409 root 1.10 cbor_stash = 0;
1410     cbor_tagged_stash = 0;
1411     types_error_stash = 0;
1412     types_boolean_stash = 0;
1413 root 1.1
1414     void new (char *klass)
1415     PPCODE:
1416     {
1417     SV *pv = NEWSV (0, sizeof (CBOR));
1418     SvPOK_only (pv);
1419     cbor_init ((CBOR *)SvPVX (pv));
1420     XPUSHs (sv_2mortal (sv_bless (
1421     newRV_noinc (pv),
1422     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1423     )));
1424     }
1425    
1426     void shrink (CBOR *self, int enable = 1)
1427     ALIAS:
1428     shrink = F_SHRINK
1429     allow_unknown = F_ALLOW_UNKNOWN
1430 root 1.18 allow_sharing = F_ALLOW_SHARING
1431 root 1.37 allow_cycles = F_ALLOW_CYCLES
1432 root 1.32 pack_strings = F_PACK_STRINGS
1433 root 1.54 text_keys = F_TEXT_KEYS
1434     text_strings = F_TEXT_STRINGS
1435 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1436 root 1.1 PPCODE:
1437     {
1438     if (enable)
1439     self->flags |= ix;
1440     else
1441     self->flags &= ~ix;
1442    
1443     XPUSHs (ST (0));
1444     }
1445    
1446     void get_shrink (CBOR *self)
1447     ALIAS:
1448     get_shrink = F_SHRINK
1449     get_allow_unknown = F_ALLOW_UNKNOWN
1450 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1451 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1452 root 1.32 get_pack_strings = F_PACK_STRINGS
1453 root 1.54 get_text_keys = F_TEXT_KEYS
1454     get_text_strings = F_TEXT_STRINGS
1455 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1456 root 1.1 PPCODE:
1457     XPUSHs (boolSV (self->flags & ix));
1458    
1459     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1460     PPCODE:
1461     self->max_depth = max_depth;
1462     XPUSHs (ST (0));
1463    
1464     U32 get_max_depth (CBOR *self)
1465     CODE:
1466     RETVAL = self->max_depth;
1467     OUTPUT:
1468     RETVAL
1469    
1470     void max_size (CBOR *self, U32 max_size = 0)
1471     PPCODE:
1472     self->max_size = max_size;
1473     XPUSHs (ST (0));
1474    
1475     int get_max_size (CBOR *self)
1476     CODE:
1477     RETVAL = self->max_size;
1478     OUTPUT:
1479     RETVAL
1480    
1481 root 1.27 void filter (CBOR *self, SV *filter = 0)
1482     PPCODE:
1483     SvREFCNT_dec (self->filter);
1484     self->filter = filter ? newSVsv (filter) : filter;
1485     XPUSHs (ST (0));
1486    
1487     SV *get_filter (CBOR *self)
1488     CODE:
1489     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1490     OUTPUT:
1491     RETVAL
1492    
1493 root 1.1 void encode (CBOR *self, SV *scalar)
1494     PPCODE:
1495     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1496     XPUSHs (scalar);
1497    
1498     void decode (CBOR *self, SV *cborstr)
1499     PPCODE:
1500     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1501     XPUSHs (cborstr);
1502    
1503     void decode_prefix (CBOR *self, SV *cborstr)
1504     PPCODE:
1505     {
1506     SV *sv;
1507     char *offset;
1508     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1509     EXTEND (SP, 2);
1510     PUSHs (sv);
1511     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1512     }
1513    
1514 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1515 root 1.42 ALIAS:
1516     incr_parse_multiple = 1
1517 root 1.40 PPCODE:
1518     {
1519     if (SvUTF8 (cborstr))
1520     sv_utf8_downgrade (cborstr, 0);
1521    
1522     if (!self->incr_count)
1523     {
1524     self->incr_count = newAV ();
1525     self->incr_pos = 0;
1526     self->incr_need = 1;
1527    
1528     av_push (self->incr_count, newSViv (1));
1529     }
1530    
1531 root 1.41 do
1532 root 1.40 {
1533     if (!incr_parse (self, cborstr))
1534     {
1535     if (self->incr_need > self->max_size && self->max_size)
1536     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1537     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1538    
1539     break;
1540     }
1541    
1542 root 1.41 SV *sv;
1543     char *offset;
1544 root 1.40
1545 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1546     XPUSHs (sv);
1547 root 1.40
1548 root 1.41 sv_chop (cborstr, offset);
1549 root 1.40
1550 root 1.41 av_clear (self->incr_count);
1551     av_push (self->incr_count, newSViv (1));
1552 root 1.40
1553 root 1.41 self->incr_pos = 0;
1554     self->incr_need = self->incr_pos + 1;
1555 root 1.40 }
1556 root 1.42 while (ix);
1557 root 1.40 }
1558    
1559     void incr_reset (CBOR *self)
1560     CODE:
1561     {
1562     SvREFCNT_dec (self->incr_count);
1563     self->incr_count = 0;
1564     }
1565    
1566 root 1.27 void DESTROY (CBOR *self)
1567     PPCODE:
1568     cbor_free (self);
1569    
1570 root 1.1 PROTOTYPES: ENABLE
1571    
1572     void encode_cbor (SV *scalar)
1573 root 1.36 ALIAS:
1574     encode_cbor = 0
1575     encode_cbor_sharing = F_ALLOW_SHARING
1576 root 1.1 PPCODE:
1577     {
1578     CBOR cbor;
1579     cbor_init (&cbor);
1580 root 1.36 cbor.flags |= ix;
1581 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1582     XPUSHs (scalar);
1583     }
1584    
1585     void decode_cbor (SV *cborstr)
1586     PPCODE:
1587     {
1588     CBOR cbor;
1589     cbor_init (&cbor);
1590     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1591     XPUSHs (cborstr);
1592     }
1593