ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.55
Committed: Fri Nov 25 06:13:16 2016 UTC (7 years, 5 months ago) by root
Branch: MAIN
Changes since 1.54: +4 -2 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12 root 1.44 #define ECB_NO_THREADS 1
13 root 1.1 #include "ecb.h"
14    
15 root 1.14 // compatibility with perl <5.18
16     #ifndef HvNAMELEN_get
17     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
18     #endif
19     #ifndef HvNAMELEN
20     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
21     #endif
22     #ifndef HvNAMEUTF8
23     # define HvNAMEUTF8(hv) 0
24     #endif
25 root 1.28 #ifndef SvREFCNT_dec_NN
26     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
27     #endif
28 root 1.14
29 root 1.35 // known major and minor types
30     enum cbor_type
31     {
32     MAJOR_SHIFT = 5,
33     MINOR_MASK = 0x1f,
34    
35     MAJOR_POS_INT = 0 << MAJOR_SHIFT,
36     MAJOR_NEG_INT = 1 << MAJOR_SHIFT,
37     MAJOR_BYTES = 2 << MAJOR_SHIFT,
38     MAJOR_TEXT = 3 << MAJOR_SHIFT,
39     MAJOR_ARRAY = 4 << MAJOR_SHIFT,
40     MAJOR_MAP = 5 << MAJOR_SHIFT,
41     MAJOR_TAG = 6 << MAJOR_SHIFT,
42     MAJOR_MISC = 7 << MAJOR_SHIFT,
43    
44     // INT/STRING/ARRAY/MAP subtypes
45     LENGTH_EXT1 = 24,
46     LENGTH_EXT2 = 25,
47     LENGTH_EXT4 = 26,
48     LENGTH_EXT8 = 27,
49    
50     // SIMPLE types (effectively MISC subtypes)
51     SIMPLE_FALSE = 20,
52     SIMPLE_TRUE = 21,
53     SIMPLE_NULL = 22,
54     SIMPLE_UNDEF = 23,
55    
56     // MISC subtype (unused)
57     MISC_EXT1 = 24,
58     MISC_FLOAT16 = 25,
59     MISC_FLOAT32 = 26,
60     MISC_FLOAT64 = 27,
61    
62     // BYTES/TEXT/ARRAY/MAP
63     MINOR_INDEF = 31,
64     };
65    
66 root 1.9 // known tags
67 root 1.8 enum cbor_tag
68     {
69 root 1.35 // extensions
70     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
71     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
72     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
73     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
74     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
75     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
76     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
77    
78     // rfc7049
79     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
80     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
81     CBOR_TAG_POS_BIGNUM = 2, // byte string
82     CBOR_TAG_NEG_BIGNUM = 3, // byte string
83     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
84     CBOR_TAG_BIGFLOAT = 5, // array
85    
86     CBOR_TAG_CONV_B64U = 21, // base64url, any
87     CBOR_TAG_CONV_B64 = 22, // base64, any
88     CBOR_TAG_CONV_HEX = 23, // base16, any
89     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
90    
91     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
92     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
93     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
94     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
95     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
96 root 1.8
97 root 1.35 CBOR_TAG_MAGIC = 55799, // self-describe cbor
98 root 1.8 };
99    
100 root 1.24 #define F_SHRINK 0x00000001UL
101     #define F_ALLOW_UNKNOWN 0x00000002UL
102 root 1.32 #define F_ALLOW_SHARING 0x00000004UL
103 root 1.37 #define F_ALLOW_CYCLES 0x00000008UL
104     #define F_PACK_STRINGS 0x00000010UL
105 root 1.54 #define F_TEXT_KEYS 0x00000020UL
106     #define F_TEXT_STRINGS 0x00000040UL
107 root 1.53 #define F_VALIDATE_UTF8 0x00000080UL
108 root 1.1
109     #define INIT_SIZE 32 // initial scalar size to be allocated
110    
111     #define SB do {
112     #define SE } while (0)
113    
114     #define IN_RANGE_INC(type,val,beg,end) \
115     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
116     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
117    
118     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
119    
120     #ifdef USE_ITHREADS
121     # define CBOR_SLOW 1
122     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
123     #else
124     # define CBOR_SLOW 0
125     # define CBOR_STASH cbor_stash
126     #endif
127    
128 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
129 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
130 root 1.1
131     typedef struct {
132     U32 flags;
133     U32 max_depth;
134     STRLEN max_size;
135 root 1.27 SV *filter;
136 root 1.40
137     // for the incremental parser
138     STRLEN incr_pos; // the current offset into the text
139     STRLEN incr_need; // minimum bytes needed to decode
140     AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef.
141 root 1.1 } CBOR;
142    
143 root 1.5 ecb_inline void
144 root 1.1 cbor_init (CBOR *cbor)
145     {
146     Zero (cbor, 1, CBOR);
147     cbor->max_depth = 512;
148     }
149    
150 root 1.27 ecb_inline void
151     cbor_free (CBOR *cbor)
152     {
153     SvREFCNT_dec (cbor->filter);
154 root 1.40 SvREFCNT_dec (cbor->incr_count);
155 root 1.27 }
156    
157 root 1.1 /////////////////////////////////////////////////////////////////////////////
158     // utility functions
159    
160 root 1.5 ecb_inline SV *
161 root 1.1 get_bool (const char *name)
162     {
163     SV *sv = get_sv (name, 1);
164    
165     SvREADONLY_on (sv);
166     SvREADONLY_on (SvRV (sv));
167    
168     return sv;
169     }
170    
171 root 1.5 ecb_inline void
172 root 1.1 shrink (SV *sv)
173     {
174     sv_utf8_downgrade (sv, 1);
175    
176     if (SvLEN (sv) > SvCUR (sv) + 1)
177     {
178     #ifdef SvPV_shrink_to_cur
179     SvPV_shrink_to_cur (sv);
180     #elif defined (SvPV_renew)
181     SvPV_renew (sv, SvCUR (sv) + 1);
182     #endif
183     }
184     }
185    
186 root 1.21 // minimum length of a string to be registered for stringref
187     ecb_inline int
188     minimum_string_length (UV idx)
189     {
190     return idx > 23
191     ? idx > 0xffU
192     ? idx > 0xffffU
193     ? idx > 0xffffffffU
194 root 1.29 ? 11
195     : 7
196 root 1.21 : 5
197     : 4
198     : 3;
199     }
200    
201 root 1.1 /////////////////////////////////////////////////////////////////////////////
202     // encoder
203    
204     // structure used for encoding CBOR
205     typedef struct
206     {
207     char *cur; // SvPVX (sv) + current output position
208     char *end; // SvEND (sv)
209     SV *sv; // result scalar
210     CBOR cbor;
211     U32 depth; // recursion level
212 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
213     UV stringref_idx;
214 root 1.19 HV *shareable; // ptr => index, or 0
215     UV shareable_idx;
216 root 1.1 } enc_t;
217    
218 root 1.5 ecb_inline void
219 root 1.1 need (enc_t *enc, STRLEN len)
220     {
221 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
222 root 1.1 {
223     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
224     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
225     enc->cur = SvPVX (enc->sv) + cur;
226     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
227     }
228     }
229    
230 root 1.5 ecb_inline void
231 root 1.1 encode_ch (enc_t *enc, char ch)
232     {
233     need (enc, 1);
234     *enc->cur++ = ch;
235     }
236    
237     static void
238     encode_uint (enc_t *enc, int major, UV len)
239     {
240     need (enc, 9);
241    
242 root 1.35 if (ecb_expect_true (len < LENGTH_EXT1))
243 root 1.1 *enc->cur++ = major | len;
244 root 1.36 else if (ecb_expect_true (len <= 0xffU))
245 root 1.1 {
246 root 1.35 *enc->cur++ = major | LENGTH_EXT1;
247 root 1.1 *enc->cur++ = len;
248     }
249 root 1.36 else if (len <= 0xffffU)
250 root 1.1 {
251 root 1.35 *enc->cur++ = major | LENGTH_EXT2;
252 root 1.1 *enc->cur++ = len >> 8;
253     *enc->cur++ = len;
254     }
255 root 1.36 else if (len <= 0xffffffffU)
256 root 1.1 {
257 root 1.35 *enc->cur++ = major | LENGTH_EXT4;
258 root 1.1 *enc->cur++ = len >> 24;
259     *enc->cur++ = len >> 16;
260     *enc->cur++ = len >> 8;
261     *enc->cur++ = len;
262     }
263 root 1.4 else
264 root 1.1 {
265 root 1.35 *enc->cur++ = major | LENGTH_EXT8;
266 root 1.1 *enc->cur++ = len >> 56;
267     *enc->cur++ = len >> 48;
268     *enc->cur++ = len >> 40;
269     *enc->cur++ = len >> 32;
270     *enc->cur++ = len >> 24;
271     *enc->cur++ = len >> 16;
272     *enc->cur++ = len >> 8;
273     *enc->cur++ = len;
274     }
275     }
276    
277 root 1.21 ecb_inline void
278     encode_tag (enc_t *enc, UV tag)
279     {
280 root 1.35 encode_uint (enc, MAJOR_TAG, tag);
281 root 1.21 }
282    
283 root 1.53 // exceptional (hopefully) slow path for byte strings that need to be utf8-encoded
284     ecb_noinline static void
285     encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len)
286     {
287     STRLEN ulen = len;
288     U8 *p, *pend = (U8 *)str + len;
289    
290     for (p = (U8 *)str; p < pend; ++p)
291     ulen += *p >> 7; // count set high bits
292    
293     encode_uint (enc, MAJOR_TEXT, ulen);
294    
295     need (enc, ulen);
296     for (p = (U8 *)str; p < pend; ++p)
297     if (*p < 0x80)
298     *enc->cur++ = *p;
299     else
300     {
301     *enc->cur++ = 0xc0 + (*p >> 6);
302     *enc->cur++ = 0x80 + (*p & 63);
303     }
304     }
305    
306 root 1.30 ecb_inline void
307 root 1.53 encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
308 root 1.30 {
309 root 1.53 if (ecb_expect_false (upgrade_utf8))
310 root 1.51 if (!utf8)
311     {
312 root 1.53 encode_str_utf8 (enc, utf8, str, len);
313 root 1.51 return;
314     }
315    
316 root 1.35 encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len);
317 root 1.30 need (enc, len);
318     memcpy (enc->cur, str, len);
319     enc->cur += len;
320     }
321    
322 root 1.54 ecb_inline void
323 root 1.53 encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len)
324 root 1.1 {
325 root 1.32 if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS))
326 root 1.21 {
327 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
328 root 1.21
329     if (SvOK (*svp))
330     {
331     // already registered, use stringref
332     encode_tag (enc, CBOR_TAG_STRINGREF);
333 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
334 root 1.21 return;
335     }
336     else if (len >= minimum_string_length (enc->stringref_idx))
337     {
338     // register only
339     sv_setuv (*svp, enc->stringref_idx);
340     ++enc->stringref_idx;
341     }
342     }
343    
344 root 1.53 encode_str (enc, upgrade_utf8, utf8, str, len);
345 root 1.1 }
346    
347     static void encode_sv (enc_t *enc, SV *sv);
348    
349     static void
350     encode_av (enc_t *enc, AV *av)
351     {
352     int i, len = av_len (av);
353    
354     if (enc->depth >= enc->cbor.max_depth)
355     croak (ERR_NESTING_EXCEEDED);
356    
357     ++enc->depth;
358    
359 root 1.35 encode_uint (enc, MAJOR_ARRAY, len + 1);
360 root 1.1
361 root 1.45 if (SvMAGICAL (av))
362     for (i = 0; i <= len; ++i)
363     {
364     SV **svp = av_fetch (av, i, 0);
365     encode_sv (enc, svp ? *svp : &PL_sv_undef);
366     }
367     else
368     for (i = 0; i <= len; ++i)
369 root 1.46 {
370     SV *sv = AvARRAY (av)[i];
371     encode_sv (enc, sv ? sv : &PL_sv_undef);
372     }
373 root 1.1
374     --enc->depth;
375     }
376    
377     static void
378     encode_hv (enc_t *enc, HV *hv)
379     {
380     HE *he;
381    
382     if (enc->depth >= enc->cbor.max_depth)
383     croak (ERR_NESTING_EXCEEDED);
384    
385     ++enc->depth;
386    
387     int pairs = hv_iterinit (hv);
388     int mg = SvMAGICAL (hv);
389    
390     if (mg)
391 root 1.35 encode_ch (enc, MAJOR_MAP | MINOR_INDEF);
392 root 1.1 else
393 root 1.35 encode_uint (enc, MAJOR_MAP, pairs);
394 root 1.1
395     while ((he = hv_iternext (hv)))
396     {
397 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
398     encode_sv (enc, HeSVKEY (he));
399 root 1.1 else
400 root 1.54 encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he));
401 root 1.1
402 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
403 root 1.1 }
404    
405     if (mg)
406 root 1.35 encode_ch (enc, MAJOR_MISC | MINOR_INDEF);
407 root 1.1
408     --enc->depth;
409     }
410    
411     // encode objects, arrays and special \0=false and \1=true values.
412     static void
413     encode_rv (enc_t *enc, SV *sv)
414     {
415 root 1.19 SvGETMAGIC (sv);
416 root 1.1
417 root 1.19 svtype svt = SvTYPE (sv);
418 root 1.18
419 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
420 root 1.1 {
421 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
422     ? types_boolean_stash
423     : gv_stashpv ("Types::Serialiser::Boolean", 1);
424     HV *error_stash = !CBOR_SLOW || types_error_stash
425     ? types_error_stash
426     : gv_stashpv ("Types::Serialiser::Error", 1);
427 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
428     ? cbor_tagged_stash
429     : gv_stashpv ("CBOR::XS::Tagged" , 1);
430 root 1.1
431 root 1.11 HV *stash = SvSTASH (sv);
432    
433     if (stash == boolean_stash)
434 root 1.34 {
435 root 1.35 encode_ch (enc, SvIV (sv) ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE);
436 root 1.34 return;
437     }
438 root 1.11 else if (stash == error_stash)
439 root 1.34 {
440 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
441 root 1.34 return;
442     }
443 root 1.11 else if (stash == tagged_stash)
444 root 1.6 {
445     if (svt != SVt_PVAV)
446     croak ("encountered CBOR::XS::Tagged object that isn't an array");
447    
448 root 1.35 encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1)));
449 root 1.6 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
450 root 1.34
451     return;
452 root 1.6 }
453 root 1.34 }
454    
455     if (ecb_expect_false (SvREFCNT (sv) > 1)
456     && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING))
457     {
458     if (!enc->shareable)
459     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
460    
461     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
462    
463     if (SvOK (*svp))
464     {
465     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
466 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUV (*svp));
467 root 1.34 return;
468     }
469     else
470     {
471     sv_setuv (*svp, enc->shareable_idx);
472     ++enc->shareable_idx;
473     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
474     }
475     }
476    
477     if (ecb_expect_false (SvOBJECT (sv)))
478     {
479     HV *stash = SvSTASH (sv);
480     GV *method;
481    
482     if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
483 root 1.1 {
484 root 1.11 dSP;
485    
486 root 1.50 ENTER; SAVETMPS;
487     PUSHMARK (SP);
488 root 1.6 // we re-bless the reference to get overload and other niceties right
489 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
490    
491     PUTBACK;
492     // G_SCALAR ensures that return value is 1
493     call_sv ((SV *)GvCV (method), G_SCALAR);
494     SPAGAIN;
495    
496     // catch this surprisingly common error
497     if (SvROK (TOPs) && SvRV (TOPs) == sv)
498     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
499    
500     encode_sv (enc, POPs);
501    
502     PUTBACK;
503    
504     FREETMPS; LEAVE;
505     }
506     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
507     {
508     dSP;
509 root 1.6
510 root 1.50 ENTER; SAVETMPS;
511     SAVESTACK_POS ();
512     PUSHMARK (SP);
513 root 1.11 EXTEND (SP, 2);
514     // we re-bless the reference to get overload and other niceties right
515     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
516     PUSHs (sv_cbor);
517 root 1.1
518 root 1.11 PUTBACK;
519     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
520     SPAGAIN;
521 root 1.6
522 root 1.11 // catch this surprisingly common error
523     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
524     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
525 root 1.6
526 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
527 root 1.35 encode_uint (enc, MAJOR_ARRAY, count + 1);
528 root 1.53 encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
529 root 1.6
530 root 1.11 while (count)
531     encode_sv (enc, SP[1 - count--]);
532 root 1.6
533 root 1.11 PUTBACK;
534 root 1.6
535 root 1.11 FREETMPS; LEAVE;
536 root 1.1 }
537 root 1.11 else
538     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
539     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
540 root 1.1 }
541     else if (svt == SVt_PVHV)
542     encode_hv (enc, (HV *)sv);
543     else if (svt == SVt_PVAV)
544     encode_av (enc, (AV *)sv);
545 root 1.18 else
546 root 1.1 {
547 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
548     encode_sv (enc, sv);
549 root 1.1 }
550     }
551    
552     static void
553     encode_nv (enc_t *enc, SV *sv)
554     {
555     double nv = SvNVX (sv);
556    
557     need (enc, 9);
558    
559 root 1.35 if (ecb_expect_false (nv == (NV)(U32)nv))
560     encode_uint (enc, MAJOR_POS_INT, (U32)nv);
561 root 1.1 //TODO: maybe I32?
562 root 1.5 else if (ecb_expect_false (nv == (float)nv))
563 root 1.1 {
564     uint32_t fp = ecb_float_to_binary32 (nv);
565    
566 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT32;
567 root 1.1
568     if (!ecb_big_endian ())
569     fp = ecb_bswap32 (fp);
570    
571     memcpy (enc->cur, &fp, 4);
572     enc->cur += 4;
573     }
574     else
575     {
576     uint64_t fp = ecb_double_to_binary64 (nv);
577    
578 root 1.35 *enc->cur++ = MAJOR_MISC | MISC_FLOAT64;
579 root 1.1
580     if (!ecb_big_endian ())
581     fp = ecb_bswap64 (fp);
582    
583     memcpy (enc->cur, &fp, 8);
584     enc->cur += 8;
585     }
586     }
587    
588     static void
589     encode_sv (enc_t *enc, SV *sv)
590     {
591     SvGETMAGIC (sv);
592    
593     if (SvPOKp (sv))
594     {
595     STRLEN len;
596     char *str = SvPV (sv, len);
597 root 1.54 encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len);
598 root 1.1 }
599     else if (SvNOKp (sv))
600     encode_nv (enc, sv);
601     else if (SvIOKp (sv))
602     {
603     if (SvIsUV (sv))
604 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvUVX (sv));
605 root 1.1 else if (SvIVX (sv) >= 0)
606 root 1.35 encode_uint (enc, MAJOR_POS_INT, SvIVX (sv));
607 root 1.1 else
608 root 1.35 encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1));
609 root 1.1 }
610     else if (SvROK (sv))
611     encode_rv (enc, SvRV (sv));
612     else if (!SvOK (sv))
613 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_NULL);
614 root 1.1 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
615 root 1.35 encode_ch (enc, MAJOR_MISC | SIMPLE_UNDEF);
616 root 1.1 else
617     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
618     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
619     }
620    
621     static SV *
622     encode_cbor (SV *scalar, CBOR *cbor)
623     {
624 root 1.48 enc_t enc = { 0 };
625 root 1.1
626     enc.cbor = *cbor;
627     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
628     enc.cur = SvPVX (enc.sv);
629     enc.end = SvEND (enc.sv);
630    
631     SvPOK_only (enc.sv);
632 root 1.20
633 root 1.32 if (cbor->flags & F_PACK_STRINGS)
634 root 1.20 {
635     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
636     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
637     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
638     }
639    
640 root 1.1 encode_sv (&enc, scalar);
641    
642     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
643     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
644    
645     if (enc.cbor.flags & F_SHRINK)
646     shrink (enc.sv);
647    
648     return enc.sv;
649     }
650    
651     /////////////////////////////////////////////////////////////////////////////
652     // decoder
653    
654     // structure used for decoding CBOR
655     typedef struct
656     {
657     U8 *cur; // current parser pointer
658     U8 *end; // end of input string
659     const char *err; // parse error, if != 0
660     CBOR cbor;
661     U32 depth; // recursion depth
662     U32 maxdepth; // recursion depth limit
663 root 1.19 AV *shareable;
664 root 1.20 AV *stringref;
665 root 1.27 SV *decode_tagged;
666 root 1.1 } dec_t;
667    
668     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
669    
670 root 1.5 #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
671 root 1.1
672     #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED)
673     #define DEC_DEC_DEPTH --dec->depth
674    
675     static UV
676     decode_uint (dec_t *dec)
677     {
678 root 1.35 U8 m = *dec->cur & MINOR_MASK;
679     ++dec->cur;
680    
681     if (ecb_expect_true (m < LENGTH_EXT1))
682     return m;
683 root 1.36 else if (ecb_expect_true (m == LENGTH_EXT1))
684     {
685     WANT (1);
686     dec->cur += 1;
687     return dec->cur[-1];
688     }
689     else if (ecb_expect_true (m == LENGTH_EXT2))
690     {
691     WANT (2);
692     dec->cur += 2;
693     return (((UV)dec->cur[-2]) << 8)
694     | ((UV)dec->cur[-1]);
695     }
696     else if (ecb_expect_true (m == LENGTH_EXT4))
697     {
698     WANT (4);
699     dec->cur += 4;
700     return (((UV)dec->cur[-4]) << 24)
701     | (((UV)dec->cur[-3]) << 16)
702     | (((UV)dec->cur[-2]) << 8)
703     | ((UV)dec->cur[-1]);
704     }
705     else if (ecb_expect_true (m == LENGTH_EXT8))
706 root 1.1 {
707 root 1.36 WANT (8);
708     dec->cur += 8;
709 root 1.34
710 root 1.36 return
711 root 1.34 #if UVSIZE < 8
712 root 1.36 0
713 root 1.34 #else
714 root 1.36 (((UV)dec->cur[-8]) << 56)
715     | (((UV)dec->cur[-7]) << 48)
716     | (((UV)dec->cur[-6]) << 40)
717     | (((UV)dec->cur[-5]) << 32)
718 root 1.34 #endif
719 root 1.36 | (((UV)dec->cur[-4]) << 24)
720     | (((UV)dec->cur[-3]) << 16)
721     | (((UV)dec->cur[-2]) << 8)
722     | ((UV)dec->cur[-1]);
723 root 1.1 }
724 root 1.36 else
725     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
726 root 1.1
727     fail:
728     return 0;
729     }
730    
731     static SV *decode_sv (dec_t *dec);
732    
733     static SV *
734     decode_av (dec_t *dec)
735     {
736     AV *av = newAV ();
737    
738     DEC_INC_DEPTH;
739    
740 root 1.35 if (*dec->cur == (MAJOR_ARRAY | MINOR_INDEF))
741 root 1.1 {
742     ++dec->cur;
743    
744     for (;;)
745     {
746     WANT (1);
747    
748 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
749 root 1.1 {
750     ++dec->cur;
751     break;
752     }
753    
754     av_push (av, decode_sv (dec));
755     }
756     }
757     else
758     {
759     int i, len = decode_uint (dec);
760    
761 root 1.36 WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays
762 root 1.1 av_fill (av, len - 1);
763    
764     for (i = 0; i < len; ++i)
765     AvARRAY (av)[i] = decode_sv (dec);
766     }
767    
768     DEC_DEC_DEPTH;
769     return newRV_noinc ((SV *)av);
770    
771     fail:
772     SvREFCNT_dec (av);
773     DEC_DEC_DEPTH;
774     return &PL_sv_undef;
775     }
776    
777 root 1.16 static void
778     decode_he (dec_t *dec, HV *hv)
779     {
780     // for speed reasons, we specialcase single-string
781 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
782    
783 root 1.23 if (ecb_expect_true (!dec->stringref))
784 root 1.43 if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8))
785 root 1.21 {
786     I32 len = decode_uint (dec);
787     char *key = (char *)dec->cur;
788 root 1.16
789 root 1.49 WANT (len);
790 root 1.21 dec->cur += len;
791    
792 root 1.49 hv_store (hv, key, len, decode_sv (dec), 0);
793 root 1.16
794 root 1.21 return;
795     }
796 root 1.43 else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8))
797 root 1.21 {
798     I32 len = decode_uint (dec);
799     char *key = (char *)dec->cur;
800 root 1.16
801 root 1.49 WANT (len);
802 root 1.21 dec->cur += len;
803 root 1.20
804 root 1.38 if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
805     if (!is_utf8_string (key, len))
806     ERR ("corrupted CBOR data (invalid UTF-8 in map key)");
807 root 1.16
808 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
809 root 1.16
810 root 1.21 return;
811     }
812 root 1.20
813 root 1.21 SV *k = decode_sv (dec);
814     SV *v = decode_sv (dec);
815 root 1.16
816 root 1.21 hv_store_ent (hv, k, v, 0);
817     SvREFCNT_dec (k);
818 root 1.38
819     fail:
820     ;
821 root 1.16 }
822    
823 root 1.1 static SV *
824     decode_hv (dec_t *dec)
825     {
826     HV *hv = newHV ();
827    
828     DEC_INC_DEPTH;
829    
830 root 1.35 if (*dec->cur == (MAJOR_MAP | MINOR_INDEF))
831 root 1.1 {
832     ++dec->cur;
833    
834     for (;;)
835     {
836     WANT (1);
837    
838 root 1.35 if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
839 root 1.1 {
840     ++dec->cur;
841     break;
842     }
843    
844 root 1.16 decode_he (dec, hv);
845 root 1.1 }
846     }
847     else
848     {
849 root 1.16 int pairs = decode_uint (dec);
850 root 1.1
851 root 1.16 while (pairs--)
852     decode_he (dec, hv);
853 root 1.1 }
854    
855     DEC_DEC_DEPTH;
856     return newRV_noinc ((SV *)hv);
857    
858     fail:
859     SvREFCNT_dec (hv);
860     DEC_DEC_DEPTH;
861     return &PL_sv_undef;
862     }
863    
864     static SV *
865     decode_str (dec_t *dec, int utf8)
866     {
867 root 1.6 SV *sv = 0;
868 root 1.1
869 root 1.35 if ((*dec->cur & MINOR_MASK) == MINOR_INDEF)
870 root 1.1 {
871 root 1.33 // indefinite length strings
872 root 1.1 ++dec->cur;
873    
874 root 1.35 U8 major = *dec->cur & MAJOR_MISC;
875 root 1.33
876 root 1.1 sv = newSVpvn ("", 0);
877    
878     for (;;)
879     {
880     WANT (1);
881    
882 root 1.35 if ((*dec->cur - major) > LENGTH_EXT8)
883     if (*dec->cur == (MAJOR_MISC | MINOR_INDEF))
884 root 1.33 {
885     ++dec->cur;
886     break;
887     }
888     else
889     ERR ("corrupted CBOR data (invalid chunks in indefinite length string)");
890    
891     STRLEN len = decode_uint (dec);
892 root 1.1
893 root 1.33 WANT (len);
894     sv_catpvn (sv, dec->cur, len);
895     dec->cur += len;
896 root 1.1 }
897     }
898     else
899     {
900     STRLEN len = decode_uint (dec);
901    
902     WANT (len);
903     sv = newSVpvn (dec->cur, len);
904     dec->cur += len;
905 root 1.25
906     if (ecb_expect_false (dec->stringref)
907     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
908     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
909 root 1.1 }
910    
911     if (utf8)
912 root 1.38 {
913     if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8))
914     if (!is_utf8_string (SvPVX (sv), SvCUR (sv)))
915     ERR ("corrupted CBOR data (invalid UTF-8 in text string)");
916    
917     SvUTF8_on (sv);
918     }
919 root 1.1
920     return sv;
921    
922     fail:
923 root 1.6 SvREFCNT_dec (sv);
924 root 1.1 return &PL_sv_undef;
925     }
926    
927     static SV *
928 root 1.3 decode_tagged (dec_t *dec)
929     {
930 root 1.19 SV *sv = 0;
931 root 1.3 UV tag = decode_uint (dec);
932 root 1.19
933     WANT (1);
934 root 1.3
935 root 1.18 switch (tag)
936     {
937     case CBOR_TAG_MAGIC:
938 root 1.20 sv = decode_sv (dec);
939     break;
940 root 1.18
941     case CBOR_TAG_INDIRECTION:
942 root 1.20 sv = newRV_noinc (decode_sv (dec));
943     break;
944    
945     case CBOR_TAG_STRINGREF_NAMESPACE:
946     {
947 root 1.55 // do nmot use SAVETMPS/FREETMPS, as these will
948     // erase mortalised caches, e.g. "shareable"
949     ENTER;
950 root 1.20
951     SAVESPTR (dec->stringref);
952     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
953    
954     sv = decode_sv (dec);
955    
956 root 1.55 LEAVE;
957 root 1.20 }
958     break;
959    
960     case CBOR_TAG_STRINGREF:
961     {
962 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
963 root 1.20 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
964    
965     UV idx = decode_uint (dec);
966    
967     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
968     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
969    
970     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
971     }
972     break;
973 root 1.11
974 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
975     {
976     if (ecb_expect_false (!dec->shareable))
977     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
978    
979 root 1.37 if (dec->cbor.flags & F_ALLOW_CYCLES)
980     {
981     sv = newSV (0);
982     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
983 root 1.18
984 root 1.37 SV *osv = decode_sv (dec);
985     sv_setsv (sv, osv);
986     SvREFCNT_dec_NN (osv);
987     }
988     else
989     {
990     av_push (dec->shareable, &PL_sv_undef);
991     int idx = AvFILLp (dec->shareable);
992     sv = decode_sv (dec);
993     av_store (dec->shareable, idx, SvREFCNT_inc_NN (sv));
994     }
995 root 1.19 }
996 root 1.20 break;
997 root 1.18
998     case CBOR_TAG_VALUE_SHAREDREF:
999 root 1.17 {
1000 root 1.35 if ((*dec->cur >> MAJOR_SHIFT) != (MAJOR_POS_INT >> MAJOR_SHIFT))
1001 root 1.19 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
1002 root 1.18
1003 root 1.19 UV idx = decode_uint (dec);
1004    
1005 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
1006 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
1007    
1008 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
1009 root 1.37
1010     if (sv == &PL_sv_undef)
1011     ERR ("cyclic CBOR data structure found, but allow_cycles is not enabled");
1012 root 1.17 }
1013 root 1.20 break;
1014 root 1.17
1015 root 1.18 case CBOR_TAG_PERL_OBJECT:
1016     {
1017 root 1.19 sv = decode_sv (dec);
1018    
1019 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
1020     ERR ("corrupted CBOR data (non-array perl object)");
1021    
1022     AV *av = (AV *)SvRV (sv);
1023     int len = av_len (av) + 1;
1024     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
1025    
1026     if (!stash)
1027     ERR ("cannot decode perl-object (package does not exist)");
1028    
1029     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
1030    
1031     if (!method)
1032     ERR ("cannot decode perl-object (package does not have a THAW method)");
1033    
1034     dSP;
1035    
1036 root 1.50 ENTER; SAVETMPS;
1037     PUSHMARK (SP);
1038 root 1.18 EXTEND (SP, len + 1);
1039     // we re-bless the reference to get overload and other niceties right
1040     PUSHs (*av_fetch (av, 0, 1));
1041     PUSHs (sv_cbor);
1042    
1043     int i;
1044    
1045     for (i = 1; i < len; ++i)
1046     PUSHs (*av_fetch (av, i, 1));
1047    
1048     PUTBACK;
1049     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
1050     SPAGAIN;
1051    
1052     if (SvTRUE (ERRSV))
1053     {
1054     FREETMPS; LEAVE;
1055     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1056     }
1057    
1058     SvREFCNT_dec (sv);
1059     sv = SvREFCNT_inc (POPs);
1060 root 1.11
1061 root 1.18 PUTBACK;
1062 root 1.11
1063 root 1.18 FREETMPS; LEAVE;
1064     }
1065 root 1.20 break;
1066 root 1.9
1067 root 1.18 default:
1068     {
1069 root 1.19 sv = decode_sv (dec);
1070    
1071 root 1.27 dSP;
1072 root 1.50 ENTER; SAVETMPS;
1073     SAVESTACK_POS ();
1074     PUSHMARK (SP);
1075 root 1.27 EXTEND (SP, 2);
1076     PUSHs (newSVuv (tag));
1077     PUSHs (sv);
1078    
1079     PUTBACK;
1080     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
1081     SPAGAIN;
1082    
1083     if (SvTRUE (ERRSV))
1084     {
1085     FREETMPS; LEAVE;
1086     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
1087     }
1088    
1089     if (count)
1090     {
1091     SvREFCNT_dec (sv);
1092     sv = SvREFCNT_inc (POPs);
1093     }
1094     else
1095     {
1096     AV *av = newAV ();
1097     av_push (av, newSVuv (tag));
1098     av_push (av, sv);
1099    
1100     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
1101     ? cbor_tagged_stash
1102     : gv_stashpv ("CBOR::XS::Tagged" , 1);
1103     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
1104     }
1105 root 1.7
1106 root 1.27 PUTBACK;
1107    
1108     FREETMPS; LEAVE;
1109 root 1.18 }
1110 root 1.20 break;
1111 root 1.11 }
1112 root 1.9
1113 root 1.20 return sv;
1114    
1115 root 1.9 fail:
1116     SvREFCNT_dec (sv);
1117     return &PL_sv_undef;
1118 root 1.3 }
1119    
1120     static SV *
1121 root 1.1 decode_sv (dec_t *dec)
1122     {
1123     WANT (1);
1124    
1125 root 1.35 switch (*dec->cur >> MAJOR_SHIFT)
1126 root 1.1 {
1127 root 1.35 case MAJOR_POS_INT >> MAJOR_SHIFT: return newSVuv (decode_uint (dec));
1128     case MAJOR_NEG_INT >> MAJOR_SHIFT: return newSViv (-1 - (IV)decode_uint (dec));
1129     case MAJOR_BYTES >> MAJOR_SHIFT: return decode_str (dec, 0);
1130     case MAJOR_TEXT >> MAJOR_SHIFT: return decode_str (dec, 1);
1131     case MAJOR_ARRAY >> MAJOR_SHIFT: return decode_av (dec);
1132     case MAJOR_MAP >> MAJOR_SHIFT: return decode_hv (dec);
1133     case MAJOR_TAG >> MAJOR_SHIFT: return decode_tagged (dec);
1134    
1135     case MAJOR_MISC >> MAJOR_SHIFT:
1136     switch (*dec->cur++ & MINOR_MASK)
1137 root 1.1 {
1138 root 1.35 case SIMPLE_FALSE:
1139 root 1.1 #if CBOR_SLOW
1140 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1141 root 1.1 #endif
1142 root 1.10 return newSVsv (types_false);
1143 root 1.35 case SIMPLE_TRUE:
1144 root 1.1 #if CBOR_SLOW
1145 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1146 root 1.1 #endif
1147 root 1.10 return newSVsv (types_true);
1148 root 1.35 case SIMPLE_NULL:
1149 root 1.1 return newSVsv (&PL_sv_undef);
1150 root 1.35 case SIMPLE_UNDEF:
1151 root 1.10 #if CBOR_SLOW
1152     types_error = get_bool ("Types::Serialiser::error");
1153     #endif
1154     return newSVsv (types_error);
1155 root 1.1
1156 root 1.35 case MISC_FLOAT16:
1157 root 1.2 {
1158     WANT (2);
1159    
1160     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1161     dec->cur += 2;
1162    
1163     return newSVnv (ecb_binary16_to_float (fp));
1164     }
1165 root 1.1
1166 root 1.35 case MISC_FLOAT32:
1167 root 1.1 {
1168     uint32_t fp;
1169     WANT (4);
1170     memcpy (&fp, dec->cur, 4);
1171     dec->cur += 4;
1172    
1173     if (!ecb_big_endian ())
1174     fp = ecb_bswap32 (fp);
1175    
1176     return newSVnv (ecb_binary32_to_float (fp));
1177     }
1178    
1179 root 1.35 case MISC_FLOAT64:
1180 root 1.1 {
1181     uint64_t fp;
1182     WANT (8);
1183     memcpy (&fp, dec->cur, 8);
1184     dec->cur += 8;
1185    
1186     if (!ecb_big_endian ())
1187     fp = ecb_bswap64 (fp);
1188    
1189     return newSVnv (ecb_binary64_to_double (fp));
1190     }
1191    
1192 root 1.35 // 0..19 unassigned simple
1193 root 1.40 // 24 reserved + unassigned simple (reserved values are not encodable)
1194     // 28-30 unassigned misc
1195     // 31 break code
1196 root 1.1 default:
1197 root 1.40 ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)");
1198 root 1.1 }
1199    
1200     break;
1201     }
1202    
1203     fail:
1204     return &PL_sv_undef;
1205     }
1206    
1207     static SV *
1208     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1209     {
1210 root 1.48 dec_t dec = { 0 };
1211 root 1.1 SV *sv;
1212 root 1.16 STRLEN len;
1213     char *data = SvPVbyte (string, len);
1214 root 1.1
1215 root 1.16 if (len > cbor->max_size && cbor->max_size)
1216     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1217     (unsigned long)len, (unsigned long)cbor->max_size);
1218 root 1.1
1219     dec.cbor = *cbor;
1220 root 1.16 dec.cur = (U8 *)data;
1221     dec.end = (U8 *)data + len;
1222 root 1.1
1223     sv = decode_sv (&dec);
1224    
1225     if (offset_return)
1226     *offset_return = dec.cur;
1227    
1228     if (!(offset_return || !sv))
1229 root 1.2 if (dec.cur != dec.end && !dec.err)
1230     dec.err = "garbage after CBOR object";
1231    
1232     if (dec.err)
1233 root 1.1 {
1234 root 1.39 if (dec.shareable)
1235     {
1236     // need to break cyclic links, which whould all be in shareable
1237     int i;
1238     SV **svp;
1239    
1240     for (i = av_len (dec.shareable) + 1; i--; )
1241     if ((svp = av_fetch (dec.shareable, i, 0)))
1242     sv_setsv (*svp, &PL_sv_undef);
1243     }
1244    
1245 root 1.2 SvREFCNT_dec (sv);
1246 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1247 root 1.1 }
1248    
1249     sv = sv_2mortal (sv);
1250    
1251     return sv;
1252     }
1253    
1254     /////////////////////////////////////////////////////////////////////////////
1255 root 1.40 // incremental parser
1256    
1257     #define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0)
1258    
1259     // returns 0 for notyet, 1 for success or error
1260     static int
1261     incr_parse (CBOR *self, SV *cborstr)
1262     {
1263     STRLEN cur;
1264     SvPV (cborstr, cur);
1265    
1266     while (ecb_expect_true (self->incr_need <= cur))
1267     {
1268     // table of integer count bytes
1269     static I8 incr_len[MINOR_MASK + 1] = {
1270     0, 0, 0, 0, 0, 0, 0, 0,
1271     0, 0, 0, 0, 0, 0, 0, 0,
1272     0, 0, 0, 0, 0, 0, 0, 0,
1273     1, 2, 4, 8,-1,-1,-1,-2
1274     };
1275    
1276     const U8 *p = SvPVX (cborstr) + self->incr_pos;
1277     U8 m = *p & MINOR_MASK;
1278     IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1279     I8 ilen = incr_len[m];
1280    
1281     self->incr_need = self->incr_pos + 1;
1282    
1283     if (ecb_expect_false (ilen < 0))
1284     {
1285     if (m != MINOR_INDEF)
1286     return 1; // error
1287    
1288     if (*p == (MAJOR_MISC | MINOR_INDEF))
1289     {
1290     if (count >= 0)
1291     return 1; // error
1292    
1293     count = 1;
1294     }
1295     else
1296     {
1297     av_push (self->incr_count, newSViv (-1)); //TODO: nest
1298     count = -1;
1299     }
1300     }
1301     else
1302     {
1303     self->incr_need += ilen;
1304     if (ecb_expect_false (self->incr_need > cur))
1305     return 0;
1306    
1307     int major = *p >> MAJOR_SHIFT;
1308    
1309     switch (major)
1310     {
1311 root 1.47 case MAJOR_TAG >> MAJOR_SHIFT:
1312     ++count; // tags merely prefix another value
1313     break;
1314    
1315 root 1.40 case MAJOR_BYTES >> MAJOR_SHIFT:
1316     case MAJOR_TEXT >> MAJOR_SHIFT:
1317     case MAJOR_ARRAY >> MAJOR_SHIFT:
1318     case MAJOR_MAP >> MAJOR_SHIFT:
1319     {
1320     UV len;
1321    
1322     if (ecb_expect_false (ilen))
1323     {
1324     len = 0;
1325    
1326     do {
1327     len = (len << 8) | *++p;
1328     } while (--ilen);
1329     }
1330     else
1331     len = m;
1332    
1333     switch (major)
1334     {
1335     case MAJOR_BYTES >> MAJOR_SHIFT:
1336     case MAJOR_TEXT >> MAJOR_SHIFT:
1337     self->incr_need += len;
1338     if (ecb_expect_false (self->incr_need > cur))
1339     return 0;
1340    
1341     break;
1342    
1343     case MAJOR_MAP >> MAJOR_SHIFT:
1344     len <<= 1;
1345     case MAJOR_ARRAY >> MAJOR_SHIFT:
1346     if (len)
1347     {
1348     av_push (self->incr_count, newSViv (len + 1)); //TODO: nest
1349     count = len + 1;
1350     }
1351     break;
1352     }
1353     }
1354     }
1355     }
1356    
1357     self->incr_pos = self->incr_need;
1358    
1359     if (count > 0)
1360     {
1361     while (!--count)
1362     {
1363     if (!AvFILLp (self->incr_count))
1364     return 1; // done
1365    
1366     SvREFCNT_dec_NN (av_pop (self->incr_count));
1367     count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]);
1368     }
1369    
1370     SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count;
1371     }
1372     }
1373    
1374     return 0;
1375     }
1376    
1377    
1378     /////////////////////////////////////////////////////////////////////////////
1379 root 1.1 // XS interface functions
1380    
1381     MODULE = CBOR::XS PACKAGE = CBOR::XS
1382    
1383     BOOT:
1384     {
1385     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1386 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1387 root 1.1
1388 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1389     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1390    
1391     types_true = get_bool ("Types::Serialiser::true" );
1392     types_false = get_bool ("Types::Serialiser::false");
1393     types_error = get_bool ("Types::Serialiser::error");
1394 root 1.11
1395 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1396    
1397 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1398     SvREADONLY_on (sv_cbor);
1399 root 1.1 }
1400    
1401     PROTOTYPES: DISABLE
1402    
1403     void CLONE (...)
1404     CODE:
1405 root 1.10 cbor_stash = 0;
1406     cbor_tagged_stash = 0;
1407     types_error_stash = 0;
1408     types_boolean_stash = 0;
1409 root 1.1
1410     void new (char *klass)
1411     PPCODE:
1412     {
1413     SV *pv = NEWSV (0, sizeof (CBOR));
1414     SvPOK_only (pv);
1415     cbor_init ((CBOR *)SvPVX (pv));
1416     XPUSHs (sv_2mortal (sv_bless (
1417     newRV_noinc (pv),
1418     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1419     )));
1420     }
1421    
1422     void shrink (CBOR *self, int enable = 1)
1423     ALIAS:
1424     shrink = F_SHRINK
1425     allow_unknown = F_ALLOW_UNKNOWN
1426 root 1.18 allow_sharing = F_ALLOW_SHARING
1427 root 1.37 allow_cycles = F_ALLOW_CYCLES
1428 root 1.32 pack_strings = F_PACK_STRINGS
1429 root 1.54 text_keys = F_TEXT_KEYS
1430     text_strings = F_TEXT_STRINGS
1431 root 1.38 validate_utf8 = F_VALIDATE_UTF8
1432 root 1.1 PPCODE:
1433     {
1434     if (enable)
1435     self->flags |= ix;
1436     else
1437     self->flags &= ~ix;
1438    
1439     XPUSHs (ST (0));
1440     }
1441    
1442     void get_shrink (CBOR *self)
1443     ALIAS:
1444     get_shrink = F_SHRINK
1445     get_allow_unknown = F_ALLOW_UNKNOWN
1446 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1447 root 1.37 get_allow_cycles = F_ALLOW_CYCLES
1448 root 1.32 get_pack_strings = F_PACK_STRINGS
1449 root 1.54 get_text_keys = F_TEXT_KEYS
1450     get_text_strings = F_TEXT_STRINGS
1451 root 1.38 get_validate_utf8 = F_VALIDATE_UTF8
1452 root 1.1 PPCODE:
1453     XPUSHs (boolSV (self->flags & ix));
1454    
1455     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1456     PPCODE:
1457     self->max_depth = max_depth;
1458     XPUSHs (ST (0));
1459    
1460     U32 get_max_depth (CBOR *self)
1461     CODE:
1462     RETVAL = self->max_depth;
1463     OUTPUT:
1464     RETVAL
1465    
1466     void max_size (CBOR *self, U32 max_size = 0)
1467     PPCODE:
1468     self->max_size = max_size;
1469     XPUSHs (ST (0));
1470    
1471     int get_max_size (CBOR *self)
1472     CODE:
1473     RETVAL = self->max_size;
1474     OUTPUT:
1475     RETVAL
1476    
1477 root 1.27 void filter (CBOR *self, SV *filter = 0)
1478     PPCODE:
1479     SvREFCNT_dec (self->filter);
1480     self->filter = filter ? newSVsv (filter) : filter;
1481     XPUSHs (ST (0));
1482    
1483     SV *get_filter (CBOR *self)
1484     CODE:
1485     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1486     OUTPUT:
1487     RETVAL
1488    
1489 root 1.1 void encode (CBOR *self, SV *scalar)
1490     PPCODE:
1491     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1492     XPUSHs (scalar);
1493    
1494     void decode (CBOR *self, SV *cborstr)
1495     PPCODE:
1496     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1497     XPUSHs (cborstr);
1498    
1499     void decode_prefix (CBOR *self, SV *cborstr)
1500     PPCODE:
1501     {
1502     SV *sv;
1503     char *offset;
1504     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1505     EXTEND (SP, 2);
1506     PUSHs (sv);
1507     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1508     }
1509    
1510 root 1.41 void incr_parse (CBOR *self, SV *cborstr)
1511 root 1.42 ALIAS:
1512     incr_parse_multiple = 1
1513 root 1.40 PPCODE:
1514     {
1515     if (SvUTF8 (cborstr))
1516     sv_utf8_downgrade (cborstr, 0);
1517    
1518     if (!self->incr_count)
1519     {
1520     self->incr_count = newAV ();
1521     self->incr_pos = 0;
1522     self->incr_need = 1;
1523    
1524     av_push (self->incr_count, newSViv (1));
1525     }
1526    
1527 root 1.41 do
1528 root 1.40 {
1529     if (!incr_parse (self, cborstr))
1530     {
1531     if (self->incr_need > self->max_size && self->max_size)
1532     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1533     (unsigned long)self->incr_need, (unsigned long)self->max_size);
1534    
1535     break;
1536     }
1537    
1538 root 1.41 SV *sv;
1539     char *offset;
1540 root 1.40
1541 root 1.41 PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1542     XPUSHs (sv);
1543 root 1.40
1544 root 1.41 sv_chop (cborstr, offset);
1545 root 1.40
1546 root 1.41 av_clear (self->incr_count);
1547     av_push (self->incr_count, newSViv (1));
1548 root 1.40
1549 root 1.41 self->incr_pos = 0;
1550     self->incr_need = self->incr_pos + 1;
1551 root 1.40 }
1552 root 1.42 while (ix);
1553 root 1.40 }
1554    
1555     void incr_reset (CBOR *self)
1556     CODE:
1557     {
1558     SvREFCNT_dec (self->incr_count);
1559     self->incr_count = 0;
1560     }
1561    
1562 root 1.27 void DESTROY (CBOR *self)
1563     PPCODE:
1564     cbor_free (self);
1565    
1566 root 1.1 PROTOTYPES: ENABLE
1567    
1568     void encode_cbor (SV *scalar)
1569 root 1.36 ALIAS:
1570     encode_cbor = 0
1571     encode_cbor_sharing = F_ALLOW_SHARING
1572 root 1.1 PPCODE:
1573     {
1574     CBOR cbor;
1575     cbor_init (&cbor);
1576 root 1.36 cbor.flags |= ix;
1577 root 1.1 PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1578     XPUSHs (scalar);
1579     }
1580    
1581     void decode_cbor (SV *cborstr)
1582     PPCODE:
1583     {
1584     CBOR cbor;
1585     cbor_init (&cbor);
1586     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1587     XPUSHs (cborstr);
1588     }
1589