ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.24
Committed: Wed Nov 20 16:29:02 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.23: +8 -8 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12     #include "ecb.h"
13    
14 root 1.14 // compatibility with perl <5.18
15     #ifndef HvNAMELEN_get
16     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
17     #endif
18     #ifndef HvNAMELEN
19     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
20     #endif
21     #ifndef HvNAMEUTF8
22     # define HvNAMEUTF8(hv) 0
23     #endif
24    
25 root 1.9 // known tags
26 root 1.8 enum cbor_tag
27     {
28 root 1.9 // inofficial extensions (pending iana registration)
29 root 1.18 CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object
30     CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object
31 root 1.19 CBOR_TAG_VALUE_SHAREABLE = 26, // http://cbor.schmorp.de/value-sharing
32 root 1.18 CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing
33     CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref
34     CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref
35     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
36 root 1.9
37     // rfc7049
38     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
39     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
40     CBOR_TAG_POS_BIGNUM = 2, // byte string
41     CBOR_TAG_NEG_BIGNUM = 3, // byte string
42     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
43     CBOR_TAG_BIGFLOAT = 5, // array
44    
45     CBOR_TAG_CONV_B64U = 21, // base64url, any
46     CBOR_TAG_CONV_B64 = 22, // base64, any
47     CBOR_TAG_CONV_HEX = 23, // base16, any
48     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
49    
50     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
51     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
52     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
53     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
54     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
55 root 1.8
56 root 1.9 CBOR_TAG_MAGIC = 55799 // self-describe cbor
57 root 1.8 };
58    
59 root 1.24 #define F_SHRINK 0x00000001UL
60     #define F_ALLOW_UNKNOWN 0x00000002UL
61     #define F_ALLOW_SHARING 0x00000004UL //TODO
62     #define F_ALLOW_STRINGREF 0x00000008UL //TODO
63 root 1.1
64     #define INIT_SIZE 32 // initial scalar size to be allocated
65    
66     #define SB do {
67     #define SE } while (0)
68    
69     #define IN_RANGE_INC(type,val,beg,end) \
70     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
71     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
72    
73     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
74    
75     #ifdef USE_ITHREADS
76     # define CBOR_SLOW 1
77     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
78     #else
79     # define CBOR_SLOW 0
80     # define CBOR_STASH cbor_stash
81     #endif
82    
83 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
84 root 1.11 static SV *types_true, *types_false, *types_error, *sv_cbor;
85 root 1.1
86     typedef struct {
87     U32 flags;
88     U32 max_depth;
89     STRLEN max_size;
90     } CBOR;
91    
92 root 1.5 ecb_inline void
93 root 1.1 cbor_init (CBOR *cbor)
94     {
95     Zero (cbor, 1, CBOR);
96     cbor->max_depth = 512;
97     }
98    
99     /////////////////////////////////////////////////////////////////////////////
100     // utility functions
101    
102 root 1.5 ecb_inline SV *
103 root 1.1 get_bool (const char *name)
104     {
105     SV *sv = get_sv (name, 1);
106    
107     SvREADONLY_on (sv);
108     SvREADONLY_on (SvRV (sv));
109    
110     return sv;
111     }
112    
113 root 1.5 ecb_inline void
114 root 1.1 shrink (SV *sv)
115     {
116     sv_utf8_downgrade (sv, 1);
117    
118     if (SvLEN (sv) > SvCUR (sv) + 1)
119     {
120     #ifdef SvPV_shrink_to_cur
121     SvPV_shrink_to_cur (sv);
122     #elif defined (SvPV_renew)
123     SvPV_renew (sv, SvCUR (sv) + 1);
124     #endif
125     }
126     }
127    
128 root 1.21 // minimum length of a string to be registered for stringref
129     ecb_inline int
130     minimum_string_length (UV idx)
131     {
132     return idx > 23
133     ? idx > 0xffU
134     ? idx > 0xffffU
135     ? idx > 0xffffffffU
136     ? 7
137     : 6
138     : 5
139     : 4
140     : 3;
141     }
142    
143 root 1.1 /////////////////////////////////////////////////////////////////////////////
144     // encoder
145    
146     // structure used for encoding CBOR
147     typedef struct
148     {
149     char *cur; // SvPVX (sv) + current output position
150     char *end; // SvEND (sv)
151     SV *sv; // result scalar
152     CBOR cbor;
153     U32 depth; // recursion level
154 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
155     UV stringref_idx;
156 root 1.19 HV *shareable; // ptr => index, or 0
157     UV shareable_idx;
158 root 1.1 } enc_t;
159    
160 root 1.5 ecb_inline void
161 root 1.1 need (enc_t *enc, STRLEN len)
162     {
163 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
164 root 1.1 {
165     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
166     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
167     enc->cur = SvPVX (enc->sv) + cur;
168     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
169     }
170     }
171    
172 root 1.5 ecb_inline void
173 root 1.1 encode_ch (enc_t *enc, char ch)
174     {
175     need (enc, 1);
176     *enc->cur++ = ch;
177     }
178    
179     static void
180     encode_uint (enc_t *enc, int major, UV len)
181     {
182     need (enc, 9);
183    
184     if (len < 24)
185     *enc->cur++ = major | len;
186 root 1.4 else if (len <= 0xff)
187 root 1.1 {
188     *enc->cur++ = major | 24;
189     *enc->cur++ = len;
190     }
191 root 1.4 else if (len <= 0xffff)
192 root 1.1 {
193     *enc->cur++ = major | 25;
194     *enc->cur++ = len >> 8;
195     *enc->cur++ = len;
196     }
197 root 1.4 else if (len <= 0xffffffff)
198 root 1.1 {
199     *enc->cur++ = major | 26;
200     *enc->cur++ = len >> 24;
201     *enc->cur++ = len >> 16;
202     *enc->cur++ = len >> 8;
203     *enc->cur++ = len;
204     }
205 root 1.4 else
206 root 1.1 {
207     *enc->cur++ = major | 27;
208     *enc->cur++ = len >> 56;
209     *enc->cur++ = len >> 48;
210     *enc->cur++ = len >> 40;
211     *enc->cur++ = len >> 32;
212     *enc->cur++ = len >> 24;
213     *enc->cur++ = len >> 16;
214     *enc->cur++ = len >> 8;
215     *enc->cur++ = len;
216     }
217     }
218    
219 root 1.21 ecb_inline void
220     encode_tag (enc_t *enc, UV tag)
221     {
222     encode_uint (enc, 0xc0, tag);
223     }
224    
225 root 1.1 static void
226     encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
227     {
228 root 1.24 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_STRINGREF))
229 root 1.21 {
230 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
231 root 1.21
232     if (SvOK (*svp))
233     {
234     // already registered, use stringref
235     encode_tag (enc, CBOR_TAG_STRINGREF);
236     encode_uint (enc, 0x00, SvUV (*svp));
237     return;
238     }
239     else if (len >= minimum_string_length (enc->stringref_idx))
240     {
241     // register only
242     sv_setuv (*svp, enc->stringref_idx);
243     ++enc->stringref_idx;
244     }
245     }
246    
247 root 1.1 encode_uint (enc, utf8 ? 0x60 : 0x40, len);
248     need (enc, len);
249     memcpy (enc->cur, str, len);
250     enc->cur += len;
251     }
252    
253     static void encode_sv (enc_t *enc, SV *sv);
254    
255     static void
256     encode_av (enc_t *enc, AV *av)
257     {
258     int i, len = av_len (av);
259    
260     if (enc->depth >= enc->cbor.max_depth)
261     croak (ERR_NESTING_EXCEEDED);
262    
263     ++enc->depth;
264    
265     encode_uint (enc, 0x80, len + 1);
266    
267     for (i = 0; i <= len; ++i)
268     {
269     SV **svp = av_fetch (av, i, 0);
270     encode_sv (enc, svp ? *svp : &PL_sv_undef);
271     }
272    
273     --enc->depth;
274     }
275    
276 root 1.20 ecb_inline void
277     encode_he (enc_t *enc, HE *he)
278     {
279     }
280    
281 root 1.1 static void
282     encode_hv (enc_t *enc, HV *hv)
283     {
284     HE *he;
285    
286     if (enc->depth >= enc->cbor.max_depth)
287     croak (ERR_NESTING_EXCEEDED);
288    
289     ++enc->depth;
290    
291     int pairs = hv_iterinit (hv);
292     int mg = SvMAGICAL (hv);
293    
294     if (mg)
295     encode_ch (enc, 0xa0 | 31);
296     else
297     encode_uint (enc, 0xa0, pairs);
298    
299     while ((he = hv_iternext (hv)))
300     {
301 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
302     encode_sv (enc, HeSVKEY (he));
303 root 1.1 else
304 root 1.21 encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
305 root 1.1
306 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
307 root 1.1 }
308    
309     if (mg)
310     encode_ch (enc, 0xe0 | 31);
311    
312     --enc->depth;
313     }
314    
315     // encode objects, arrays and special \0=false and \1=true values.
316     static void
317     encode_rv (enc_t *enc, SV *sv)
318     {
319 root 1.19 SvGETMAGIC (sv);
320 root 1.1
321 root 1.19 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
322     && ecb_expect_false (SvREFCNT (sv) > 1))
323     {
324     if (!enc->shareable)
325     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
326    
327     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
328    
329     if (SvOK (*svp))
330     {
331     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
332     encode_uint (enc, 0x00, SvUV (*svp));
333     return;
334     }
335     else
336     {
337     sv_setuv (*svp, enc->shareable_idx);
338     ++enc->shareable_idx;
339     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
340     }
341     }
342 root 1.1
343 root 1.19 svtype svt = SvTYPE (sv);
344 root 1.18
345 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
346 root 1.1 {
347 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
348     ? types_boolean_stash
349     : gv_stashpv ("Types::Serialiser::Boolean", 1);
350     HV *error_stash = !CBOR_SLOW || types_error_stash
351     ? types_error_stash
352     : gv_stashpv ("Types::Serialiser::Error", 1);
353 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
354     ? cbor_tagged_stash
355     : gv_stashpv ("CBOR::XS::Tagged" , 1);
356 root 1.1
357 root 1.11 HV *stash = SvSTASH (sv);
358     GV *method;
359    
360     if (stash == boolean_stash)
361 root 1.1 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
362 root 1.11 else if (stash == error_stash)
363 root 1.10 encode_ch (enc, 0xe0 | 23);
364 root 1.11 else if (stash == tagged_stash)
365 root 1.6 {
366     if (svt != SVt_PVAV)
367     croak ("encountered CBOR::XS::Tagged object that isn't an array");
368    
369     encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
370     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
371     }
372 root 1.11 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
373 root 1.1 {
374 root 1.11 dSP;
375    
376     ENTER; SAVETMPS; PUSHMARK (SP);
377 root 1.6 // we re-bless the reference to get overload and other niceties right
378 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
379    
380     PUTBACK;
381     // G_SCALAR ensures that return value is 1
382     call_sv ((SV *)GvCV (method), G_SCALAR);
383     SPAGAIN;
384    
385     // catch this surprisingly common error
386     if (SvROK (TOPs) && SvRV (TOPs) == sv)
387     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
388    
389     encode_sv (enc, POPs);
390    
391     PUTBACK;
392    
393     FREETMPS; LEAVE;
394     }
395     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
396     {
397     dSP;
398 root 1.6
399 root 1.11 ENTER; SAVETMPS; PUSHMARK (SP);
400     EXTEND (SP, 2);
401     // we re-bless the reference to get overload and other niceties right
402     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
403     PUSHs (sv_cbor);
404 root 1.1
405 root 1.11 PUTBACK;
406     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
407     SPAGAIN;
408 root 1.6
409 root 1.11 // catch this surprisingly common error
410     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
411     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
412 root 1.6
413 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
414 root 1.11 encode_uint (enc, 0x80, count + 1);
415     encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
416 root 1.6
417 root 1.11 while (count)
418     encode_sv (enc, SP[1 - count--]);
419 root 1.6
420 root 1.11 PUTBACK;
421 root 1.6
422 root 1.11 FREETMPS; LEAVE;
423 root 1.1 }
424 root 1.11 else
425     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
426     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
427 root 1.1 }
428     else if (svt == SVt_PVHV)
429     encode_hv (enc, (HV *)sv);
430     else if (svt == SVt_PVAV)
431     encode_av (enc, (AV *)sv);
432 root 1.18 else
433 root 1.1 {
434 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
435     encode_sv (enc, sv);
436 root 1.1 }
437     }
438    
439     static void
440     encode_nv (enc_t *enc, SV *sv)
441     {
442     double nv = SvNVX (sv);
443    
444     need (enc, 9);
445    
446 root 1.5 if (ecb_expect_false (nv == (U32)nv))
447 root 1.1 encode_uint (enc, 0x00, (U32)nv);
448     //TODO: maybe I32?
449 root 1.5 else if (ecb_expect_false (nv == (float)nv))
450 root 1.1 {
451     uint32_t fp = ecb_float_to_binary32 (nv);
452    
453     *enc->cur++ = 0xe0 | 26;
454    
455     if (!ecb_big_endian ())
456     fp = ecb_bswap32 (fp);
457    
458     memcpy (enc->cur, &fp, 4);
459     enc->cur += 4;
460     }
461     else
462     {
463     uint64_t fp = ecb_double_to_binary64 (nv);
464    
465     *enc->cur++ = 0xe0 | 27;
466    
467     if (!ecb_big_endian ())
468     fp = ecb_bswap64 (fp);
469    
470     memcpy (enc->cur, &fp, 8);
471     enc->cur += 8;
472     }
473     }
474    
475     static void
476     encode_sv (enc_t *enc, SV *sv)
477     {
478     SvGETMAGIC (sv);
479    
480     if (SvPOKp (sv))
481     {
482     STRLEN len;
483     char *str = SvPV (sv, len);
484     encode_str (enc, SvUTF8 (sv), str, len);
485     }
486     else if (SvNOKp (sv))
487     encode_nv (enc, sv);
488     else if (SvIOKp (sv))
489     {
490     if (SvIsUV (sv))
491     encode_uint (enc, 0x00, SvUVX (sv));
492     else if (SvIVX (sv) >= 0)
493     encode_uint (enc, 0x00, SvIVX (sv));
494     else
495     encode_uint (enc, 0x20, -(SvIVX (sv) + 1));
496     }
497     else if (SvROK (sv))
498     encode_rv (enc, SvRV (sv));
499     else if (!SvOK (sv))
500     encode_ch (enc, 0xe0 | 22);
501     else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
502     encode_ch (enc, 0xe0 | 23);
503     else
504     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
505     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
506     }
507    
508     static SV *
509     encode_cbor (SV *scalar, CBOR *cbor)
510     {
511 root 1.18 enc_t enc = { };
512 root 1.1
513     enc.cbor = *cbor;
514     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
515     enc.cur = SvPVX (enc.sv);
516     enc.end = SvEND (enc.sv);
517    
518     SvPOK_only (enc.sv);
519 root 1.20
520 root 1.24 if (cbor->flags & F_ALLOW_STRINGREF)
521 root 1.20 {
522     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
523     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
524     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
525     }
526    
527 root 1.1 encode_sv (&enc, scalar);
528    
529     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
530     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
531    
532     if (enc.cbor.flags & F_SHRINK)
533     shrink (enc.sv);
534    
535     return enc.sv;
536     }
537    
538     /////////////////////////////////////////////////////////////////////////////
539     // decoder
540    
541     // structure used for decoding CBOR
542     typedef struct
543     {
544     U8 *cur; // current parser pointer
545     U8 *end; // end of input string
546     const char *err; // parse error, if != 0
547     CBOR cbor;
548     U32 depth; // recursion depth
549     U32 maxdepth; // recursion depth limit
550 root 1.19 AV *shareable;
551 root 1.20 AV *stringref;
552 root 1.1 } dec_t;
553    
554     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
555    
556 root 1.5 #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
557 root 1.1
558     #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED)
559     #define DEC_DEC_DEPTH --dec->depth
560    
561     static UV
562     decode_uint (dec_t *dec)
563     {
564     switch (*dec->cur & 31)
565     {
566     case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
567     case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
568     case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
569     return *dec->cur++ & 31;
570    
571     case 24:
572     WANT (2);
573     dec->cur += 2;
574     return dec->cur[-1];
575    
576     case 25:
577     WANT (3);
578     dec->cur += 3;
579     return (((UV)dec->cur[-2]) << 8)
580     | ((UV)dec->cur[-1]);
581    
582     case 26:
583     WANT (5);
584     dec->cur += 5;
585     return (((UV)dec->cur[-4]) << 24)
586     | (((UV)dec->cur[-3]) << 16)
587     | (((UV)dec->cur[-2]) << 8)
588     | ((UV)dec->cur[-1]);
589    
590     case 27:
591     WANT (9);
592     dec->cur += 9;
593     return (((UV)dec->cur[-8]) << 56)
594     | (((UV)dec->cur[-7]) << 48)
595     | (((UV)dec->cur[-6]) << 40)
596     | (((UV)dec->cur[-5]) << 32)
597     | (((UV)dec->cur[-4]) << 24)
598     | (((UV)dec->cur[-3]) << 16)
599     | (((UV)dec->cur[-2]) << 8)
600     | ((UV)dec->cur[-1]);
601    
602     default:
603     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
604     }
605    
606     fail:
607     return 0;
608     }
609    
610     static SV *decode_sv (dec_t *dec);
611    
612     static SV *
613     decode_av (dec_t *dec)
614     {
615     AV *av = newAV ();
616    
617     DEC_INC_DEPTH;
618    
619     if ((*dec->cur & 31) == 31)
620     {
621     ++dec->cur;
622    
623     for (;;)
624     {
625     WANT (1);
626    
627 root 1.2 if (*dec->cur == (0xe0 | 31))
628 root 1.1 {
629     ++dec->cur;
630     break;
631     }
632    
633     av_push (av, decode_sv (dec));
634     }
635     }
636     else
637     {
638     int i, len = decode_uint (dec);
639    
640     av_fill (av, len - 1);
641    
642     for (i = 0; i < len; ++i)
643     AvARRAY (av)[i] = decode_sv (dec);
644     }
645    
646     DEC_DEC_DEPTH;
647     return newRV_noinc ((SV *)av);
648    
649     fail:
650     SvREFCNT_dec (av);
651     DEC_DEC_DEPTH;
652     return &PL_sv_undef;
653     }
654    
655 root 1.16 static void
656     decode_he (dec_t *dec, HV *hv)
657     {
658     // for speed reasons, we specialcase single-string
659 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
660    
661 root 1.23 if (ecb_expect_true (!dec->stringref))
662 root 1.21 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
663     {
664     I32 len = decode_uint (dec);
665     char *key = (char *)dec->cur;
666 root 1.16
667 root 1.21 dec->cur += len;
668    
669     if (ecb_expect_false (dec->stringref))
670     av_push (dec->stringref, newSVpvn (key, len));
671    
672     hv_store (hv, key, len, decode_sv (dec), 0);
673 root 1.16
674 root 1.21 return;
675     }
676     else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
677     {
678     I32 len = decode_uint (dec);
679     char *key = (char *)dec->cur;
680 root 1.16
681 root 1.21 dec->cur += len;
682 root 1.20
683 root 1.21 if (ecb_expect_false (dec->stringref))
684     av_push (dec->stringref, newSVpvn_utf8 (key, len, 1));
685 root 1.16
686 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
687 root 1.16
688 root 1.21 return;
689     }
690 root 1.20
691 root 1.21 SV *k = decode_sv (dec);
692     SV *v = decode_sv (dec);
693 root 1.16
694 root 1.21 hv_store_ent (hv, k, v, 0);
695     SvREFCNT_dec (k);
696 root 1.16 }
697    
698 root 1.1 static SV *
699     decode_hv (dec_t *dec)
700     {
701     HV *hv = newHV ();
702    
703     DEC_INC_DEPTH;
704    
705     if ((*dec->cur & 31) == 31)
706     {
707     ++dec->cur;
708    
709     for (;;)
710     {
711     WANT (1);
712    
713 root 1.2 if (*dec->cur == (0xe0 | 31))
714 root 1.1 {
715     ++dec->cur;
716     break;
717     }
718    
719 root 1.16 decode_he (dec, hv);
720 root 1.1 }
721     }
722     else
723     {
724 root 1.16 int pairs = decode_uint (dec);
725 root 1.1
726 root 1.16 while (pairs--)
727     decode_he (dec, hv);
728 root 1.1 }
729    
730     DEC_DEC_DEPTH;
731     return newRV_noinc ((SV *)hv);
732    
733     fail:
734     SvREFCNT_dec (hv);
735     DEC_DEC_DEPTH;
736     return &PL_sv_undef;
737     }
738    
739     static SV *
740     decode_str (dec_t *dec, int utf8)
741     {
742 root 1.6 SV *sv = 0;
743 root 1.1
744     if ((*dec->cur & 31) == 31)
745     {
746     ++dec->cur;
747    
748     sv = newSVpvn ("", 0);
749    
750     // not very fast, and certainly not robust against illegal input
751     for (;;)
752     {
753     WANT (1);
754    
755 root 1.2 if (*dec->cur == (0xe0 | 31))
756 root 1.1 {
757     ++dec->cur;
758     break;
759     }
760    
761 root 1.6 sv_catsv (sv, decode_sv (dec));
762 root 1.1 }
763     }
764     else
765     {
766     STRLEN len = decode_uint (dec);
767    
768     WANT (len);
769     sv = newSVpvn (dec->cur, len);
770     dec->cur += len;
771     }
772    
773     if (utf8)
774     SvUTF8_on (sv);
775    
776 root 1.21 if (ecb_expect_false (dec->stringref)
777     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
778 root 1.20 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
779    
780 root 1.1 return sv;
781    
782     fail:
783 root 1.6 SvREFCNT_dec (sv);
784 root 1.1 return &PL_sv_undef;
785     }
786    
787     static SV *
788 root 1.3 decode_tagged (dec_t *dec)
789     {
790 root 1.19 SV *sv = 0;
791 root 1.3 UV tag = decode_uint (dec);
792 root 1.19
793     WANT (1);
794 root 1.3
795 root 1.18 switch (tag)
796     {
797     case CBOR_TAG_MAGIC:
798 root 1.20 sv = decode_sv (dec);
799     break;
800 root 1.18
801     case CBOR_TAG_INDIRECTION:
802 root 1.20 sv = newRV_noinc (decode_sv (dec));
803     break;
804    
805     case CBOR_TAG_STRINGREF_NAMESPACE:
806     {
807     ENTER; SAVETMPS;
808    
809     SAVESPTR (dec->stringref);
810     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
811    
812     sv = decode_sv (dec);
813    
814     FREETMPS; LEAVE;
815     }
816     break;
817    
818     case CBOR_TAG_STRINGREF:
819     {
820     if ((*dec->cur >> 5) != 0)
821     ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
822    
823     UV idx = decode_uint (dec);
824    
825     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
826     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
827    
828     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
829     }
830     break;
831 root 1.11
832 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
833     {
834     if (ecb_expect_false (!dec->shareable))
835     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
836    
837     sv = newSV (0);
838     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
839 root 1.18
840 root 1.19 SV *osv = decode_sv (dec);
841     sv_setsv (sv, osv);
842     SvREFCNT_dec_NN (osv);
843     }
844 root 1.20 break;
845 root 1.18
846     case CBOR_TAG_VALUE_SHAREDREF:
847 root 1.17 {
848 root 1.19 if ((*dec->cur >> 5) != 0)
849     ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
850 root 1.18
851 root 1.19 UV idx = decode_uint (dec);
852    
853 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
854 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
855    
856 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
857 root 1.17 }
858 root 1.20 break;
859 root 1.17
860 root 1.18 case CBOR_TAG_PERL_OBJECT:
861     {
862 root 1.19 sv = decode_sv (dec);
863    
864 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
865     ERR ("corrupted CBOR data (non-array perl object)");
866    
867     AV *av = (AV *)SvRV (sv);
868     int len = av_len (av) + 1;
869     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
870    
871     if (!stash)
872     ERR ("cannot decode perl-object (package does not exist)");
873    
874     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
875    
876     if (!method)
877     ERR ("cannot decode perl-object (package does not have a THAW method)");
878    
879     dSP;
880    
881     ENTER; SAVETMPS; PUSHMARK (SP);
882     EXTEND (SP, len + 1);
883     // we re-bless the reference to get overload and other niceties right
884     PUSHs (*av_fetch (av, 0, 1));
885     PUSHs (sv_cbor);
886    
887     int i;
888    
889     for (i = 1; i < len; ++i)
890     PUSHs (*av_fetch (av, i, 1));
891    
892     PUTBACK;
893     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
894     SPAGAIN;
895    
896     if (SvTRUE (ERRSV))
897     {
898     FREETMPS; LEAVE;
899     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
900     }
901    
902     SvREFCNT_dec (sv);
903     sv = SvREFCNT_inc (POPs);
904 root 1.11
905 root 1.18 PUTBACK;
906 root 1.11
907 root 1.18 FREETMPS; LEAVE;
908     }
909 root 1.20 break;
910 root 1.9
911 root 1.18 default:
912     {
913 root 1.19 sv = decode_sv (dec);
914    
915 root 1.18 AV *av = newAV ();
916     av_push (av, newSVuv (tag));
917     av_push (av, sv);
918    
919     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
920     ? cbor_tagged_stash
921     : gv_stashpv ("CBOR::XS::Tagged" , 1);
922 root 1.7
923 root 1.20 sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
924 root 1.18 }
925 root 1.20 break;
926 root 1.11 }
927 root 1.9
928 root 1.20 return sv;
929    
930 root 1.9 fail:
931     SvREFCNT_dec (sv);
932     return &PL_sv_undef;
933 root 1.3 }
934    
935     static SV *
936 root 1.1 decode_sv (dec_t *dec)
937     {
938     WANT (1);
939    
940     switch (*dec->cur >> 5)
941     {
942     case 0: // unsigned int
943     return newSVuv (decode_uint (dec));
944     case 1: // negative int
945     return newSViv (-1 - (IV)decode_uint (dec));
946     case 2: // octet string
947     return decode_str (dec, 0);
948     case 3: // utf-8 string
949     return decode_str (dec, 1);
950     case 4: // array
951     return decode_av (dec);
952     case 5: // map
953     return decode_hv (dec);
954     case 6: // tag
955 root 1.3 return decode_tagged (dec);
956 root 1.1 case 7: // misc
957     switch (*dec->cur++ & 31)
958     {
959     case 20:
960     #if CBOR_SLOW
961 root 1.10 types_false = get_bool ("Types::Serialiser::false");
962 root 1.1 #endif
963 root 1.10 return newSVsv (types_false);
964 root 1.1 case 21:
965     #if CBOR_SLOW
966 root 1.10 types_true = get_bool ("Types::Serialiser::true");
967 root 1.1 #endif
968 root 1.10 return newSVsv (types_true);
969 root 1.1 case 22:
970     return newSVsv (&PL_sv_undef);
971 root 1.10 case 23:
972     #if CBOR_SLOW
973     types_error = get_bool ("Types::Serialiser::error");
974     #endif
975     return newSVsv (types_error);
976 root 1.1
977     case 25:
978 root 1.2 {
979     WANT (2);
980    
981     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
982     dec->cur += 2;
983    
984     return newSVnv (ecb_binary16_to_float (fp));
985     }
986 root 1.1
987     case 26:
988     {
989     uint32_t fp;
990     WANT (4);
991     memcpy (&fp, dec->cur, 4);
992     dec->cur += 4;
993    
994     if (!ecb_big_endian ())
995     fp = ecb_bswap32 (fp);
996    
997     return newSVnv (ecb_binary32_to_float (fp));
998     }
999    
1000     case 27:
1001     {
1002     uint64_t fp;
1003     WANT (8);
1004     memcpy (&fp, dec->cur, 8);
1005     dec->cur += 8;
1006    
1007     if (!ecb_big_endian ())
1008     fp = ecb_bswap64 (fp);
1009    
1010     return newSVnv (ecb_binary64_to_double (fp));
1011     }
1012    
1013     // 0..19 unassigned
1014     // 24 reserved + unassigned (reserved values are not encodable)
1015     default:
1016     ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
1017     }
1018    
1019     break;
1020     }
1021    
1022     fail:
1023     return &PL_sv_undef;
1024     }
1025    
1026     static SV *
1027     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1028     {
1029 root 1.18 dec_t dec = { };
1030 root 1.1 SV *sv;
1031 root 1.16 STRLEN len;
1032     char *data = SvPVbyte (string, len);
1033 root 1.1
1034 root 1.16 if (len > cbor->max_size && cbor->max_size)
1035     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1036     (unsigned long)len, (unsigned long)cbor->max_size);
1037 root 1.1
1038     dec.cbor = *cbor;
1039 root 1.16 dec.cur = (U8 *)data;
1040     dec.end = (U8 *)data + len;
1041 root 1.1
1042     sv = decode_sv (&dec);
1043    
1044     if (offset_return)
1045     *offset_return = dec.cur;
1046    
1047     if (!(offset_return || !sv))
1048 root 1.2 if (dec.cur != dec.end && !dec.err)
1049     dec.err = "garbage after CBOR object";
1050    
1051     if (dec.err)
1052 root 1.1 {
1053 root 1.2 SvREFCNT_dec (sv);
1054 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1055 root 1.1 }
1056    
1057     sv = sv_2mortal (sv);
1058    
1059     return sv;
1060     }
1061    
1062     /////////////////////////////////////////////////////////////////////////////
1063     // XS interface functions
1064    
1065     MODULE = CBOR::XS PACKAGE = CBOR::XS
1066    
1067     BOOT:
1068     {
1069     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1070 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1071 root 1.1
1072 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1073     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1074    
1075     types_true = get_bool ("Types::Serialiser::true" );
1076     types_false = get_bool ("Types::Serialiser::false");
1077     types_error = get_bool ("Types::Serialiser::error");
1078 root 1.11
1079     sv_cbor = newSVpv ("CBOR", 0);
1080     SvREADONLY_on (sv_cbor);
1081 root 1.1 }
1082    
1083     PROTOTYPES: DISABLE
1084    
1085     void CLONE (...)
1086     CODE:
1087 root 1.10 cbor_stash = 0;
1088     cbor_tagged_stash = 0;
1089     types_error_stash = 0;
1090     types_boolean_stash = 0;
1091 root 1.1
1092     void new (char *klass)
1093     PPCODE:
1094     {
1095     SV *pv = NEWSV (0, sizeof (CBOR));
1096     SvPOK_only (pv);
1097     cbor_init ((CBOR *)SvPVX (pv));
1098     XPUSHs (sv_2mortal (sv_bless (
1099     newRV_noinc (pv),
1100     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1101     )));
1102     }
1103    
1104     void shrink (CBOR *self, int enable = 1)
1105     ALIAS:
1106     shrink = F_SHRINK
1107     allow_unknown = F_ALLOW_UNKNOWN
1108 root 1.18 allow_sharing = F_ALLOW_SHARING
1109 root 1.24 allow_stringref = F_ALLOW_STRINGREF
1110 root 1.1 PPCODE:
1111     {
1112     if (enable)
1113     self->flags |= ix;
1114     else
1115     self->flags &= ~ix;
1116    
1117     XPUSHs (ST (0));
1118     }
1119    
1120     void get_shrink (CBOR *self)
1121     ALIAS:
1122     get_shrink = F_SHRINK
1123     get_allow_unknown = F_ALLOW_UNKNOWN
1124 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1125 root 1.24 get_allow_stringref = F_ALLOW_STRINGREF
1126 root 1.1 PPCODE:
1127     XPUSHs (boolSV (self->flags & ix));
1128    
1129     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1130     PPCODE:
1131     self->max_depth = max_depth;
1132     XPUSHs (ST (0));
1133    
1134     U32 get_max_depth (CBOR *self)
1135     CODE:
1136     RETVAL = self->max_depth;
1137     OUTPUT:
1138     RETVAL
1139    
1140     void max_size (CBOR *self, U32 max_size = 0)
1141     PPCODE:
1142     self->max_size = max_size;
1143     XPUSHs (ST (0));
1144    
1145     int get_max_size (CBOR *self)
1146     CODE:
1147     RETVAL = self->max_size;
1148     OUTPUT:
1149     RETVAL
1150    
1151     void encode (CBOR *self, SV *scalar)
1152     PPCODE:
1153     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1154     XPUSHs (scalar);
1155    
1156     void decode (CBOR *self, SV *cborstr)
1157     PPCODE:
1158     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1159     XPUSHs (cborstr);
1160    
1161     void decode_prefix (CBOR *self, SV *cborstr)
1162     PPCODE:
1163     {
1164     SV *sv;
1165     char *offset;
1166     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1167     EXTEND (SP, 2);
1168     PUSHs (sv);
1169     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1170     }
1171    
1172     PROTOTYPES: ENABLE
1173    
1174     void encode_cbor (SV *scalar)
1175     PPCODE:
1176     {
1177     CBOR cbor;
1178     cbor_init (&cbor);
1179     PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1180     XPUSHs (scalar);
1181     }
1182    
1183     void decode_cbor (SV *cborstr)
1184     PPCODE:
1185     {
1186     CBOR cbor;
1187     cbor_init (&cbor);
1188     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1189     XPUSHs (cborstr);
1190     }
1191