ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.18
Committed: Wed Nov 20 01:09:46 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.17: +156 -86 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12     #include "ecb.h"
13    
14 root 1.14 // compatibility with perl <5.18
15     #ifndef HvNAMELEN_get
16     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
17     #endif
18     #ifndef HvNAMELEN
19     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
20     #endif
21     #ifndef HvNAMEUTF8
22     # define HvNAMEUTF8(hv) 0
23     #endif
24    
25 root 1.9 // known tags
26 root 1.8 enum cbor_tag
27     {
28 root 1.9 // inofficial extensions (pending iana registration)
29 root 1.18 CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object
30     CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object
31     CBOR_TAG_VALUE_SHARABLE = 26, // http://cbor.schmorp.de/value-sharing
32     CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing
33     CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref
34     CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref
35     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
36 root 1.9
37     // rfc7049
38     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
39     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
40     CBOR_TAG_POS_BIGNUM = 2, // byte string
41     CBOR_TAG_NEG_BIGNUM = 3, // byte string
42     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
43     CBOR_TAG_BIGFLOAT = 5, // array
44    
45     CBOR_TAG_CONV_B64U = 21, // base64url, any
46     CBOR_TAG_CONV_B64 = 22, // base64, any
47     CBOR_TAG_CONV_HEX = 23, // base16, any
48     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
49    
50     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
51     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
52     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
53     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
54     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
55 root 1.8
56 root 1.9 CBOR_TAG_MAGIC = 55799 // self-describe cbor
57 root 1.8 };
58    
59 root 1.18 #define F_SHRINK 0x00000001UL
60     #define F_ALLOW_UNKNOWN 0x00000002UL
61     #define F_ALLOW_SHARING 0x00000004UL //TODO
62     #define F_DEDUP_STRINGS 0x00000008UL //TODO
63     #define F_DEDUP_KEYS 0x00000010UL //TODO
64 root 1.1
65     #define INIT_SIZE 32 // initial scalar size to be allocated
66    
67     #define SB do {
68     #define SE } while (0)
69    
70     #define IN_RANGE_INC(type,val,beg,end) \
71     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
72     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
73    
74     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
75    
76     #ifdef USE_ITHREADS
77     # define CBOR_SLOW 1
78     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
79     #else
80     # define CBOR_SLOW 0
81     # define CBOR_STASH cbor_stash
82     #endif
83    
84 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
85 root 1.11 static SV *types_true, *types_false, *types_error, *sv_cbor;
86 root 1.1
87     typedef struct {
88     U32 flags;
89     U32 max_depth;
90     STRLEN max_size;
91     } CBOR;
92    
93 root 1.5 ecb_inline void
94 root 1.1 cbor_init (CBOR *cbor)
95     {
96     Zero (cbor, 1, CBOR);
97     cbor->max_depth = 512;
98     }
99    
100     /////////////////////////////////////////////////////////////////////////////
101     // utility functions
102    
103 root 1.5 ecb_inline SV *
104 root 1.1 get_bool (const char *name)
105     {
106     SV *sv = get_sv (name, 1);
107    
108     SvREADONLY_on (sv);
109     SvREADONLY_on (SvRV (sv));
110    
111     return sv;
112     }
113    
114 root 1.5 ecb_inline void
115 root 1.1 shrink (SV *sv)
116     {
117     sv_utf8_downgrade (sv, 1);
118    
119     if (SvLEN (sv) > SvCUR (sv) + 1)
120     {
121     #ifdef SvPV_shrink_to_cur
122     SvPV_shrink_to_cur (sv);
123     #elif defined (SvPV_renew)
124     SvPV_renew (sv, SvCUR (sv) + 1);
125     #endif
126     }
127     }
128    
129     /////////////////////////////////////////////////////////////////////////////
130     // encoder
131    
132     // structure used for encoding CBOR
133     typedef struct
134     {
135     char *cur; // SvPVX (sv) + current output position
136     char *end; // SvEND (sv)
137     SV *sv; // result scalar
138     CBOR cbor;
139     U32 depth; // recursion level
140 root 1.18 HV *stringref; // string => index, or 0
141     HV *sharable; // ptr => index, or 0
142     HV *sharable_idx;
143 root 1.1 } enc_t;
144    
145 root 1.5 ecb_inline void
146 root 1.1 need (enc_t *enc, STRLEN len)
147     {
148 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
149 root 1.1 {
150     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
151     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
152     enc->cur = SvPVX (enc->sv) + cur;
153     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
154     }
155     }
156    
157 root 1.5 ecb_inline void
158 root 1.1 encode_ch (enc_t *enc, char ch)
159     {
160     need (enc, 1);
161     *enc->cur++ = ch;
162     }
163    
164     static void
165     encode_uint (enc_t *enc, int major, UV len)
166     {
167     need (enc, 9);
168    
169     if (len < 24)
170     *enc->cur++ = major | len;
171 root 1.4 else if (len <= 0xff)
172 root 1.1 {
173     *enc->cur++ = major | 24;
174     *enc->cur++ = len;
175     }
176 root 1.4 else if (len <= 0xffff)
177 root 1.1 {
178     *enc->cur++ = major | 25;
179     *enc->cur++ = len >> 8;
180     *enc->cur++ = len;
181     }
182 root 1.4 else if (len <= 0xffffffff)
183 root 1.1 {
184     *enc->cur++ = major | 26;
185     *enc->cur++ = len >> 24;
186     *enc->cur++ = len >> 16;
187     *enc->cur++ = len >> 8;
188     *enc->cur++ = len;
189     }
190 root 1.4 else
191 root 1.1 {
192     *enc->cur++ = major | 27;
193     *enc->cur++ = len >> 56;
194     *enc->cur++ = len >> 48;
195     *enc->cur++ = len >> 40;
196     *enc->cur++ = len >> 32;
197     *enc->cur++ = len >> 24;
198     *enc->cur++ = len >> 16;
199     *enc->cur++ = len >> 8;
200     *enc->cur++ = len;
201     }
202     }
203    
204     static void
205     encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
206     {
207     encode_uint (enc, utf8 ? 0x60 : 0x40, len);
208     need (enc, len);
209     memcpy (enc->cur, str, len);
210     enc->cur += len;
211     }
212    
213 root 1.18 ecb_inline void
214     encode_tag (enc_t *enc, UV tag)
215     {
216     encode_uint (enc, 0xc0, tag);
217     }
218    
219     static int
220     encode_sharable2 (enc_t *enc, SV *sv)
221     {
222     if (!enc->sharable)
223     enc->sharable = (HV *)sv_2mortal ((SV *)newHV ());
224    
225     SV **svp = hv_fetch (enc->sharable, &sv, sizeof (sv), 1);
226    
227     if (SvOK (*svp))
228     {
229     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
230     encode_uint (enc, 0x00, SvUV (*svp));
231    
232     return 1;
233     }
234     else
235     {
236     sv_setuv (*svp, enc->sharable_idx++);
237     encode_tag (enc, CBOR_TAG_VALUE_SHARABLE);
238    
239     return 0;
240     }
241     }
242    
243     ecb_inline int
244     encode_sharable (enc_t *enc, SV *sv)
245     {
246     if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
247     && ecb_expect_false (SvREFCNT (sv) > 1))
248     return encode_sharable2 (enc, sv);
249    
250     return 0;
251     }
252    
253 root 1.1 static void encode_sv (enc_t *enc, SV *sv);
254    
255     static void
256     encode_av (enc_t *enc, AV *av)
257     {
258     int i, len = av_len (av);
259    
260     if (enc->depth >= enc->cbor.max_depth)
261     croak (ERR_NESTING_EXCEEDED);
262    
263     ++enc->depth;
264    
265     encode_uint (enc, 0x80, len + 1);
266    
267     for (i = 0; i <= len; ++i)
268     {
269     SV **svp = av_fetch (av, i, 0);
270     encode_sv (enc, svp ? *svp : &PL_sv_undef);
271     }
272    
273     --enc->depth;
274     }
275    
276     static void
277     encode_hv (enc_t *enc, HV *hv)
278     {
279     HE *he;
280    
281     if (enc->depth >= enc->cbor.max_depth)
282     croak (ERR_NESTING_EXCEEDED);
283    
284     ++enc->depth;
285    
286     int pairs = hv_iterinit (hv);
287     int mg = SvMAGICAL (hv);
288    
289     if (mg)
290     encode_ch (enc, 0xa0 | 31);
291     else
292     encode_uint (enc, 0xa0, pairs);
293    
294     while ((he = hv_iternext (hv)))
295     {
296     if (HeKLEN (he) == HEf_SVKEY)
297     encode_sv (enc, HeSVKEY (he));
298     else
299     encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
300    
301 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
302 root 1.1 }
303    
304     if (mg)
305     encode_ch (enc, 0xe0 | 31);
306    
307     --enc->depth;
308     }
309    
310     // encode objects, arrays and special \0=false and \1=true values.
311     static void
312     encode_rv (enc_t *enc, SV *sv)
313     {
314     svtype svt;
315    
316     SvGETMAGIC (sv);
317     svt = SvTYPE (sv);
318    
319 root 1.18 if (encode_sharable (enc, sv))
320     return;
321    
322 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
323 root 1.1 {
324 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
325     ? types_boolean_stash
326     : gv_stashpv ("Types::Serialiser::Boolean", 1);
327     HV *error_stash = !CBOR_SLOW || types_error_stash
328     ? types_error_stash
329     : gv_stashpv ("Types::Serialiser::Error", 1);
330 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
331     ? cbor_tagged_stash
332     : gv_stashpv ("CBOR::XS::Tagged" , 1);
333 root 1.1
334 root 1.11 HV *stash = SvSTASH (sv);
335     GV *method;
336    
337     if (stash == boolean_stash)
338 root 1.1 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
339 root 1.11 else if (stash == error_stash)
340 root 1.10 encode_ch (enc, 0xe0 | 23);
341 root 1.11 else if (stash == tagged_stash)
342 root 1.6 {
343     if (svt != SVt_PVAV)
344     croak ("encountered CBOR::XS::Tagged object that isn't an array");
345    
346     encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
347     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
348     }
349 root 1.11 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
350 root 1.1 {
351 root 1.11 dSP;
352    
353     ENTER; SAVETMPS; PUSHMARK (SP);
354 root 1.6 // we re-bless the reference to get overload and other niceties right
355 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
356    
357     PUTBACK;
358     // G_SCALAR ensures that return value is 1
359     call_sv ((SV *)GvCV (method), G_SCALAR);
360     SPAGAIN;
361    
362     // catch this surprisingly common error
363     if (SvROK (TOPs) && SvRV (TOPs) == sv)
364     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
365    
366     encode_sv (enc, POPs);
367    
368     PUTBACK;
369    
370     FREETMPS; LEAVE;
371     }
372     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
373     {
374     dSP;
375 root 1.6
376 root 1.11 ENTER; SAVETMPS; PUSHMARK (SP);
377     EXTEND (SP, 2);
378     // we re-bless the reference to get overload and other niceties right
379     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
380     PUSHs (sv_cbor);
381 root 1.1
382 root 1.11 PUTBACK;
383     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
384     SPAGAIN;
385 root 1.6
386 root 1.11 // catch this surprisingly common error
387     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
388     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
389 root 1.6
390 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
391 root 1.11 encode_uint (enc, 0x80, count + 1);
392     encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
393 root 1.6
394 root 1.11 while (count)
395     encode_sv (enc, SP[1 - count--]);
396 root 1.6
397 root 1.11 PUTBACK;
398 root 1.6
399 root 1.11 FREETMPS; LEAVE;
400 root 1.1 }
401 root 1.11 else
402     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
403     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
404 root 1.1 }
405     else if (svt == SVt_PVHV)
406     encode_hv (enc, (HV *)sv);
407     else if (svt == SVt_PVAV)
408     encode_av (enc, (AV *)sv);
409 root 1.18 else
410 root 1.1 {
411 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
412     encode_sv (enc, sv);
413 root 1.1 }
414     }
415    
416     static void
417     encode_nv (enc_t *enc, SV *sv)
418     {
419     double nv = SvNVX (sv);
420    
421     need (enc, 9);
422    
423 root 1.5 if (ecb_expect_false (nv == (U32)nv))
424 root 1.1 encode_uint (enc, 0x00, (U32)nv);
425     //TODO: maybe I32?
426 root 1.5 else if (ecb_expect_false (nv == (float)nv))
427 root 1.1 {
428     uint32_t fp = ecb_float_to_binary32 (nv);
429    
430     *enc->cur++ = 0xe0 | 26;
431    
432     if (!ecb_big_endian ())
433     fp = ecb_bswap32 (fp);
434    
435     memcpy (enc->cur, &fp, 4);
436     enc->cur += 4;
437     }
438     else
439     {
440     uint64_t fp = ecb_double_to_binary64 (nv);
441    
442     *enc->cur++ = 0xe0 | 27;
443    
444     if (!ecb_big_endian ())
445     fp = ecb_bswap64 (fp);
446    
447     memcpy (enc->cur, &fp, 8);
448     enc->cur += 8;
449     }
450     }
451    
452     static void
453     encode_sv (enc_t *enc, SV *sv)
454     {
455     SvGETMAGIC (sv);
456    
457 root 1.18 if (encode_sharable (enc, sv))
458     return;
459    
460 root 1.1 if (SvPOKp (sv))
461     {
462     STRLEN len;
463     char *str = SvPV (sv, len);
464     encode_str (enc, SvUTF8 (sv), str, len);
465     }
466     else if (SvNOKp (sv))
467     encode_nv (enc, sv);
468     else if (SvIOKp (sv))
469     {
470     if (SvIsUV (sv))
471     encode_uint (enc, 0x00, SvUVX (sv));
472     else if (SvIVX (sv) >= 0)
473     encode_uint (enc, 0x00, SvIVX (sv));
474     else
475     encode_uint (enc, 0x20, -(SvIVX (sv) + 1));
476     }
477     else if (SvROK (sv))
478     encode_rv (enc, SvRV (sv));
479     else if (!SvOK (sv))
480     encode_ch (enc, 0xe0 | 22);
481     else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
482     encode_ch (enc, 0xe0 | 23);
483     else
484     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
485     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
486     }
487    
488     static SV *
489     encode_cbor (SV *scalar, CBOR *cbor)
490     {
491 root 1.18 enc_t enc = { };
492 root 1.1
493     enc.cbor = *cbor;
494     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
495     enc.cur = SvPVX (enc.sv);
496     enc.end = SvEND (enc.sv);
497    
498     SvPOK_only (enc.sv);
499     encode_sv (&enc, scalar);
500    
501     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
502     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
503    
504     if (enc.cbor.flags & F_SHRINK)
505     shrink (enc.sv);
506    
507     return enc.sv;
508     }
509    
510     /////////////////////////////////////////////////////////////////////////////
511     // decoder
512    
513     // structure used for decoding CBOR
514     typedef struct
515     {
516     U8 *cur; // current parser pointer
517     U8 *end; // end of input string
518     const char *err; // parse error, if != 0
519     CBOR cbor;
520     U32 depth; // recursion depth
521     U32 maxdepth; // recursion depth limit
522 root 1.18 AV *sharable;
523 root 1.1 } dec_t;
524    
525     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
526    
527 root 1.5 #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
528 root 1.1
529     #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED)
530     #define DEC_DEC_DEPTH --dec->depth
531    
532     static UV
533     decode_uint (dec_t *dec)
534     {
535     switch (*dec->cur & 31)
536     {
537     case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
538     case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
539     case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
540     return *dec->cur++ & 31;
541    
542     case 24:
543     WANT (2);
544     dec->cur += 2;
545     return dec->cur[-1];
546    
547     case 25:
548     WANT (3);
549     dec->cur += 3;
550     return (((UV)dec->cur[-2]) << 8)
551     | ((UV)dec->cur[-1]);
552    
553     case 26:
554     WANT (5);
555     dec->cur += 5;
556     return (((UV)dec->cur[-4]) << 24)
557     | (((UV)dec->cur[-3]) << 16)
558     | (((UV)dec->cur[-2]) << 8)
559     | ((UV)dec->cur[-1]);
560    
561     case 27:
562     WANT (9);
563     dec->cur += 9;
564     return (((UV)dec->cur[-8]) << 56)
565     | (((UV)dec->cur[-7]) << 48)
566     | (((UV)dec->cur[-6]) << 40)
567     | (((UV)dec->cur[-5]) << 32)
568     | (((UV)dec->cur[-4]) << 24)
569     | (((UV)dec->cur[-3]) << 16)
570     | (((UV)dec->cur[-2]) << 8)
571     | ((UV)dec->cur[-1]);
572    
573     default:
574     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
575     }
576    
577     fail:
578     return 0;
579     }
580    
581     static SV *decode_sv (dec_t *dec);
582    
583     static SV *
584     decode_av (dec_t *dec)
585     {
586     AV *av = newAV ();
587    
588     DEC_INC_DEPTH;
589    
590     if ((*dec->cur & 31) == 31)
591     {
592     ++dec->cur;
593    
594     for (;;)
595     {
596     WANT (1);
597    
598 root 1.2 if (*dec->cur == (0xe0 | 31))
599 root 1.1 {
600     ++dec->cur;
601     break;
602     }
603    
604     av_push (av, decode_sv (dec));
605     }
606     }
607     else
608     {
609     int i, len = decode_uint (dec);
610    
611     av_fill (av, len - 1);
612    
613     for (i = 0; i < len; ++i)
614     AvARRAY (av)[i] = decode_sv (dec);
615     }
616    
617     DEC_DEC_DEPTH;
618     return newRV_noinc ((SV *)av);
619    
620     fail:
621     SvREFCNT_dec (av);
622     DEC_DEC_DEPTH;
623     return &PL_sv_undef;
624     }
625    
626 root 1.16 static void
627     decode_he (dec_t *dec, HV *hv)
628     {
629     // for speed reasons, we specialcase single-string
630     // byte or utf-8 strings as keys.
631    
632     if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
633     {
634     I32 len = decode_uint (dec);
635     char *key = (char *)dec->cur;
636    
637     dec->cur += len;
638    
639     hv_store (hv, key, len, decode_sv (dec), 0);
640     }
641     else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
642     {
643     I32 len = decode_uint (dec);
644     char *key = (char *)dec->cur;
645    
646     dec->cur += len;
647    
648     hv_store (hv, key, -len, decode_sv (dec), 0);
649     }
650     else
651     {
652     SV *k = decode_sv (dec);
653     SV *v = decode_sv (dec);
654    
655     hv_store_ent (hv, k, v, 0);
656     SvREFCNT_dec (k);
657     }
658     }
659    
660 root 1.1 static SV *
661     decode_hv (dec_t *dec)
662     {
663     HV *hv = newHV ();
664    
665     DEC_INC_DEPTH;
666    
667     if ((*dec->cur & 31) == 31)
668     {
669     ++dec->cur;
670    
671     for (;;)
672     {
673     WANT (1);
674    
675 root 1.2 if (*dec->cur == (0xe0 | 31))
676 root 1.1 {
677     ++dec->cur;
678     break;
679     }
680    
681 root 1.16 decode_he (dec, hv);
682 root 1.1 }
683     }
684     else
685     {
686 root 1.16 int pairs = decode_uint (dec);
687 root 1.1
688 root 1.16 while (pairs--)
689     decode_he (dec, hv);
690 root 1.1 }
691    
692     DEC_DEC_DEPTH;
693     return newRV_noinc ((SV *)hv);
694    
695     fail:
696     SvREFCNT_dec (hv);
697     DEC_DEC_DEPTH;
698     return &PL_sv_undef;
699     }
700    
701     static SV *
702     decode_str (dec_t *dec, int utf8)
703     {
704 root 1.6 SV *sv = 0;
705 root 1.1
706     if ((*dec->cur & 31) == 31)
707     {
708     ++dec->cur;
709    
710     sv = newSVpvn ("", 0);
711    
712     // not very fast, and certainly not robust against illegal input
713     for (;;)
714     {
715     WANT (1);
716    
717 root 1.2 if (*dec->cur == (0xe0 | 31))
718 root 1.1 {
719     ++dec->cur;
720     break;
721     }
722    
723 root 1.6 sv_catsv (sv, decode_sv (dec));
724 root 1.1 }
725     }
726     else
727     {
728     STRLEN len = decode_uint (dec);
729    
730     WANT (len);
731     sv = newSVpvn (dec->cur, len);
732     dec->cur += len;
733     }
734    
735     if (utf8)
736     SvUTF8_on (sv);
737    
738     return sv;
739    
740     fail:
741 root 1.6 SvREFCNT_dec (sv);
742 root 1.1 return &PL_sv_undef;
743     }
744    
745     static SV *
746 root 1.3 decode_tagged (dec_t *dec)
747     {
748     UV tag = decode_uint (dec);
749     SV *sv = decode_sv (dec);
750    
751 root 1.18 switch (tag)
752     {
753     case CBOR_TAG_MAGIC:
754     return sv;
755    
756     case CBOR_TAG_INDIRECTION:
757     return newRV_noinc (sv);
758 root 1.11
759 root 1.18 case CBOR_TAG_VALUE_SHARABLE:
760     if (ecb_expect_false (!dec->sharable))
761     dec->sharable = (AV *)sv_2mortal ((SV *)newAV ());
762    
763     av_push (dec->sharable, SvREFCNT_inc_NN (sv));
764    
765     return sv;
766    
767     case CBOR_TAG_VALUE_SHAREDREF:
768 root 1.17 {
769 root 1.18 // TODO: should verify that the sv atcually was a CBOR unsigned integer
770     UV idx = SvUV (sv);
771    
772     if (!dec->sharable || idx > AvFILLp (dec->sharable))
773     ERR ("corrupted CBOR data (sharedref index out of bounds)");
774    
775     SvREFCNT_dec (sv);
776    
777     return SvREFCNT_inc_NN (AvARRAY (dec->sharable)[idx]);
778 root 1.17 }
779    
780 root 1.18 case CBOR_TAG_PERL_OBJECT:
781     {
782     if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
783     ERR ("corrupted CBOR data (non-array perl object)");
784    
785     AV *av = (AV *)SvRV (sv);
786     int len = av_len (av) + 1;
787     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
788    
789     if (!stash)
790     ERR ("cannot decode perl-object (package does not exist)");
791    
792     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
793    
794     if (!method)
795     ERR ("cannot decode perl-object (package does not have a THAW method)");
796    
797     dSP;
798    
799     ENTER; SAVETMPS; PUSHMARK (SP);
800     EXTEND (SP, len + 1);
801     // we re-bless the reference to get overload and other niceties right
802     PUSHs (*av_fetch (av, 0, 1));
803     PUSHs (sv_cbor);
804    
805     int i;
806    
807     for (i = 1; i < len; ++i)
808     PUSHs (*av_fetch (av, i, 1));
809    
810     PUTBACK;
811     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
812     SPAGAIN;
813    
814     if (SvTRUE (ERRSV))
815     {
816     FREETMPS; LEAVE;
817     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
818     }
819    
820     SvREFCNT_dec (sv);
821     sv = SvREFCNT_inc (POPs);
822 root 1.11
823 root 1.18 PUTBACK;
824 root 1.11
825 root 1.18 FREETMPS; LEAVE;
826 root 1.11
827 root 1.18 return sv;
828     }
829 root 1.9
830 root 1.18 default:
831     {
832     AV *av = newAV ();
833     av_push (av, newSVuv (tag));
834     av_push (av, sv);
835    
836     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
837     ? cbor_tagged_stash
838     : gv_stashpv ("CBOR::XS::Tagged" , 1);
839 root 1.7
840 root 1.18 return sv_bless (newRV_noinc ((SV *)av), tagged_stash);
841     }
842 root 1.11 }
843 root 1.9
844     fail:
845     SvREFCNT_dec (sv);
846     return &PL_sv_undef;
847 root 1.3 }
848    
849     static SV *
850 root 1.1 decode_sv (dec_t *dec)
851     {
852     WANT (1);
853    
854     switch (*dec->cur >> 5)
855     {
856     case 0: // unsigned int
857     return newSVuv (decode_uint (dec));
858     case 1: // negative int
859     return newSViv (-1 - (IV)decode_uint (dec));
860     case 2: // octet string
861     return decode_str (dec, 0);
862     case 3: // utf-8 string
863     return decode_str (dec, 1);
864     case 4: // array
865     return decode_av (dec);
866     case 5: // map
867     return decode_hv (dec);
868     case 6: // tag
869 root 1.3 return decode_tagged (dec);
870 root 1.1 case 7: // misc
871     switch (*dec->cur++ & 31)
872     {
873     case 20:
874     #if CBOR_SLOW
875 root 1.10 types_false = get_bool ("Types::Serialiser::false");
876 root 1.1 #endif
877 root 1.10 return newSVsv (types_false);
878 root 1.1 case 21:
879     #if CBOR_SLOW
880 root 1.10 types_true = get_bool ("Types::Serialiser::true");
881 root 1.1 #endif
882 root 1.10 return newSVsv (types_true);
883 root 1.1 case 22:
884     return newSVsv (&PL_sv_undef);
885 root 1.10 case 23:
886     #if CBOR_SLOW
887     types_error = get_bool ("Types::Serialiser::error");
888     #endif
889     return newSVsv (types_error);
890 root 1.1
891     case 25:
892 root 1.2 {
893     WANT (2);
894    
895     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
896     dec->cur += 2;
897    
898     return newSVnv (ecb_binary16_to_float (fp));
899     }
900 root 1.1
901     case 26:
902     {
903     uint32_t fp;
904     WANT (4);
905     memcpy (&fp, dec->cur, 4);
906     dec->cur += 4;
907    
908     if (!ecb_big_endian ())
909     fp = ecb_bswap32 (fp);
910    
911     return newSVnv (ecb_binary32_to_float (fp));
912     }
913    
914     case 27:
915     {
916     uint64_t fp;
917     WANT (8);
918     memcpy (&fp, dec->cur, 8);
919     dec->cur += 8;
920    
921     if (!ecb_big_endian ())
922     fp = ecb_bswap64 (fp);
923    
924     return newSVnv (ecb_binary64_to_double (fp));
925     }
926    
927     // 0..19 unassigned
928     // 24 reserved + unassigned (reserved values are not encodable)
929     default:
930     ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
931     }
932    
933     break;
934     }
935    
936     fail:
937     return &PL_sv_undef;
938     }
939    
940     static SV *
941     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
942     {
943 root 1.18 dec_t dec = { };
944 root 1.1 SV *sv;
945 root 1.16 STRLEN len;
946     char *data = SvPVbyte (string, len);
947 root 1.1
948 root 1.16 if (len > cbor->max_size && cbor->max_size)
949     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
950     (unsigned long)len, (unsigned long)cbor->max_size);
951 root 1.1
952     dec.cbor = *cbor;
953 root 1.16 dec.cur = (U8 *)data;
954     dec.end = (U8 *)data + len;
955 root 1.1
956     sv = decode_sv (&dec);
957    
958     if (offset_return)
959     *offset_return = dec.cur;
960    
961     if (!(offset_return || !sv))
962 root 1.2 if (dec.cur != dec.end && !dec.err)
963     dec.err = "garbage after CBOR object";
964    
965     if (dec.err)
966 root 1.1 {
967 root 1.2 SvREFCNT_dec (sv);
968 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
969 root 1.1 }
970    
971     sv = sv_2mortal (sv);
972    
973     return sv;
974     }
975    
976     /////////////////////////////////////////////////////////////////////////////
977     // XS interface functions
978    
979     MODULE = CBOR::XS PACKAGE = CBOR::XS
980    
981     BOOT:
982     {
983     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
984 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
985 root 1.1
986 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
987     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
988    
989     types_true = get_bool ("Types::Serialiser::true" );
990     types_false = get_bool ("Types::Serialiser::false");
991     types_error = get_bool ("Types::Serialiser::error");
992 root 1.11
993     sv_cbor = newSVpv ("CBOR", 0);
994     SvREADONLY_on (sv_cbor);
995 root 1.1 }
996    
997     PROTOTYPES: DISABLE
998    
999     void CLONE (...)
1000     CODE:
1001 root 1.10 cbor_stash = 0;
1002     cbor_tagged_stash = 0;
1003     types_error_stash = 0;
1004     types_boolean_stash = 0;
1005 root 1.1
1006     void new (char *klass)
1007     PPCODE:
1008     {
1009     SV *pv = NEWSV (0, sizeof (CBOR));
1010     SvPOK_only (pv);
1011     cbor_init ((CBOR *)SvPVX (pv));
1012     XPUSHs (sv_2mortal (sv_bless (
1013     newRV_noinc (pv),
1014     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1015     )));
1016     }
1017    
1018     void shrink (CBOR *self, int enable = 1)
1019     ALIAS:
1020     shrink = F_SHRINK
1021     allow_unknown = F_ALLOW_UNKNOWN
1022 root 1.18 allow_sharing = F_ALLOW_SHARING
1023     dedup_keys = F_DEDUP_KEYS
1024     dedup_strings = F_DEDUP_STRINGS
1025 root 1.1 PPCODE:
1026     {
1027     if (enable)
1028     self->flags |= ix;
1029     else
1030     self->flags &= ~ix;
1031    
1032     XPUSHs (ST (0));
1033     }
1034    
1035     void get_shrink (CBOR *self)
1036     ALIAS:
1037     get_shrink = F_SHRINK
1038     get_allow_unknown = F_ALLOW_UNKNOWN
1039 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1040     get_dedup_keys = F_DEDUP_KEYS
1041     get_dedup_strings = F_DEDUP_STRINGS
1042 root 1.1 PPCODE:
1043     XPUSHs (boolSV (self->flags & ix));
1044    
1045     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1046     PPCODE:
1047     self->max_depth = max_depth;
1048     XPUSHs (ST (0));
1049    
1050     U32 get_max_depth (CBOR *self)
1051     CODE:
1052     RETVAL = self->max_depth;
1053     OUTPUT:
1054     RETVAL
1055    
1056     void max_size (CBOR *self, U32 max_size = 0)
1057     PPCODE:
1058     self->max_size = max_size;
1059     XPUSHs (ST (0));
1060    
1061     int get_max_size (CBOR *self)
1062     CODE:
1063     RETVAL = self->max_size;
1064     OUTPUT:
1065     RETVAL
1066    
1067     void encode (CBOR *self, SV *scalar)
1068     PPCODE:
1069     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1070     XPUSHs (scalar);
1071    
1072     void decode (CBOR *self, SV *cborstr)
1073     PPCODE:
1074     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1075     XPUSHs (cborstr);
1076    
1077     void decode_prefix (CBOR *self, SV *cborstr)
1078     PPCODE:
1079     {
1080     SV *sv;
1081     char *offset;
1082     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1083     EXTEND (SP, 2);
1084     PUSHs (sv);
1085     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1086     }
1087    
1088     PROTOTYPES: ENABLE
1089    
1090     void encode_cbor (SV *scalar)
1091     PPCODE:
1092     {
1093     CBOR cbor;
1094     cbor_init (&cbor);
1095     PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1096     XPUSHs (scalar);
1097     }
1098    
1099     void decode_cbor (SV *cborstr)
1100     PPCODE:
1101     {
1102     CBOR cbor;
1103     cbor_init (&cbor);
1104     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1105     XPUSHs (cborstr);
1106     }
1107