ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.19
Committed: Wed Nov 20 02:03:09 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.18: +53 -59 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12     #include "ecb.h"
13    
14 root 1.14 // compatibility with perl <5.18
15     #ifndef HvNAMELEN_get
16     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
17     #endif
18     #ifndef HvNAMELEN
19     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
20     #endif
21     #ifndef HvNAMEUTF8
22     # define HvNAMEUTF8(hv) 0
23     #endif
24    
25 root 1.9 // known tags
26 root 1.8 enum cbor_tag
27     {
28 root 1.9 // inofficial extensions (pending iana registration)
29 root 1.18 CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object
30     CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object
31 root 1.19 CBOR_TAG_VALUE_SHAREABLE = 26, // http://cbor.schmorp.de/value-sharing
32 root 1.18 CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing
33     CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref
34     CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref
35     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
36 root 1.9
37     // rfc7049
38     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
39     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
40     CBOR_TAG_POS_BIGNUM = 2, // byte string
41     CBOR_TAG_NEG_BIGNUM = 3, // byte string
42     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
43     CBOR_TAG_BIGFLOAT = 5, // array
44    
45     CBOR_TAG_CONV_B64U = 21, // base64url, any
46     CBOR_TAG_CONV_B64 = 22, // base64, any
47     CBOR_TAG_CONV_HEX = 23, // base16, any
48     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
49    
50     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
51     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
52     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
53     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
54     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
55 root 1.8
56 root 1.9 CBOR_TAG_MAGIC = 55799 // self-describe cbor
57 root 1.8 };
58    
59 root 1.18 #define F_SHRINK 0x00000001UL
60     #define F_ALLOW_UNKNOWN 0x00000002UL
61     #define F_ALLOW_SHARING 0x00000004UL //TODO
62     #define F_DEDUP_STRINGS 0x00000008UL //TODO
63     #define F_DEDUP_KEYS 0x00000010UL //TODO
64 root 1.1
65     #define INIT_SIZE 32 // initial scalar size to be allocated
66    
67     #define SB do {
68     #define SE } while (0)
69    
70     #define IN_RANGE_INC(type,val,beg,end) \
71     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
72     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
73    
74     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
75    
76     #ifdef USE_ITHREADS
77     # define CBOR_SLOW 1
78     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
79     #else
80     # define CBOR_SLOW 0
81     # define CBOR_STASH cbor_stash
82     #endif
83    
84 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
85 root 1.11 static SV *types_true, *types_false, *types_error, *sv_cbor;
86 root 1.1
87     typedef struct {
88     U32 flags;
89     U32 max_depth;
90     STRLEN max_size;
91     } CBOR;
92    
93 root 1.5 ecb_inline void
94 root 1.1 cbor_init (CBOR *cbor)
95     {
96     Zero (cbor, 1, CBOR);
97     cbor->max_depth = 512;
98     }
99    
100     /////////////////////////////////////////////////////////////////////////////
101     // utility functions
102    
103 root 1.5 ecb_inline SV *
104 root 1.1 get_bool (const char *name)
105     {
106     SV *sv = get_sv (name, 1);
107    
108     SvREADONLY_on (sv);
109     SvREADONLY_on (SvRV (sv));
110    
111     return sv;
112     }
113    
114 root 1.5 ecb_inline void
115 root 1.1 shrink (SV *sv)
116     {
117     sv_utf8_downgrade (sv, 1);
118    
119     if (SvLEN (sv) > SvCUR (sv) + 1)
120     {
121     #ifdef SvPV_shrink_to_cur
122     SvPV_shrink_to_cur (sv);
123     #elif defined (SvPV_renew)
124     SvPV_renew (sv, SvCUR (sv) + 1);
125     #endif
126     }
127     }
128    
129     /////////////////////////////////////////////////////////////////////////////
130     // encoder
131    
132     // structure used for encoding CBOR
133     typedef struct
134     {
135     char *cur; // SvPVX (sv) + current output position
136     char *end; // SvEND (sv)
137     SV *sv; // result scalar
138     CBOR cbor;
139     U32 depth; // recursion level
140 root 1.18 HV *stringref; // string => index, or 0
141 root 1.19 HV *shareable; // ptr => index, or 0
142     UV shareable_idx;
143 root 1.1 } enc_t;
144    
145 root 1.5 ecb_inline void
146 root 1.1 need (enc_t *enc, STRLEN len)
147     {
148 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
149 root 1.1 {
150     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
151     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
152     enc->cur = SvPVX (enc->sv) + cur;
153     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
154     }
155     }
156    
157 root 1.5 ecb_inline void
158 root 1.1 encode_ch (enc_t *enc, char ch)
159     {
160     need (enc, 1);
161     *enc->cur++ = ch;
162     }
163    
164     static void
165     encode_uint (enc_t *enc, int major, UV len)
166     {
167     need (enc, 9);
168    
169     if (len < 24)
170     *enc->cur++ = major | len;
171 root 1.4 else if (len <= 0xff)
172 root 1.1 {
173     *enc->cur++ = major | 24;
174     *enc->cur++ = len;
175     }
176 root 1.4 else if (len <= 0xffff)
177 root 1.1 {
178     *enc->cur++ = major | 25;
179     *enc->cur++ = len >> 8;
180     *enc->cur++ = len;
181     }
182 root 1.4 else if (len <= 0xffffffff)
183 root 1.1 {
184     *enc->cur++ = major | 26;
185     *enc->cur++ = len >> 24;
186     *enc->cur++ = len >> 16;
187     *enc->cur++ = len >> 8;
188     *enc->cur++ = len;
189     }
190 root 1.4 else
191 root 1.1 {
192     *enc->cur++ = major | 27;
193     *enc->cur++ = len >> 56;
194     *enc->cur++ = len >> 48;
195     *enc->cur++ = len >> 40;
196     *enc->cur++ = len >> 32;
197     *enc->cur++ = len >> 24;
198     *enc->cur++ = len >> 16;
199     *enc->cur++ = len >> 8;
200     *enc->cur++ = len;
201     }
202     }
203    
204     static void
205     encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
206     {
207     encode_uint (enc, utf8 ? 0x60 : 0x40, len);
208     need (enc, len);
209     memcpy (enc->cur, str, len);
210     enc->cur += len;
211     }
212    
213 root 1.18 ecb_inline void
214     encode_tag (enc_t *enc, UV tag)
215     {
216     encode_uint (enc, 0xc0, tag);
217     }
218    
219 root 1.1 static void encode_sv (enc_t *enc, SV *sv);
220    
221     static void
222     encode_av (enc_t *enc, AV *av)
223     {
224     int i, len = av_len (av);
225    
226     if (enc->depth >= enc->cbor.max_depth)
227     croak (ERR_NESTING_EXCEEDED);
228    
229     ++enc->depth;
230    
231     encode_uint (enc, 0x80, len + 1);
232    
233     for (i = 0; i <= len; ++i)
234     {
235     SV **svp = av_fetch (av, i, 0);
236     encode_sv (enc, svp ? *svp : &PL_sv_undef);
237     }
238    
239     --enc->depth;
240     }
241    
242     static void
243     encode_hv (enc_t *enc, HV *hv)
244     {
245     HE *he;
246    
247     if (enc->depth >= enc->cbor.max_depth)
248     croak (ERR_NESTING_EXCEEDED);
249    
250     ++enc->depth;
251    
252     int pairs = hv_iterinit (hv);
253     int mg = SvMAGICAL (hv);
254    
255     if (mg)
256     encode_ch (enc, 0xa0 | 31);
257     else
258     encode_uint (enc, 0xa0, pairs);
259    
260     while ((he = hv_iternext (hv)))
261     {
262     if (HeKLEN (he) == HEf_SVKEY)
263     encode_sv (enc, HeSVKEY (he));
264     else
265     encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
266    
267 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
268 root 1.1 }
269    
270     if (mg)
271     encode_ch (enc, 0xe0 | 31);
272    
273     --enc->depth;
274     }
275    
276     // encode objects, arrays and special \0=false and \1=true values.
277     static void
278     encode_rv (enc_t *enc, SV *sv)
279     {
280 root 1.19 SvGETMAGIC (sv);
281 root 1.1
282 root 1.19 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
283     && ecb_expect_false (SvREFCNT (sv) > 1))
284     {
285     if (!enc->shareable)
286     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
287    
288     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
289    
290     if (SvOK (*svp))
291     {
292     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
293     encode_uint (enc, 0x00, SvUV (*svp));
294     return;
295     }
296     else
297     {
298     sv_setuv (*svp, enc->shareable_idx);
299     ++enc->shareable_idx;
300     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
301     }
302     }
303 root 1.1
304 root 1.19 svtype svt = SvTYPE (sv);
305 root 1.18
306 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
307 root 1.1 {
308 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
309     ? types_boolean_stash
310     : gv_stashpv ("Types::Serialiser::Boolean", 1);
311     HV *error_stash = !CBOR_SLOW || types_error_stash
312     ? types_error_stash
313     : gv_stashpv ("Types::Serialiser::Error", 1);
314 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
315     ? cbor_tagged_stash
316     : gv_stashpv ("CBOR::XS::Tagged" , 1);
317 root 1.1
318 root 1.11 HV *stash = SvSTASH (sv);
319     GV *method;
320    
321     if (stash == boolean_stash)
322 root 1.1 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
323 root 1.11 else if (stash == error_stash)
324 root 1.10 encode_ch (enc, 0xe0 | 23);
325 root 1.11 else if (stash == tagged_stash)
326 root 1.6 {
327     if (svt != SVt_PVAV)
328     croak ("encountered CBOR::XS::Tagged object that isn't an array");
329    
330     encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
331     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
332     }
333 root 1.11 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
334 root 1.1 {
335 root 1.11 dSP;
336    
337     ENTER; SAVETMPS; PUSHMARK (SP);
338 root 1.6 // we re-bless the reference to get overload and other niceties right
339 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
340    
341     PUTBACK;
342     // G_SCALAR ensures that return value is 1
343     call_sv ((SV *)GvCV (method), G_SCALAR);
344     SPAGAIN;
345    
346     // catch this surprisingly common error
347     if (SvROK (TOPs) && SvRV (TOPs) == sv)
348     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
349    
350     encode_sv (enc, POPs);
351    
352     PUTBACK;
353    
354     FREETMPS; LEAVE;
355     }
356     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
357     {
358     dSP;
359 root 1.6
360 root 1.11 ENTER; SAVETMPS; PUSHMARK (SP);
361     EXTEND (SP, 2);
362     // we re-bless the reference to get overload and other niceties right
363     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
364     PUSHs (sv_cbor);
365 root 1.1
366 root 1.11 PUTBACK;
367     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
368     SPAGAIN;
369 root 1.6
370 root 1.11 // catch this surprisingly common error
371     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
372     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
373 root 1.6
374 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
375 root 1.11 encode_uint (enc, 0x80, count + 1);
376     encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
377 root 1.6
378 root 1.11 while (count)
379     encode_sv (enc, SP[1 - count--]);
380 root 1.6
381 root 1.11 PUTBACK;
382 root 1.6
383 root 1.11 FREETMPS; LEAVE;
384 root 1.1 }
385 root 1.11 else
386     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
387     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
388 root 1.1 }
389     else if (svt == SVt_PVHV)
390     encode_hv (enc, (HV *)sv);
391     else if (svt == SVt_PVAV)
392     encode_av (enc, (AV *)sv);
393 root 1.18 else
394 root 1.1 {
395 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
396     encode_sv (enc, sv);
397 root 1.1 }
398     }
399    
400     static void
401     encode_nv (enc_t *enc, SV *sv)
402     {
403     double nv = SvNVX (sv);
404    
405     need (enc, 9);
406    
407 root 1.5 if (ecb_expect_false (nv == (U32)nv))
408 root 1.1 encode_uint (enc, 0x00, (U32)nv);
409     //TODO: maybe I32?
410 root 1.5 else if (ecb_expect_false (nv == (float)nv))
411 root 1.1 {
412     uint32_t fp = ecb_float_to_binary32 (nv);
413    
414     *enc->cur++ = 0xe0 | 26;
415    
416     if (!ecb_big_endian ())
417     fp = ecb_bswap32 (fp);
418    
419     memcpy (enc->cur, &fp, 4);
420     enc->cur += 4;
421     }
422     else
423     {
424     uint64_t fp = ecb_double_to_binary64 (nv);
425    
426     *enc->cur++ = 0xe0 | 27;
427    
428     if (!ecb_big_endian ())
429     fp = ecb_bswap64 (fp);
430    
431     memcpy (enc->cur, &fp, 8);
432     enc->cur += 8;
433     }
434     }
435    
436     static void
437     encode_sv (enc_t *enc, SV *sv)
438     {
439     SvGETMAGIC (sv);
440    
441     if (SvPOKp (sv))
442     {
443     STRLEN len;
444     char *str = SvPV (sv, len);
445     encode_str (enc, SvUTF8 (sv), str, len);
446     }
447     else if (SvNOKp (sv))
448     encode_nv (enc, sv);
449     else if (SvIOKp (sv))
450     {
451     if (SvIsUV (sv))
452     encode_uint (enc, 0x00, SvUVX (sv));
453     else if (SvIVX (sv) >= 0)
454     encode_uint (enc, 0x00, SvIVX (sv));
455     else
456     encode_uint (enc, 0x20, -(SvIVX (sv) + 1));
457     }
458     else if (SvROK (sv))
459     encode_rv (enc, SvRV (sv));
460     else if (!SvOK (sv))
461     encode_ch (enc, 0xe0 | 22);
462     else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
463     encode_ch (enc, 0xe0 | 23);
464     else
465     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
466     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
467     }
468    
469     static SV *
470     encode_cbor (SV *scalar, CBOR *cbor)
471     {
472 root 1.18 enc_t enc = { };
473 root 1.1
474     enc.cbor = *cbor;
475     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
476     enc.cur = SvPVX (enc.sv);
477     enc.end = SvEND (enc.sv);
478    
479     SvPOK_only (enc.sv);
480     encode_sv (&enc, scalar);
481    
482     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
483     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
484    
485     if (enc.cbor.flags & F_SHRINK)
486     shrink (enc.sv);
487    
488     return enc.sv;
489     }
490    
491     /////////////////////////////////////////////////////////////////////////////
492     // decoder
493    
494     // structure used for decoding CBOR
495     typedef struct
496     {
497     U8 *cur; // current parser pointer
498     U8 *end; // end of input string
499     const char *err; // parse error, if != 0
500     CBOR cbor;
501     U32 depth; // recursion depth
502     U32 maxdepth; // recursion depth limit
503 root 1.19 AV *shareable;
504 root 1.1 } dec_t;
505    
506     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
507    
508 root 1.5 #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
509 root 1.1
510     #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED)
511     #define DEC_DEC_DEPTH --dec->depth
512    
513     static UV
514     decode_uint (dec_t *dec)
515     {
516     switch (*dec->cur & 31)
517     {
518     case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
519     case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
520     case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
521     return *dec->cur++ & 31;
522    
523     case 24:
524     WANT (2);
525     dec->cur += 2;
526     return dec->cur[-1];
527    
528     case 25:
529     WANT (3);
530     dec->cur += 3;
531     return (((UV)dec->cur[-2]) << 8)
532     | ((UV)dec->cur[-1]);
533    
534     case 26:
535     WANT (5);
536     dec->cur += 5;
537     return (((UV)dec->cur[-4]) << 24)
538     | (((UV)dec->cur[-3]) << 16)
539     | (((UV)dec->cur[-2]) << 8)
540     | ((UV)dec->cur[-1]);
541    
542     case 27:
543     WANT (9);
544     dec->cur += 9;
545     return (((UV)dec->cur[-8]) << 56)
546     | (((UV)dec->cur[-7]) << 48)
547     | (((UV)dec->cur[-6]) << 40)
548     | (((UV)dec->cur[-5]) << 32)
549     | (((UV)dec->cur[-4]) << 24)
550     | (((UV)dec->cur[-3]) << 16)
551     | (((UV)dec->cur[-2]) << 8)
552     | ((UV)dec->cur[-1]);
553    
554     default:
555     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
556     }
557    
558     fail:
559     return 0;
560     }
561    
562     static SV *decode_sv (dec_t *dec);
563    
564     static SV *
565     decode_av (dec_t *dec)
566     {
567     AV *av = newAV ();
568    
569     DEC_INC_DEPTH;
570    
571     if ((*dec->cur & 31) == 31)
572     {
573     ++dec->cur;
574    
575     for (;;)
576     {
577     WANT (1);
578    
579 root 1.2 if (*dec->cur == (0xe0 | 31))
580 root 1.1 {
581     ++dec->cur;
582     break;
583     }
584    
585     av_push (av, decode_sv (dec));
586     }
587     }
588     else
589     {
590     int i, len = decode_uint (dec);
591    
592     av_fill (av, len - 1);
593    
594     for (i = 0; i < len; ++i)
595     AvARRAY (av)[i] = decode_sv (dec);
596     }
597    
598     DEC_DEC_DEPTH;
599     return newRV_noinc ((SV *)av);
600    
601     fail:
602     SvREFCNT_dec (av);
603     DEC_DEC_DEPTH;
604     return &PL_sv_undef;
605     }
606    
607 root 1.16 static void
608     decode_he (dec_t *dec, HV *hv)
609     {
610     // for speed reasons, we specialcase single-string
611     // byte or utf-8 strings as keys.
612    
613     if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
614     {
615     I32 len = decode_uint (dec);
616     char *key = (char *)dec->cur;
617    
618     dec->cur += len;
619    
620     hv_store (hv, key, len, decode_sv (dec), 0);
621     }
622     else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
623     {
624     I32 len = decode_uint (dec);
625     char *key = (char *)dec->cur;
626    
627     dec->cur += len;
628    
629     hv_store (hv, key, -len, decode_sv (dec), 0);
630     }
631     else
632     {
633     SV *k = decode_sv (dec);
634     SV *v = decode_sv (dec);
635    
636     hv_store_ent (hv, k, v, 0);
637     SvREFCNT_dec (k);
638     }
639     }
640    
641 root 1.1 static SV *
642     decode_hv (dec_t *dec)
643     {
644     HV *hv = newHV ();
645    
646     DEC_INC_DEPTH;
647    
648     if ((*dec->cur & 31) == 31)
649     {
650     ++dec->cur;
651    
652     for (;;)
653     {
654     WANT (1);
655    
656 root 1.2 if (*dec->cur == (0xe0 | 31))
657 root 1.1 {
658     ++dec->cur;
659     break;
660     }
661    
662 root 1.16 decode_he (dec, hv);
663 root 1.1 }
664     }
665     else
666     {
667 root 1.16 int pairs = decode_uint (dec);
668 root 1.1
669 root 1.16 while (pairs--)
670     decode_he (dec, hv);
671 root 1.1 }
672    
673     DEC_DEC_DEPTH;
674     return newRV_noinc ((SV *)hv);
675    
676     fail:
677     SvREFCNT_dec (hv);
678     DEC_DEC_DEPTH;
679     return &PL_sv_undef;
680     }
681    
682     static SV *
683     decode_str (dec_t *dec, int utf8)
684     {
685 root 1.6 SV *sv = 0;
686 root 1.1
687     if ((*dec->cur & 31) == 31)
688     {
689     ++dec->cur;
690    
691     sv = newSVpvn ("", 0);
692    
693     // not very fast, and certainly not robust against illegal input
694     for (;;)
695     {
696     WANT (1);
697    
698 root 1.2 if (*dec->cur == (0xe0 | 31))
699 root 1.1 {
700     ++dec->cur;
701     break;
702     }
703    
704 root 1.6 sv_catsv (sv, decode_sv (dec));
705 root 1.1 }
706     }
707     else
708     {
709     STRLEN len = decode_uint (dec);
710    
711     WANT (len);
712     sv = newSVpvn (dec->cur, len);
713     dec->cur += len;
714     }
715    
716     if (utf8)
717     SvUTF8_on (sv);
718    
719     return sv;
720    
721     fail:
722 root 1.6 SvREFCNT_dec (sv);
723 root 1.1 return &PL_sv_undef;
724     }
725    
726     static SV *
727 root 1.3 decode_tagged (dec_t *dec)
728     {
729 root 1.19 SV *sv = 0;
730 root 1.3 UV tag = decode_uint (dec);
731 root 1.19
732     WANT (1);
733 root 1.3
734 root 1.18 switch (tag)
735     {
736     case CBOR_TAG_MAGIC:
737 root 1.19 return decode_sv (dec);
738 root 1.18
739     case CBOR_TAG_INDIRECTION:
740 root 1.19 return newRV_noinc (decode_sv (dec));
741 root 1.11
742 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
743     {
744     if (ecb_expect_false (!dec->shareable))
745     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
746    
747     sv = newSV (0);
748     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
749 root 1.18
750 root 1.19 SV *osv = decode_sv (dec);
751     sv_setsv (sv, osv);
752     SvREFCNT_dec_NN (osv);
753     }
754 root 1.18
755     return sv;
756    
757     case CBOR_TAG_VALUE_SHAREDREF:
758 root 1.17 {
759 root 1.19 if ((*dec->cur >> 5) != 0)
760     ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
761 root 1.18
762 root 1.19 UV idx = decode_uint (dec);
763    
764     if (!dec->shareable || idx > AvFILLp (dec->shareable))
765 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
766    
767 root 1.19 return SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
768 root 1.17 }
769    
770 root 1.18 case CBOR_TAG_PERL_OBJECT:
771     {
772 root 1.19 sv = decode_sv (dec);
773    
774 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
775     ERR ("corrupted CBOR data (non-array perl object)");
776    
777     AV *av = (AV *)SvRV (sv);
778     int len = av_len (av) + 1;
779     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
780    
781     if (!stash)
782     ERR ("cannot decode perl-object (package does not exist)");
783    
784     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
785    
786     if (!method)
787     ERR ("cannot decode perl-object (package does not have a THAW method)");
788    
789     dSP;
790    
791     ENTER; SAVETMPS; PUSHMARK (SP);
792     EXTEND (SP, len + 1);
793     // we re-bless the reference to get overload and other niceties right
794     PUSHs (*av_fetch (av, 0, 1));
795     PUSHs (sv_cbor);
796    
797     int i;
798    
799     for (i = 1; i < len; ++i)
800     PUSHs (*av_fetch (av, i, 1));
801    
802     PUTBACK;
803     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
804     SPAGAIN;
805    
806     if (SvTRUE (ERRSV))
807     {
808     FREETMPS; LEAVE;
809     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
810     }
811    
812     SvREFCNT_dec (sv);
813     sv = SvREFCNT_inc (POPs);
814 root 1.11
815 root 1.18 PUTBACK;
816 root 1.11
817 root 1.18 FREETMPS; LEAVE;
818 root 1.11
819 root 1.18 return sv;
820     }
821 root 1.9
822 root 1.18 default:
823     {
824 root 1.19 sv = decode_sv (dec);
825    
826 root 1.18 AV *av = newAV ();
827     av_push (av, newSVuv (tag));
828     av_push (av, sv);
829    
830     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
831     ? cbor_tagged_stash
832     : gv_stashpv ("CBOR::XS::Tagged" , 1);
833 root 1.7
834 root 1.18 return sv_bless (newRV_noinc ((SV *)av), tagged_stash);
835     }
836 root 1.11 }
837 root 1.9
838     fail:
839     SvREFCNT_dec (sv);
840     return &PL_sv_undef;
841 root 1.3 }
842    
843     static SV *
844 root 1.1 decode_sv (dec_t *dec)
845     {
846     WANT (1);
847    
848     switch (*dec->cur >> 5)
849     {
850     case 0: // unsigned int
851     return newSVuv (decode_uint (dec));
852     case 1: // negative int
853     return newSViv (-1 - (IV)decode_uint (dec));
854     case 2: // octet string
855     return decode_str (dec, 0);
856     case 3: // utf-8 string
857     return decode_str (dec, 1);
858     case 4: // array
859     return decode_av (dec);
860     case 5: // map
861     return decode_hv (dec);
862     case 6: // tag
863 root 1.3 return decode_tagged (dec);
864 root 1.1 case 7: // misc
865     switch (*dec->cur++ & 31)
866     {
867     case 20:
868     #if CBOR_SLOW
869 root 1.10 types_false = get_bool ("Types::Serialiser::false");
870 root 1.1 #endif
871 root 1.10 return newSVsv (types_false);
872 root 1.1 case 21:
873     #if CBOR_SLOW
874 root 1.10 types_true = get_bool ("Types::Serialiser::true");
875 root 1.1 #endif
876 root 1.10 return newSVsv (types_true);
877 root 1.1 case 22:
878     return newSVsv (&PL_sv_undef);
879 root 1.10 case 23:
880     #if CBOR_SLOW
881     types_error = get_bool ("Types::Serialiser::error");
882     #endif
883     return newSVsv (types_error);
884 root 1.1
885     case 25:
886 root 1.2 {
887     WANT (2);
888    
889     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
890     dec->cur += 2;
891    
892     return newSVnv (ecb_binary16_to_float (fp));
893     }
894 root 1.1
895     case 26:
896     {
897     uint32_t fp;
898     WANT (4);
899     memcpy (&fp, dec->cur, 4);
900     dec->cur += 4;
901    
902     if (!ecb_big_endian ())
903     fp = ecb_bswap32 (fp);
904    
905     return newSVnv (ecb_binary32_to_float (fp));
906     }
907    
908     case 27:
909     {
910     uint64_t fp;
911     WANT (8);
912     memcpy (&fp, dec->cur, 8);
913     dec->cur += 8;
914    
915     if (!ecb_big_endian ())
916     fp = ecb_bswap64 (fp);
917    
918     return newSVnv (ecb_binary64_to_double (fp));
919     }
920    
921     // 0..19 unassigned
922     // 24 reserved + unassigned (reserved values are not encodable)
923     default:
924     ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
925     }
926    
927     break;
928     }
929    
930     fail:
931     return &PL_sv_undef;
932     }
933    
934     static SV *
935     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
936     {
937 root 1.18 dec_t dec = { };
938 root 1.1 SV *sv;
939 root 1.16 STRLEN len;
940     char *data = SvPVbyte (string, len);
941 root 1.1
942 root 1.16 if (len > cbor->max_size && cbor->max_size)
943     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
944     (unsigned long)len, (unsigned long)cbor->max_size);
945 root 1.1
946     dec.cbor = *cbor;
947 root 1.16 dec.cur = (U8 *)data;
948     dec.end = (U8 *)data + len;
949 root 1.1
950     sv = decode_sv (&dec);
951    
952     if (offset_return)
953     *offset_return = dec.cur;
954    
955     if (!(offset_return || !sv))
956 root 1.2 if (dec.cur != dec.end && !dec.err)
957     dec.err = "garbage after CBOR object";
958    
959     if (dec.err)
960 root 1.1 {
961 root 1.2 SvREFCNT_dec (sv);
962 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
963 root 1.1 }
964    
965     sv = sv_2mortal (sv);
966    
967     return sv;
968     }
969    
970     /////////////////////////////////////////////////////////////////////////////
971     // XS interface functions
972    
973     MODULE = CBOR::XS PACKAGE = CBOR::XS
974    
975     BOOT:
976     {
977     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
978 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
979 root 1.1
980 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
981     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
982    
983     types_true = get_bool ("Types::Serialiser::true" );
984     types_false = get_bool ("Types::Serialiser::false");
985     types_error = get_bool ("Types::Serialiser::error");
986 root 1.11
987     sv_cbor = newSVpv ("CBOR", 0);
988     SvREADONLY_on (sv_cbor);
989 root 1.1 }
990    
991     PROTOTYPES: DISABLE
992    
993     void CLONE (...)
994     CODE:
995 root 1.10 cbor_stash = 0;
996     cbor_tagged_stash = 0;
997     types_error_stash = 0;
998     types_boolean_stash = 0;
999 root 1.1
1000     void new (char *klass)
1001     PPCODE:
1002     {
1003     SV *pv = NEWSV (0, sizeof (CBOR));
1004     SvPOK_only (pv);
1005     cbor_init ((CBOR *)SvPVX (pv));
1006     XPUSHs (sv_2mortal (sv_bless (
1007     newRV_noinc (pv),
1008     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1009     )));
1010     }
1011    
1012     void shrink (CBOR *self, int enable = 1)
1013     ALIAS:
1014     shrink = F_SHRINK
1015     allow_unknown = F_ALLOW_UNKNOWN
1016 root 1.18 allow_sharing = F_ALLOW_SHARING
1017     dedup_keys = F_DEDUP_KEYS
1018     dedup_strings = F_DEDUP_STRINGS
1019 root 1.1 PPCODE:
1020     {
1021     if (enable)
1022     self->flags |= ix;
1023     else
1024     self->flags &= ~ix;
1025    
1026     XPUSHs (ST (0));
1027     }
1028    
1029     void get_shrink (CBOR *self)
1030     ALIAS:
1031     get_shrink = F_SHRINK
1032     get_allow_unknown = F_ALLOW_UNKNOWN
1033 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1034     get_dedup_keys = F_DEDUP_KEYS
1035     get_dedup_strings = F_DEDUP_STRINGS
1036 root 1.1 PPCODE:
1037     XPUSHs (boolSV (self->flags & ix));
1038    
1039     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1040     PPCODE:
1041     self->max_depth = max_depth;
1042     XPUSHs (ST (0));
1043    
1044     U32 get_max_depth (CBOR *self)
1045     CODE:
1046     RETVAL = self->max_depth;
1047     OUTPUT:
1048     RETVAL
1049    
1050     void max_size (CBOR *self, U32 max_size = 0)
1051     PPCODE:
1052     self->max_size = max_size;
1053     XPUSHs (ST (0));
1054    
1055     int get_max_size (CBOR *self)
1056     CODE:
1057     RETVAL = self->max_size;
1058     OUTPUT:
1059     RETVAL
1060    
1061     void encode (CBOR *self, SV *scalar)
1062     PPCODE:
1063     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1064     XPUSHs (scalar);
1065    
1066     void decode (CBOR *self, SV *cborstr)
1067     PPCODE:
1068     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1069     XPUSHs (cborstr);
1070    
1071     void decode_prefix (CBOR *self, SV *cborstr)
1072     PPCODE:
1073     {
1074     SV *sv;
1075     char *offset;
1076     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1077     EXTEND (SP, 2);
1078     PUSHs (sv);
1079     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1080     }
1081    
1082     PROTOTYPES: ENABLE
1083    
1084     void encode_cbor (SV *scalar)
1085     PPCODE:
1086     {
1087     CBOR cbor;
1088     cbor_init (&cbor);
1089     PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1090     XPUSHs (scalar);
1091     }
1092    
1093     void decode_cbor (SV *cborstr)
1094     PPCODE:
1095     {
1096     CBOR cbor;
1097     cbor_init (&cbor);
1098     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1099     XPUSHs (cborstr);
1100     }
1101