ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.20
Committed: Wed Nov 20 11:06:42 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.19: +91 -13 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12     #include "ecb.h"
13    
14 root 1.14 // compatibility with perl <5.18
15     #ifndef HvNAMELEN_get
16     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
17     #endif
18     #ifndef HvNAMELEN
19     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
20     #endif
21     #ifndef HvNAMEUTF8
22     # define HvNAMEUTF8(hv) 0
23     #endif
24    
25 root 1.9 // known tags
26 root 1.8 enum cbor_tag
27     {
28 root 1.9 // inofficial extensions (pending iana registration)
29 root 1.18 CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object
30     CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object
31 root 1.19 CBOR_TAG_VALUE_SHAREABLE = 26, // http://cbor.schmorp.de/value-sharing
32 root 1.18 CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing
33     CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref
34     CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref
35     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
36 root 1.9
37     // rfc7049
38     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
39     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
40     CBOR_TAG_POS_BIGNUM = 2, // byte string
41     CBOR_TAG_NEG_BIGNUM = 3, // byte string
42     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
43     CBOR_TAG_BIGFLOAT = 5, // array
44    
45     CBOR_TAG_CONV_B64U = 21, // base64url, any
46     CBOR_TAG_CONV_B64 = 22, // base64, any
47     CBOR_TAG_CONV_HEX = 23, // base16, any
48     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
49    
50     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
51     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
52     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
53     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
54     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
55 root 1.8
56 root 1.9 CBOR_TAG_MAGIC = 55799 // self-describe cbor
57 root 1.8 };
58    
59 root 1.18 #define F_SHRINK 0x00000001UL
60     #define F_ALLOW_UNKNOWN 0x00000002UL
61     #define F_ALLOW_SHARING 0x00000004UL //TODO
62     #define F_DEDUP_STRINGS 0x00000008UL //TODO
63     #define F_DEDUP_KEYS 0x00000010UL //TODO
64 root 1.1
65     #define INIT_SIZE 32 // initial scalar size to be allocated
66    
67     #define SB do {
68     #define SE } while (0)
69    
70     #define IN_RANGE_INC(type,val,beg,end) \
71     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
72     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
73    
74     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
75    
76     #ifdef USE_ITHREADS
77     # define CBOR_SLOW 1
78     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
79     #else
80     # define CBOR_SLOW 0
81     # define CBOR_STASH cbor_stash
82     #endif
83    
84 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
85 root 1.11 static SV *types_true, *types_false, *types_error, *sv_cbor;
86 root 1.1
87     typedef struct {
88     U32 flags;
89     U32 max_depth;
90     STRLEN max_size;
91     } CBOR;
92    
93 root 1.5 ecb_inline void
94 root 1.1 cbor_init (CBOR *cbor)
95     {
96     Zero (cbor, 1, CBOR);
97     cbor->max_depth = 512;
98     }
99    
100     /////////////////////////////////////////////////////////////////////////////
101     // utility functions
102    
103 root 1.5 ecb_inline SV *
104 root 1.1 get_bool (const char *name)
105     {
106     SV *sv = get_sv (name, 1);
107    
108     SvREADONLY_on (sv);
109     SvREADONLY_on (SvRV (sv));
110    
111     return sv;
112     }
113    
114 root 1.5 ecb_inline void
115 root 1.1 shrink (SV *sv)
116     {
117     sv_utf8_downgrade (sv, 1);
118    
119     if (SvLEN (sv) > SvCUR (sv) + 1)
120     {
121     #ifdef SvPV_shrink_to_cur
122     SvPV_shrink_to_cur (sv);
123     #elif defined (SvPV_renew)
124     SvPV_renew (sv, SvCUR (sv) + 1);
125     #endif
126     }
127     }
128    
129     /////////////////////////////////////////////////////////////////////////////
130     // encoder
131    
132     // structure used for encoding CBOR
133     typedef struct
134     {
135     char *cur; // SvPVX (sv) + current output position
136     char *end; // SvEND (sv)
137     SV *sv; // result scalar
138     CBOR cbor;
139     U32 depth; // recursion level
140 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
141     UV stringref_idx;
142 root 1.19 HV *shareable; // ptr => index, or 0
143     UV shareable_idx;
144 root 1.1 } enc_t;
145    
146 root 1.5 ecb_inline void
147 root 1.1 need (enc_t *enc, STRLEN len)
148     {
149 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
150 root 1.1 {
151     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
152     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
153     enc->cur = SvPVX (enc->sv) + cur;
154     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
155     }
156     }
157    
158 root 1.5 ecb_inline void
159 root 1.1 encode_ch (enc_t *enc, char ch)
160     {
161     need (enc, 1);
162     *enc->cur++ = ch;
163     }
164    
165     static void
166     encode_uint (enc_t *enc, int major, UV len)
167     {
168     need (enc, 9);
169    
170     if (len < 24)
171     *enc->cur++ = major | len;
172 root 1.4 else if (len <= 0xff)
173 root 1.1 {
174     *enc->cur++ = major | 24;
175     *enc->cur++ = len;
176     }
177 root 1.4 else if (len <= 0xffff)
178 root 1.1 {
179     *enc->cur++ = major | 25;
180     *enc->cur++ = len >> 8;
181     *enc->cur++ = len;
182     }
183 root 1.4 else if (len <= 0xffffffff)
184 root 1.1 {
185     *enc->cur++ = major | 26;
186     *enc->cur++ = len >> 24;
187     *enc->cur++ = len >> 16;
188     *enc->cur++ = len >> 8;
189     *enc->cur++ = len;
190     }
191 root 1.4 else
192 root 1.1 {
193     *enc->cur++ = major | 27;
194     *enc->cur++ = len >> 56;
195     *enc->cur++ = len >> 48;
196     *enc->cur++ = len >> 40;
197     *enc->cur++ = len >> 32;
198     *enc->cur++ = len >> 24;
199     *enc->cur++ = len >> 16;
200     *enc->cur++ = len >> 8;
201     *enc->cur++ = len;
202     }
203     }
204    
205     static void
206     encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
207     {
208     encode_uint (enc, utf8 ? 0x60 : 0x40, len);
209     need (enc, len);
210     memcpy (enc->cur, str, len);
211     enc->cur += len;
212     }
213    
214 root 1.18 ecb_inline void
215     encode_tag (enc_t *enc, UV tag)
216     {
217     encode_uint (enc, 0xc0, tag);
218     }
219    
220 root 1.1 static void encode_sv (enc_t *enc, SV *sv);
221    
222     static void
223     encode_av (enc_t *enc, AV *av)
224     {
225     int i, len = av_len (av);
226    
227     if (enc->depth >= enc->cbor.max_depth)
228     croak (ERR_NESTING_EXCEEDED);
229    
230     ++enc->depth;
231    
232     encode_uint (enc, 0x80, len + 1);
233    
234     for (i = 0; i <= len; ++i)
235     {
236     SV **svp = av_fetch (av, i, 0);
237     encode_sv (enc, svp ? *svp : &PL_sv_undef);
238     }
239    
240     --enc->depth;
241     }
242    
243 root 1.20 ecb_inline void
244     encode_he (enc_t *enc, HE *he)
245     {
246     if (HeKLEN (he) == HEf_SVKEY)
247     encode_sv (enc, HeSVKEY (he));
248     else
249     encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
250     }
251    
252 root 1.1 static void
253     encode_hv (enc_t *enc, HV *hv)
254     {
255     HE *he;
256    
257     if (enc->depth >= enc->cbor.max_depth)
258     croak (ERR_NESTING_EXCEEDED);
259    
260     ++enc->depth;
261    
262     int pairs = hv_iterinit (hv);
263     int mg = SvMAGICAL (hv);
264    
265     if (mg)
266     encode_ch (enc, 0xa0 | 31);
267     else
268     encode_uint (enc, 0xa0, pairs);
269    
270     while ((he = hv_iternext (hv)))
271     {
272 root 1.20 if (ecb_expect_false (enc->cbor.flags & (F_DEDUP_STRINGS | F_DEDUP_KEYS)))
273     {
274     SV **svp;
275    
276     if (HeKLEN (he) == HEf_SVKEY)
277     svp = hv_fetch_ent (enc->stringref[!! SvUTF8 (HeSVKEY (he))], HeSVKEY (he) , 1, 0);//TODO return HE :/
278     else
279     svp = hv_fetch (enc->stringref[!! HeKUTF8 (he) ], HeKEY (he), HeKLEN (he), 1);
280    
281     if (SvOK (*svp))
282     {
283     encode_tag (enc, CBOR_TAG_STRINGREF);
284     encode_uint (enc, 0x00, SvUV (*svp));
285     }
286     else
287     {
288     sv_setuv (*svp, enc->stringref_idx);
289     ++enc->stringref_idx;
290     encode_he (enc, he);
291     }
292     }
293 root 1.1 else
294 root 1.20 encode_he (enc, he);
295 root 1.1
296 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
297 root 1.1 }
298    
299     if (mg)
300     encode_ch (enc, 0xe0 | 31);
301    
302     --enc->depth;
303     }
304    
305     // encode objects, arrays and special \0=false and \1=true values.
306     static void
307     encode_rv (enc_t *enc, SV *sv)
308     {
309 root 1.19 SvGETMAGIC (sv);
310 root 1.1
311 root 1.19 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
312     && ecb_expect_false (SvREFCNT (sv) > 1))
313     {
314     if (!enc->shareable)
315     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
316    
317     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
318    
319     if (SvOK (*svp))
320     {
321     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
322     encode_uint (enc, 0x00, SvUV (*svp));
323     return;
324     }
325     else
326     {
327     sv_setuv (*svp, enc->shareable_idx);
328     ++enc->shareable_idx;
329     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
330     }
331     }
332 root 1.1
333 root 1.19 svtype svt = SvTYPE (sv);
334 root 1.18
335 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
336 root 1.1 {
337 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
338     ? types_boolean_stash
339     : gv_stashpv ("Types::Serialiser::Boolean", 1);
340     HV *error_stash = !CBOR_SLOW || types_error_stash
341     ? types_error_stash
342     : gv_stashpv ("Types::Serialiser::Error", 1);
343 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
344     ? cbor_tagged_stash
345     : gv_stashpv ("CBOR::XS::Tagged" , 1);
346 root 1.1
347 root 1.11 HV *stash = SvSTASH (sv);
348     GV *method;
349    
350     if (stash == boolean_stash)
351 root 1.1 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
352 root 1.11 else if (stash == error_stash)
353 root 1.10 encode_ch (enc, 0xe0 | 23);
354 root 1.11 else if (stash == tagged_stash)
355 root 1.6 {
356     if (svt != SVt_PVAV)
357     croak ("encountered CBOR::XS::Tagged object that isn't an array");
358    
359     encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
360     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
361     }
362 root 1.11 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
363 root 1.1 {
364 root 1.11 dSP;
365    
366     ENTER; SAVETMPS; PUSHMARK (SP);
367 root 1.6 // we re-bless the reference to get overload and other niceties right
368 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
369    
370     PUTBACK;
371     // G_SCALAR ensures that return value is 1
372     call_sv ((SV *)GvCV (method), G_SCALAR);
373     SPAGAIN;
374    
375     // catch this surprisingly common error
376     if (SvROK (TOPs) && SvRV (TOPs) == sv)
377     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
378    
379     encode_sv (enc, POPs);
380    
381     PUTBACK;
382    
383     FREETMPS; LEAVE;
384     }
385     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
386     {
387     dSP;
388 root 1.6
389 root 1.11 ENTER; SAVETMPS; PUSHMARK (SP);
390     EXTEND (SP, 2);
391     // we re-bless the reference to get overload and other niceties right
392     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
393     PUSHs (sv_cbor);
394 root 1.1
395 root 1.11 PUTBACK;
396     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
397     SPAGAIN;
398 root 1.6
399 root 1.11 // catch this surprisingly common error
400     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
401     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
402 root 1.6
403 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
404 root 1.11 encode_uint (enc, 0x80, count + 1);
405     encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
406 root 1.6
407 root 1.11 while (count)
408     encode_sv (enc, SP[1 - count--]);
409 root 1.6
410 root 1.11 PUTBACK;
411 root 1.6
412 root 1.11 FREETMPS; LEAVE;
413 root 1.1 }
414 root 1.11 else
415     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
416     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
417 root 1.1 }
418     else if (svt == SVt_PVHV)
419     encode_hv (enc, (HV *)sv);
420     else if (svt == SVt_PVAV)
421     encode_av (enc, (AV *)sv);
422 root 1.18 else
423 root 1.1 {
424 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
425     encode_sv (enc, sv);
426 root 1.1 }
427     }
428    
429     static void
430     encode_nv (enc_t *enc, SV *sv)
431     {
432     double nv = SvNVX (sv);
433    
434     need (enc, 9);
435    
436 root 1.5 if (ecb_expect_false (nv == (U32)nv))
437 root 1.1 encode_uint (enc, 0x00, (U32)nv);
438     //TODO: maybe I32?
439 root 1.5 else if (ecb_expect_false (nv == (float)nv))
440 root 1.1 {
441     uint32_t fp = ecb_float_to_binary32 (nv);
442    
443     *enc->cur++ = 0xe0 | 26;
444    
445     if (!ecb_big_endian ())
446     fp = ecb_bswap32 (fp);
447    
448     memcpy (enc->cur, &fp, 4);
449     enc->cur += 4;
450     }
451     else
452     {
453     uint64_t fp = ecb_double_to_binary64 (nv);
454    
455     *enc->cur++ = 0xe0 | 27;
456    
457     if (!ecb_big_endian ())
458     fp = ecb_bswap64 (fp);
459    
460     memcpy (enc->cur, &fp, 8);
461     enc->cur += 8;
462     }
463     }
464    
465     static void
466     encode_sv (enc_t *enc, SV *sv)
467     {
468     SvGETMAGIC (sv);
469    
470     if (SvPOKp (sv))
471     {
472     STRLEN len;
473     char *str = SvPV (sv, len);
474     encode_str (enc, SvUTF8 (sv), str, len);
475     }
476     else if (SvNOKp (sv))
477     encode_nv (enc, sv);
478     else if (SvIOKp (sv))
479     {
480     if (SvIsUV (sv))
481     encode_uint (enc, 0x00, SvUVX (sv));
482     else if (SvIVX (sv) >= 0)
483     encode_uint (enc, 0x00, SvIVX (sv));
484     else
485     encode_uint (enc, 0x20, -(SvIVX (sv) + 1));
486     }
487     else if (SvROK (sv))
488     encode_rv (enc, SvRV (sv));
489     else if (!SvOK (sv))
490     encode_ch (enc, 0xe0 | 22);
491     else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
492     encode_ch (enc, 0xe0 | 23);
493     else
494     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
495     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
496     }
497    
498     static SV *
499     encode_cbor (SV *scalar, CBOR *cbor)
500     {
501 root 1.18 enc_t enc = { };
502 root 1.1
503     enc.cbor = *cbor;
504     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
505     enc.cur = SvPVX (enc.sv);
506     enc.end = SvEND (enc.sv);
507    
508     SvPOK_only (enc.sv);
509 root 1.20
510     if (cbor->flags & (F_DEDUP_STRINGS | F_DEDUP_KEYS))
511     {
512     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
513     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
514     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
515     }
516    
517 root 1.1 encode_sv (&enc, scalar);
518    
519     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
520     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
521    
522     if (enc.cbor.flags & F_SHRINK)
523     shrink (enc.sv);
524    
525     return enc.sv;
526     }
527    
528     /////////////////////////////////////////////////////////////////////////////
529     // decoder
530    
531     // structure used for decoding CBOR
532     typedef struct
533     {
534     U8 *cur; // current parser pointer
535     U8 *end; // end of input string
536     const char *err; // parse error, if != 0
537     CBOR cbor;
538     U32 depth; // recursion depth
539     U32 maxdepth; // recursion depth limit
540 root 1.19 AV *shareable;
541 root 1.20 AV *stringref;
542 root 1.1 } dec_t;
543    
544     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
545    
546 root 1.5 #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
547 root 1.1
548     #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED)
549     #define DEC_DEC_DEPTH --dec->depth
550    
551     static UV
552     decode_uint (dec_t *dec)
553     {
554     switch (*dec->cur & 31)
555     {
556     case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
557     case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
558     case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
559     return *dec->cur++ & 31;
560    
561     case 24:
562     WANT (2);
563     dec->cur += 2;
564     return dec->cur[-1];
565    
566     case 25:
567     WANT (3);
568     dec->cur += 3;
569     return (((UV)dec->cur[-2]) << 8)
570     | ((UV)dec->cur[-1]);
571    
572     case 26:
573     WANT (5);
574     dec->cur += 5;
575     return (((UV)dec->cur[-4]) << 24)
576     | (((UV)dec->cur[-3]) << 16)
577     | (((UV)dec->cur[-2]) << 8)
578     | ((UV)dec->cur[-1]);
579    
580     case 27:
581     WANT (9);
582     dec->cur += 9;
583     return (((UV)dec->cur[-8]) << 56)
584     | (((UV)dec->cur[-7]) << 48)
585     | (((UV)dec->cur[-6]) << 40)
586     | (((UV)dec->cur[-5]) << 32)
587     | (((UV)dec->cur[-4]) << 24)
588     | (((UV)dec->cur[-3]) << 16)
589     | (((UV)dec->cur[-2]) << 8)
590     | ((UV)dec->cur[-1]);
591    
592     default:
593     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
594     }
595    
596     fail:
597     return 0;
598     }
599    
600     static SV *decode_sv (dec_t *dec);
601    
602     static SV *
603     decode_av (dec_t *dec)
604     {
605     AV *av = newAV ();
606    
607     DEC_INC_DEPTH;
608    
609     if ((*dec->cur & 31) == 31)
610     {
611     ++dec->cur;
612    
613     for (;;)
614     {
615     WANT (1);
616    
617 root 1.2 if (*dec->cur == (0xe0 | 31))
618 root 1.1 {
619     ++dec->cur;
620     break;
621     }
622    
623     av_push (av, decode_sv (dec));
624     }
625     }
626     else
627     {
628     int i, len = decode_uint (dec);
629    
630     av_fill (av, len - 1);
631    
632     for (i = 0; i < len; ++i)
633     AvARRAY (av)[i] = decode_sv (dec);
634     }
635    
636     DEC_DEC_DEPTH;
637     return newRV_noinc ((SV *)av);
638    
639     fail:
640     SvREFCNT_dec (av);
641     DEC_DEC_DEPTH;
642     return &PL_sv_undef;
643     }
644    
645 root 1.16 static void
646     decode_he (dec_t *dec, HV *hv)
647     {
648     // for speed reasons, we specialcase single-string
649     // byte or utf-8 strings as keys.
650    
651     if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
652     {
653     I32 len = decode_uint (dec);
654     char *key = (char *)dec->cur;
655    
656     dec->cur += len;
657    
658 root 1.20 if (ecb_expect_false (dec->stringref))
659     av_push (dec->stringref, newSVpvn (key, len));
660    
661 root 1.16 hv_store (hv, key, len, decode_sv (dec), 0);
662     }
663     else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
664     {
665     I32 len = decode_uint (dec);
666     char *key = (char *)dec->cur;
667    
668     dec->cur += len;
669    
670 root 1.20 if (ecb_expect_false (dec->stringref))
671     av_push (dec->stringref, newSVpvn_utf8 (key, len, 1));
672    
673 root 1.16 hv_store (hv, key, -len, decode_sv (dec), 0);
674     }
675     else
676     {
677     SV *k = decode_sv (dec);
678     SV *v = decode_sv (dec);
679    
680     hv_store_ent (hv, k, v, 0);
681     SvREFCNT_dec (k);
682     }
683     }
684    
685 root 1.1 static SV *
686     decode_hv (dec_t *dec)
687     {
688     HV *hv = newHV ();
689    
690     DEC_INC_DEPTH;
691    
692     if ((*dec->cur & 31) == 31)
693     {
694     ++dec->cur;
695    
696     for (;;)
697     {
698     WANT (1);
699    
700 root 1.2 if (*dec->cur == (0xe0 | 31))
701 root 1.1 {
702     ++dec->cur;
703     break;
704     }
705    
706 root 1.16 decode_he (dec, hv);
707 root 1.1 }
708     }
709     else
710     {
711 root 1.16 int pairs = decode_uint (dec);
712 root 1.1
713 root 1.16 while (pairs--)
714     decode_he (dec, hv);
715 root 1.1 }
716    
717     DEC_DEC_DEPTH;
718     return newRV_noinc ((SV *)hv);
719    
720     fail:
721     SvREFCNT_dec (hv);
722     DEC_DEC_DEPTH;
723     return &PL_sv_undef;
724     }
725    
726     static SV *
727     decode_str (dec_t *dec, int utf8)
728     {
729 root 1.6 SV *sv = 0;
730 root 1.1
731     if ((*dec->cur & 31) == 31)
732     {
733     ++dec->cur;
734    
735     sv = newSVpvn ("", 0);
736    
737     // not very fast, and certainly not robust against illegal input
738     for (;;)
739     {
740     WANT (1);
741    
742 root 1.2 if (*dec->cur == (0xe0 | 31))
743 root 1.1 {
744     ++dec->cur;
745     break;
746     }
747    
748 root 1.6 sv_catsv (sv, decode_sv (dec));
749 root 1.1 }
750     }
751     else
752     {
753     STRLEN len = decode_uint (dec);
754    
755     WANT (len);
756     sv = newSVpvn (dec->cur, len);
757     dec->cur += len;
758     }
759    
760     if (utf8)
761     SvUTF8_on (sv);
762    
763 root 1.20 if (ecb_expect_false (dec->stringref))
764     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
765    
766 root 1.1 return sv;
767    
768     fail:
769 root 1.6 SvREFCNT_dec (sv);
770 root 1.1 return &PL_sv_undef;
771     }
772    
773     static SV *
774 root 1.3 decode_tagged (dec_t *dec)
775     {
776 root 1.19 SV *sv = 0;
777 root 1.3 UV tag = decode_uint (dec);
778 root 1.19
779     WANT (1);
780 root 1.3
781 root 1.18 switch (tag)
782     {
783     case CBOR_TAG_MAGIC:
784 root 1.20 sv = decode_sv (dec);
785     break;
786 root 1.18
787     case CBOR_TAG_INDIRECTION:
788 root 1.20 sv = newRV_noinc (decode_sv (dec));
789     break;
790    
791     case CBOR_TAG_STRINGREF_NAMESPACE:
792     {
793     ENTER; SAVETMPS;
794    
795     SAVESPTR (dec->stringref);
796     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
797    
798     sv = decode_sv (dec);
799    
800     FREETMPS; LEAVE;
801     }
802     break;
803    
804     case CBOR_TAG_STRINGREF:
805     {
806     if ((*dec->cur >> 5) != 0)
807     ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
808    
809     UV idx = decode_uint (dec);
810    
811     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
812     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
813    
814     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
815     }
816     break;
817 root 1.11
818 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
819     {
820     if (ecb_expect_false (!dec->shareable))
821     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
822    
823     sv = newSV (0);
824     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
825 root 1.18
826 root 1.19 SV *osv = decode_sv (dec);
827     sv_setsv (sv, osv);
828     SvREFCNT_dec_NN (osv);
829     }
830 root 1.20 break;
831 root 1.18
832     case CBOR_TAG_VALUE_SHAREDREF:
833 root 1.17 {
834 root 1.19 if ((*dec->cur >> 5) != 0)
835     ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
836 root 1.18
837 root 1.19 UV idx = decode_uint (dec);
838    
839 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
840 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
841    
842 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
843 root 1.17 }
844 root 1.20 break;
845 root 1.17
846 root 1.18 case CBOR_TAG_PERL_OBJECT:
847     {
848 root 1.19 sv = decode_sv (dec);
849    
850 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
851     ERR ("corrupted CBOR data (non-array perl object)");
852    
853     AV *av = (AV *)SvRV (sv);
854     int len = av_len (av) + 1;
855     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
856    
857     if (!stash)
858     ERR ("cannot decode perl-object (package does not exist)");
859    
860     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
861    
862     if (!method)
863     ERR ("cannot decode perl-object (package does not have a THAW method)");
864    
865     dSP;
866    
867     ENTER; SAVETMPS; PUSHMARK (SP);
868     EXTEND (SP, len + 1);
869     // we re-bless the reference to get overload and other niceties right
870     PUSHs (*av_fetch (av, 0, 1));
871     PUSHs (sv_cbor);
872    
873     int i;
874    
875     for (i = 1; i < len; ++i)
876     PUSHs (*av_fetch (av, i, 1));
877    
878     PUTBACK;
879     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
880     SPAGAIN;
881    
882     if (SvTRUE (ERRSV))
883     {
884     FREETMPS; LEAVE;
885     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
886     }
887    
888     SvREFCNT_dec (sv);
889     sv = SvREFCNT_inc (POPs);
890 root 1.11
891 root 1.18 PUTBACK;
892 root 1.11
893 root 1.18 FREETMPS; LEAVE;
894     }
895 root 1.20 break;
896 root 1.9
897 root 1.18 default:
898     {
899 root 1.19 sv = decode_sv (dec);
900    
901 root 1.18 AV *av = newAV ();
902     av_push (av, newSVuv (tag));
903     av_push (av, sv);
904    
905     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
906     ? cbor_tagged_stash
907     : gv_stashpv ("CBOR::XS::Tagged" , 1);
908 root 1.7
909 root 1.20 sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
910 root 1.18 }
911 root 1.20 break;
912 root 1.11 }
913 root 1.9
914 root 1.20 return sv;
915    
916 root 1.9 fail:
917     SvREFCNT_dec (sv);
918     return &PL_sv_undef;
919 root 1.3 }
920    
921     static SV *
922 root 1.1 decode_sv (dec_t *dec)
923     {
924     WANT (1);
925    
926     switch (*dec->cur >> 5)
927     {
928     case 0: // unsigned int
929     return newSVuv (decode_uint (dec));
930     case 1: // negative int
931     return newSViv (-1 - (IV)decode_uint (dec));
932     case 2: // octet string
933     return decode_str (dec, 0);
934     case 3: // utf-8 string
935     return decode_str (dec, 1);
936     case 4: // array
937     return decode_av (dec);
938     case 5: // map
939     return decode_hv (dec);
940     case 6: // tag
941 root 1.3 return decode_tagged (dec);
942 root 1.1 case 7: // misc
943     switch (*dec->cur++ & 31)
944     {
945     case 20:
946     #if CBOR_SLOW
947 root 1.10 types_false = get_bool ("Types::Serialiser::false");
948 root 1.1 #endif
949 root 1.10 return newSVsv (types_false);
950 root 1.1 case 21:
951     #if CBOR_SLOW
952 root 1.10 types_true = get_bool ("Types::Serialiser::true");
953 root 1.1 #endif
954 root 1.10 return newSVsv (types_true);
955 root 1.1 case 22:
956     return newSVsv (&PL_sv_undef);
957 root 1.10 case 23:
958     #if CBOR_SLOW
959     types_error = get_bool ("Types::Serialiser::error");
960     #endif
961     return newSVsv (types_error);
962 root 1.1
963     case 25:
964 root 1.2 {
965     WANT (2);
966    
967     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
968     dec->cur += 2;
969    
970     return newSVnv (ecb_binary16_to_float (fp));
971     }
972 root 1.1
973     case 26:
974     {
975     uint32_t fp;
976     WANT (4);
977     memcpy (&fp, dec->cur, 4);
978     dec->cur += 4;
979    
980     if (!ecb_big_endian ())
981     fp = ecb_bswap32 (fp);
982    
983     return newSVnv (ecb_binary32_to_float (fp));
984     }
985    
986     case 27:
987     {
988     uint64_t fp;
989     WANT (8);
990     memcpy (&fp, dec->cur, 8);
991     dec->cur += 8;
992    
993     if (!ecb_big_endian ())
994     fp = ecb_bswap64 (fp);
995    
996     return newSVnv (ecb_binary64_to_double (fp));
997     }
998    
999     // 0..19 unassigned
1000     // 24 reserved + unassigned (reserved values are not encodable)
1001     default:
1002     ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
1003     }
1004    
1005     break;
1006     }
1007    
1008     fail:
1009     return &PL_sv_undef;
1010     }
1011    
1012     static SV *
1013     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1014     {
1015 root 1.18 dec_t dec = { };
1016 root 1.1 SV *sv;
1017 root 1.16 STRLEN len;
1018     char *data = SvPVbyte (string, len);
1019 root 1.1
1020 root 1.16 if (len > cbor->max_size && cbor->max_size)
1021     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1022     (unsigned long)len, (unsigned long)cbor->max_size);
1023 root 1.1
1024     dec.cbor = *cbor;
1025 root 1.16 dec.cur = (U8 *)data;
1026     dec.end = (U8 *)data + len;
1027 root 1.1
1028     sv = decode_sv (&dec);
1029    
1030     if (offset_return)
1031     *offset_return = dec.cur;
1032    
1033     if (!(offset_return || !sv))
1034 root 1.2 if (dec.cur != dec.end && !dec.err)
1035     dec.err = "garbage after CBOR object";
1036    
1037     if (dec.err)
1038 root 1.1 {
1039 root 1.2 SvREFCNT_dec (sv);
1040 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1041 root 1.1 }
1042    
1043     sv = sv_2mortal (sv);
1044    
1045     return sv;
1046     }
1047    
1048     /////////////////////////////////////////////////////////////////////////////
1049     // XS interface functions
1050    
1051     MODULE = CBOR::XS PACKAGE = CBOR::XS
1052    
1053     BOOT:
1054     {
1055     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1056 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1057 root 1.1
1058 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1059     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1060    
1061     types_true = get_bool ("Types::Serialiser::true" );
1062     types_false = get_bool ("Types::Serialiser::false");
1063     types_error = get_bool ("Types::Serialiser::error");
1064 root 1.11
1065     sv_cbor = newSVpv ("CBOR", 0);
1066     SvREADONLY_on (sv_cbor);
1067 root 1.1 }
1068    
1069     PROTOTYPES: DISABLE
1070    
1071     void CLONE (...)
1072     CODE:
1073 root 1.10 cbor_stash = 0;
1074     cbor_tagged_stash = 0;
1075     types_error_stash = 0;
1076     types_boolean_stash = 0;
1077 root 1.1
1078     void new (char *klass)
1079     PPCODE:
1080     {
1081     SV *pv = NEWSV (0, sizeof (CBOR));
1082     SvPOK_only (pv);
1083     cbor_init ((CBOR *)SvPVX (pv));
1084     XPUSHs (sv_2mortal (sv_bless (
1085     newRV_noinc (pv),
1086     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1087     )));
1088     }
1089    
1090     void shrink (CBOR *self, int enable = 1)
1091     ALIAS:
1092     shrink = F_SHRINK
1093     allow_unknown = F_ALLOW_UNKNOWN
1094 root 1.18 allow_sharing = F_ALLOW_SHARING
1095     dedup_keys = F_DEDUP_KEYS
1096     dedup_strings = F_DEDUP_STRINGS
1097 root 1.1 PPCODE:
1098     {
1099     if (enable)
1100     self->flags |= ix;
1101     else
1102     self->flags &= ~ix;
1103    
1104     XPUSHs (ST (0));
1105     }
1106    
1107     void get_shrink (CBOR *self)
1108     ALIAS:
1109     get_shrink = F_SHRINK
1110     get_allow_unknown = F_ALLOW_UNKNOWN
1111 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1112     get_dedup_keys = F_DEDUP_KEYS
1113     get_dedup_strings = F_DEDUP_STRINGS
1114 root 1.1 PPCODE:
1115     XPUSHs (boolSV (self->flags & ix));
1116    
1117     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1118     PPCODE:
1119     self->max_depth = max_depth;
1120     XPUSHs (ST (0));
1121    
1122     U32 get_max_depth (CBOR *self)
1123     CODE:
1124     RETVAL = self->max_depth;
1125     OUTPUT:
1126     RETVAL
1127    
1128     void max_size (CBOR *self, U32 max_size = 0)
1129     PPCODE:
1130     self->max_size = max_size;
1131     XPUSHs (ST (0));
1132    
1133     int get_max_size (CBOR *self)
1134     CODE:
1135     RETVAL = self->max_size;
1136     OUTPUT:
1137     RETVAL
1138    
1139     void encode (CBOR *self, SV *scalar)
1140     PPCODE:
1141     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1142     XPUSHs (scalar);
1143    
1144     void decode (CBOR *self, SV *cborstr)
1145     PPCODE:
1146     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1147     XPUSHs (cborstr);
1148    
1149     void decode_prefix (CBOR *self, SV *cborstr)
1150     PPCODE:
1151     {
1152     SV *sv;
1153     char *offset;
1154     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1155     EXTEND (SP, 2);
1156     PUSHs (sv);
1157     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1158     }
1159    
1160     PROTOTYPES: ENABLE
1161    
1162     void encode_cbor (SV *scalar)
1163     PPCODE:
1164     {
1165     CBOR cbor;
1166     cbor_init (&cbor);
1167     PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1168     XPUSHs (scalar);
1169     }
1170    
1171     void decode_cbor (SV *cborstr)
1172     PPCODE:
1173     {
1174     CBOR cbor;
1175     cbor_init (&cbor);
1176     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1177     XPUSHs (cborstr);
1178     }
1179