ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.26
Committed: Fri Nov 22 09:40:13 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.25: +0 -5 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12     #include "ecb.h"
13    
14 root 1.14 // compatibility with perl <5.18
15     #ifndef HvNAMELEN_get
16     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
17     #endif
18     #ifndef HvNAMELEN
19     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
20     #endif
21     #ifndef HvNAMEUTF8
22     # define HvNAMEUTF8(hv) 0
23     #endif
24    
25 root 1.9 // known tags
26 root 1.8 enum cbor_tag
27     {
28 root 1.9 // inofficial extensions (pending iana registration)
29 root 1.18 CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object
30     CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object
31 root 1.19 CBOR_TAG_VALUE_SHAREABLE = 26, // http://cbor.schmorp.de/value-sharing
32 root 1.18 CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing
33     CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref
34     CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref
35     CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
36 root 1.9
37     // rfc7049
38     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
39     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
40     CBOR_TAG_POS_BIGNUM = 2, // byte string
41     CBOR_TAG_NEG_BIGNUM = 3, // byte string
42     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
43     CBOR_TAG_BIGFLOAT = 5, // array
44    
45     CBOR_TAG_CONV_B64U = 21, // base64url, any
46     CBOR_TAG_CONV_B64 = 22, // base64, any
47     CBOR_TAG_CONV_HEX = 23, // base16, any
48     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
49    
50     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
51     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
52     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
53     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
54     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
55 root 1.8
56 root 1.9 CBOR_TAG_MAGIC = 55799 // self-describe cbor
57 root 1.8 };
58    
59 root 1.24 #define F_SHRINK 0x00000001UL
60     #define F_ALLOW_UNKNOWN 0x00000002UL
61     #define F_ALLOW_SHARING 0x00000004UL //TODO
62     #define F_ALLOW_STRINGREF 0x00000008UL //TODO
63 root 1.1
64     #define INIT_SIZE 32 // initial scalar size to be allocated
65    
66     #define SB do {
67     #define SE } while (0)
68    
69     #define IN_RANGE_INC(type,val,beg,end) \
70     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
71     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
72    
73     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
74    
75     #ifdef USE_ITHREADS
76     # define CBOR_SLOW 1
77     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
78     #else
79     # define CBOR_SLOW 0
80     # define CBOR_STASH cbor_stash
81     #endif
82    
83 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
84 root 1.11 static SV *types_true, *types_false, *types_error, *sv_cbor;
85 root 1.1
86     typedef struct {
87     U32 flags;
88     U32 max_depth;
89     STRLEN max_size;
90     } CBOR;
91    
92 root 1.5 ecb_inline void
93 root 1.1 cbor_init (CBOR *cbor)
94     {
95     Zero (cbor, 1, CBOR);
96     cbor->max_depth = 512;
97     }
98    
99     /////////////////////////////////////////////////////////////////////////////
100     // utility functions
101    
102 root 1.5 ecb_inline SV *
103 root 1.1 get_bool (const char *name)
104     {
105     SV *sv = get_sv (name, 1);
106    
107     SvREADONLY_on (sv);
108     SvREADONLY_on (SvRV (sv));
109    
110     return sv;
111     }
112    
113 root 1.5 ecb_inline void
114 root 1.1 shrink (SV *sv)
115     {
116     sv_utf8_downgrade (sv, 1);
117    
118     if (SvLEN (sv) > SvCUR (sv) + 1)
119     {
120     #ifdef SvPV_shrink_to_cur
121     SvPV_shrink_to_cur (sv);
122     #elif defined (SvPV_renew)
123     SvPV_renew (sv, SvCUR (sv) + 1);
124     #endif
125     }
126     }
127    
128 root 1.21 // minimum length of a string to be registered for stringref
129     ecb_inline int
130     minimum_string_length (UV idx)
131     {
132     return idx > 23
133     ? idx > 0xffU
134     ? idx > 0xffffU
135     ? idx > 0xffffffffU
136     ? 7
137     : 6
138     : 5
139     : 4
140     : 3;
141     }
142    
143 root 1.1 /////////////////////////////////////////////////////////////////////////////
144     // encoder
145    
146     // structure used for encoding CBOR
147     typedef struct
148     {
149     char *cur; // SvPVX (sv) + current output position
150     char *end; // SvEND (sv)
151     SV *sv; // result scalar
152     CBOR cbor;
153     U32 depth; // recursion level
154 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
155     UV stringref_idx;
156 root 1.19 HV *shareable; // ptr => index, or 0
157     UV shareable_idx;
158 root 1.1 } enc_t;
159    
160 root 1.5 ecb_inline void
161 root 1.1 need (enc_t *enc, STRLEN len)
162     {
163 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
164 root 1.1 {
165     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
166     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
167     enc->cur = SvPVX (enc->sv) + cur;
168     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
169     }
170     }
171    
172 root 1.5 ecb_inline void
173 root 1.1 encode_ch (enc_t *enc, char ch)
174     {
175     need (enc, 1);
176     *enc->cur++ = ch;
177     }
178    
179     static void
180     encode_uint (enc_t *enc, int major, UV len)
181     {
182     need (enc, 9);
183    
184     if (len < 24)
185     *enc->cur++ = major | len;
186 root 1.4 else if (len <= 0xff)
187 root 1.1 {
188     *enc->cur++ = major | 24;
189     *enc->cur++ = len;
190     }
191 root 1.4 else if (len <= 0xffff)
192 root 1.1 {
193     *enc->cur++ = major | 25;
194     *enc->cur++ = len >> 8;
195     *enc->cur++ = len;
196     }
197 root 1.4 else if (len <= 0xffffffff)
198 root 1.1 {
199     *enc->cur++ = major | 26;
200     *enc->cur++ = len >> 24;
201     *enc->cur++ = len >> 16;
202     *enc->cur++ = len >> 8;
203     *enc->cur++ = len;
204     }
205 root 1.4 else
206 root 1.1 {
207     *enc->cur++ = major | 27;
208     *enc->cur++ = len >> 56;
209     *enc->cur++ = len >> 48;
210     *enc->cur++ = len >> 40;
211     *enc->cur++ = len >> 32;
212     *enc->cur++ = len >> 24;
213     *enc->cur++ = len >> 16;
214     *enc->cur++ = len >> 8;
215     *enc->cur++ = len;
216     }
217     }
218    
219 root 1.21 ecb_inline void
220     encode_tag (enc_t *enc, UV tag)
221     {
222     encode_uint (enc, 0xc0, tag);
223     }
224    
225 root 1.1 static void
226     encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
227     {
228 root 1.24 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_STRINGREF))
229 root 1.21 {
230 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
231 root 1.21
232     if (SvOK (*svp))
233     {
234     // already registered, use stringref
235     encode_tag (enc, CBOR_TAG_STRINGREF);
236     encode_uint (enc, 0x00, SvUV (*svp));
237     return;
238     }
239     else if (len >= minimum_string_length (enc->stringref_idx))
240     {
241     // register only
242     sv_setuv (*svp, enc->stringref_idx);
243     ++enc->stringref_idx;
244     }
245     }
246    
247 root 1.1 encode_uint (enc, utf8 ? 0x60 : 0x40, len);
248     need (enc, len);
249     memcpy (enc->cur, str, len);
250     enc->cur += len;
251     }
252    
253     static void encode_sv (enc_t *enc, SV *sv);
254    
255     static void
256     encode_av (enc_t *enc, AV *av)
257     {
258     int i, len = av_len (av);
259    
260     if (enc->depth >= enc->cbor.max_depth)
261     croak (ERR_NESTING_EXCEEDED);
262    
263     ++enc->depth;
264    
265     encode_uint (enc, 0x80, len + 1);
266    
267     for (i = 0; i <= len; ++i)
268     {
269     SV **svp = av_fetch (av, i, 0);
270     encode_sv (enc, svp ? *svp : &PL_sv_undef);
271     }
272    
273     --enc->depth;
274     }
275    
276     static void
277     encode_hv (enc_t *enc, HV *hv)
278     {
279     HE *he;
280    
281     if (enc->depth >= enc->cbor.max_depth)
282     croak (ERR_NESTING_EXCEEDED);
283    
284     ++enc->depth;
285    
286     int pairs = hv_iterinit (hv);
287     int mg = SvMAGICAL (hv);
288    
289     if (mg)
290     encode_ch (enc, 0xa0 | 31);
291     else
292     encode_uint (enc, 0xa0, pairs);
293    
294     while ((he = hv_iternext (hv)))
295     {
296 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
297     encode_sv (enc, HeSVKEY (he));
298 root 1.1 else
299 root 1.21 encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
300 root 1.1
301 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
302 root 1.1 }
303    
304     if (mg)
305     encode_ch (enc, 0xe0 | 31);
306    
307     --enc->depth;
308     }
309    
310     // encode objects, arrays and special \0=false and \1=true values.
311     static void
312     encode_rv (enc_t *enc, SV *sv)
313     {
314 root 1.19 SvGETMAGIC (sv);
315 root 1.1
316 root 1.19 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
317     && ecb_expect_false (SvREFCNT (sv) > 1))
318     {
319     if (!enc->shareable)
320     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
321    
322     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
323    
324     if (SvOK (*svp))
325     {
326     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
327     encode_uint (enc, 0x00, SvUV (*svp));
328     return;
329     }
330     else
331     {
332     sv_setuv (*svp, enc->shareable_idx);
333     ++enc->shareable_idx;
334     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
335     }
336     }
337 root 1.1
338 root 1.19 svtype svt = SvTYPE (sv);
339 root 1.18
340 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
341 root 1.1 {
342 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
343     ? types_boolean_stash
344     : gv_stashpv ("Types::Serialiser::Boolean", 1);
345     HV *error_stash = !CBOR_SLOW || types_error_stash
346     ? types_error_stash
347     : gv_stashpv ("Types::Serialiser::Error", 1);
348 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
349     ? cbor_tagged_stash
350     : gv_stashpv ("CBOR::XS::Tagged" , 1);
351 root 1.1
352 root 1.11 HV *stash = SvSTASH (sv);
353     GV *method;
354    
355     if (stash == boolean_stash)
356 root 1.1 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
357 root 1.11 else if (stash == error_stash)
358 root 1.10 encode_ch (enc, 0xe0 | 23);
359 root 1.11 else if (stash == tagged_stash)
360 root 1.6 {
361     if (svt != SVt_PVAV)
362     croak ("encountered CBOR::XS::Tagged object that isn't an array");
363    
364     encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
365     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
366     }
367 root 1.11 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
368 root 1.1 {
369 root 1.11 dSP;
370    
371     ENTER; SAVETMPS; PUSHMARK (SP);
372 root 1.6 // we re-bless the reference to get overload and other niceties right
373 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
374    
375     PUTBACK;
376     // G_SCALAR ensures that return value is 1
377     call_sv ((SV *)GvCV (method), G_SCALAR);
378     SPAGAIN;
379    
380     // catch this surprisingly common error
381     if (SvROK (TOPs) && SvRV (TOPs) == sv)
382     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
383    
384     encode_sv (enc, POPs);
385    
386     PUTBACK;
387    
388     FREETMPS; LEAVE;
389     }
390     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
391     {
392     dSP;
393 root 1.6
394 root 1.11 ENTER; SAVETMPS; PUSHMARK (SP);
395     EXTEND (SP, 2);
396     // we re-bless the reference to get overload and other niceties right
397     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
398     PUSHs (sv_cbor);
399 root 1.1
400 root 1.11 PUTBACK;
401     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
402     SPAGAIN;
403 root 1.6
404 root 1.11 // catch this surprisingly common error
405     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
406     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
407 root 1.6
408 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
409 root 1.11 encode_uint (enc, 0x80, count + 1);
410     encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
411 root 1.6
412 root 1.11 while (count)
413     encode_sv (enc, SP[1 - count--]);
414 root 1.6
415 root 1.11 PUTBACK;
416 root 1.6
417 root 1.11 FREETMPS; LEAVE;
418 root 1.1 }
419 root 1.11 else
420     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
421     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
422 root 1.1 }
423     else if (svt == SVt_PVHV)
424     encode_hv (enc, (HV *)sv);
425     else if (svt == SVt_PVAV)
426     encode_av (enc, (AV *)sv);
427 root 1.18 else
428 root 1.1 {
429 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
430     encode_sv (enc, sv);
431 root 1.1 }
432     }
433    
434     static void
435     encode_nv (enc_t *enc, SV *sv)
436     {
437     double nv = SvNVX (sv);
438    
439     need (enc, 9);
440    
441 root 1.5 if (ecb_expect_false (nv == (U32)nv))
442 root 1.1 encode_uint (enc, 0x00, (U32)nv);
443     //TODO: maybe I32?
444 root 1.5 else if (ecb_expect_false (nv == (float)nv))
445 root 1.1 {
446     uint32_t fp = ecb_float_to_binary32 (nv);
447    
448     *enc->cur++ = 0xe0 | 26;
449    
450     if (!ecb_big_endian ())
451     fp = ecb_bswap32 (fp);
452    
453     memcpy (enc->cur, &fp, 4);
454     enc->cur += 4;
455     }
456     else
457     {
458     uint64_t fp = ecb_double_to_binary64 (nv);
459    
460     *enc->cur++ = 0xe0 | 27;
461    
462     if (!ecb_big_endian ())
463     fp = ecb_bswap64 (fp);
464    
465     memcpy (enc->cur, &fp, 8);
466     enc->cur += 8;
467     }
468     }
469    
470     static void
471     encode_sv (enc_t *enc, SV *sv)
472     {
473     SvGETMAGIC (sv);
474    
475     if (SvPOKp (sv))
476     {
477     STRLEN len;
478     char *str = SvPV (sv, len);
479     encode_str (enc, SvUTF8 (sv), str, len);
480     }
481     else if (SvNOKp (sv))
482     encode_nv (enc, sv);
483     else if (SvIOKp (sv))
484     {
485     if (SvIsUV (sv))
486     encode_uint (enc, 0x00, SvUVX (sv));
487     else if (SvIVX (sv) >= 0)
488     encode_uint (enc, 0x00, SvIVX (sv));
489     else
490     encode_uint (enc, 0x20, -(SvIVX (sv) + 1));
491     }
492     else if (SvROK (sv))
493     encode_rv (enc, SvRV (sv));
494     else if (!SvOK (sv))
495     encode_ch (enc, 0xe0 | 22);
496     else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
497     encode_ch (enc, 0xe0 | 23);
498     else
499     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
500     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
501     }
502    
503     static SV *
504     encode_cbor (SV *scalar, CBOR *cbor)
505     {
506 root 1.18 enc_t enc = { };
507 root 1.1
508     enc.cbor = *cbor;
509     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
510     enc.cur = SvPVX (enc.sv);
511     enc.end = SvEND (enc.sv);
512    
513     SvPOK_only (enc.sv);
514 root 1.20
515 root 1.24 if (cbor->flags & F_ALLOW_STRINGREF)
516 root 1.20 {
517     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
518     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
519     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
520     }
521    
522 root 1.1 encode_sv (&enc, scalar);
523    
524     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
525     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
526    
527     if (enc.cbor.flags & F_SHRINK)
528     shrink (enc.sv);
529    
530     return enc.sv;
531     }
532    
533     /////////////////////////////////////////////////////////////////////////////
534     // decoder
535    
536     // structure used for decoding CBOR
537     typedef struct
538     {
539     U8 *cur; // current parser pointer
540     U8 *end; // end of input string
541     const char *err; // parse error, if != 0
542     CBOR cbor;
543     U32 depth; // recursion depth
544     U32 maxdepth; // recursion depth limit
545 root 1.19 AV *shareable;
546 root 1.20 AV *stringref;
547 root 1.1 } dec_t;
548    
549     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
550    
551 root 1.5 #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
552 root 1.1
553     #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED)
554     #define DEC_DEC_DEPTH --dec->depth
555    
556     static UV
557     decode_uint (dec_t *dec)
558     {
559     switch (*dec->cur & 31)
560     {
561     case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
562     case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
563     case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
564     return *dec->cur++ & 31;
565    
566     case 24:
567     WANT (2);
568     dec->cur += 2;
569     return dec->cur[-1];
570    
571     case 25:
572     WANT (3);
573     dec->cur += 3;
574     return (((UV)dec->cur[-2]) << 8)
575     | ((UV)dec->cur[-1]);
576    
577     case 26:
578     WANT (5);
579     dec->cur += 5;
580     return (((UV)dec->cur[-4]) << 24)
581     | (((UV)dec->cur[-3]) << 16)
582     | (((UV)dec->cur[-2]) << 8)
583     | ((UV)dec->cur[-1]);
584    
585     case 27:
586     WANT (9);
587     dec->cur += 9;
588     return (((UV)dec->cur[-8]) << 56)
589     | (((UV)dec->cur[-7]) << 48)
590     | (((UV)dec->cur[-6]) << 40)
591     | (((UV)dec->cur[-5]) << 32)
592     | (((UV)dec->cur[-4]) << 24)
593     | (((UV)dec->cur[-3]) << 16)
594     | (((UV)dec->cur[-2]) << 8)
595     | ((UV)dec->cur[-1]);
596    
597     default:
598     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
599     }
600    
601     fail:
602     return 0;
603     }
604    
605     static SV *decode_sv (dec_t *dec);
606    
607     static SV *
608     decode_av (dec_t *dec)
609     {
610     AV *av = newAV ();
611    
612     DEC_INC_DEPTH;
613    
614     if ((*dec->cur & 31) == 31)
615     {
616     ++dec->cur;
617    
618     for (;;)
619     {
620     WANT (1);
621    
622 root 1.2 if (*dec->cur == (0xe0 | 31))
623 root 1.1 {
624     ++dec->cur;
625     break;
626     }
627    
628     av_push (av, decode_sv (dec));
629     }
630     }
631     else
632     {
633     int i, len = decode_uint (dec);
634    
635     av_fill (av, len - 1);
636    
637     for (i = 0; i < len; ++i)
638     AvARRAY (av)[i] = decode_sv (dec);
639     }
640    
641     DEC_DEC_DEPTH;
642     return newRV_noinc ((SV *)av);
643    
644     fail:
645     SvREFCNT_dec (av);
646     DEC_DEC_DEPTH;
647     return &PL_sv_undef;
648     }
649    
650 root 1.16 static void
651     decode_he (dec_t *dec, HV *hv)
652     {
653     // for speed reasons, we specialcase single-string
654 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
655    
656 root 1.23 if (ecb_expect_true (!dec->stringref))
657 root 1.21 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
658     {
659     I32 len = decode_uint (dec);
660     char *key = (char *)dec->cur;
661 root 1.16
662 root 1.21 dec->cur += len;
663    
664     if (ecb_expect_false (dec->stringref))
665     av_push (dec->stringref, newSVpvn (key, len));
666    
667     hv_store (hv, key, len, decode_sv (dec), 0);
668 root 1.16
669 root 1.21 return;
670     }
671     else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
672     {
673     I32 len = decode_uint (dec);
674     char *key = (char *)dec->cur;
675 root 1.16
676 root 1.21 dec->cur += len;
677 root 1.20
678 root 1.21 if (ecb_expect_false (dec->stringref))
679     av_push (dec->stringref, newSVpvn_utf8 (key, len, 1));
680 root 1.16
681 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
682 root 1.16
683 root 1.21 return;
684     }
685 root 1.20
686 root 1.21 SV *k = decode_sv (dec);
687     SV *v = decode_sv (dec);
688 root 1.16
689 root 1.21 hv_store_ent (hv, k, v, 0);
690     SvREFCNT_dec (k);
691 root 1.16 }
692    
693 root 1.1 static SV *
694     decode_hv (dec_t *dec)
695     {
696     HV *hv = newHV ();
697    
698     DEC_INC_DEPTH;
699    
700     if ((*dec->cur & 31) == 31)
701     {
702     ++dec->cur;
703    
704     for (;;)
705     {
706     WANT (1);
707    
708 root 1.2 if (*dec->cur == (0xe0 | 31))
709 root 1.1 {
710     ++dec->cur;
711     break;
712     }
713    
714 root 1.16 decode_he (dec, hv);
715 root 1.1 }
716     }
717     else
718     {
719 root 1.16 int pairs = decode_uint (dec);
720 root 1.1
721 root 1.16 while (pairs--)
722     decode_he (dec, hv);
723 root 1.1 }
724    
725     DEC_DEC_DEPTH;
726     return newRV_noinc ((SV *)hv);
727    
728     fail:
729     SvREFCNT_dec (hv);
730     DEC_DEC_DEPTH;
731     return &PL_sv_undef;
732     }
733    
734     static SV *
735     decode_str (dec_t *dec, int utf8)
736     {
737 root 1.6 SV *sv = 0;
738 root 1.1
739     if ((*dec->cur & 31) == 31)
740     {
741     ++dec->cur;
742    
743     sv = newSVpvn ("", 0);
744    
745     // not very fast, and certainly not robust against illegal input
746     for (;;)
747     {
748     WANT (1);
749    
750 root 1.2 if (*dec->cur == (0xe0 | 31))
751 root 1.1 {
752     ++dec->cur;
753     break;
754     }
755    
756 root 1.6 sv_catsv (sv, decode_sv (dec));
757 root 1.1 }
758     }
759     else
760     {
761     STRLEN len = decode_uint (dec);
762    
763     WANT (len);
764     sv = newSVpvn (dec->cur, len);
765     dec->cur += len;
766 root 1.25
767     if (ecb_expect_false (dec->stringref)
768     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
769     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
770 root 1.1 }
771    
772     if (utf8)
773     SvUTF8_on (sv);
774    
775     return sv;
776    
777     fail:
778 root 1.6 SvREFCNT_dec (sv);
779 root 1.1 return &PL_sv_undef;
780     }
781    
782     static SV *
783 root 1.3 decode_tagged (dec_t *dec)
784     {
785 root 1.19 SV *sv = 0;
786 root 1.3 UV tag = decode_uint (dec);
787 root 1.19
788     WANT (1);
789 root 1.3
790 root 1.18 switch (tag)
791     {
792     case CBOR_TAG_MAGIC:
793 root 1.20 sv = decode_sv (dec);
794     break;
795 root 1.18
796     case CBOR_TAG_INDIRECTION:
797 root 1.20 sv = newRV_noinc (decode_sv (dec));
798     break;
799    
800     case CBOR_TAG_STRINGREF_NAMESPACE:
801     {
802     ENTER; SAVETMPS;
803    
804     SAVESPTR (dec->stringref);
805     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
806    
807     sv = decode_sv (dec);
808    
809     FREETMPS; LEAVE;
810     }
811     break;
812    
813     case CBOR_TAG_STRINGREF:
814     {
815     if ((*dec->cur >> 5) != 0)
816     ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
817    
818     UV idx = decode_uint (dec);
819    
820     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
821     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
822    
823     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
824     }
825     break;
826 root 1.11
827 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
828     {
829     if (ecb_expect_false (!dec->shareable))
830     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
831    
832     sv = newSV (0);
833     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
834 root 1.18
835 root 1.19 SV *osv = decode_sv (dec);
836     sv_setsv (sv, osv);
837     SvREFCNT_dec_NN (osv);
838     }
839 root 1.20 break;
840 root 1.18
841     case CBOR_TAG_VALUE_SHAREDREF:
842 root 1.17 {
843 root 1.19 if ((*dec->cur >> 5) != 0)
844     ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
845 root 1.18
846 root 1.19 UV idx = decode_uint (dec);
847    
848 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
849 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
850    
851 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
852 root 1.17 }
853 root 1.20 break;
854 root 1.17
855 root 1.18 case CBOR_TAG_PERL_OBJECT:
856     {
857 root 1.19 sv = decode_sv (dec);
858    
859 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
860     ERR ("corrupted CBOR data (non-array perl object)");
861    
862     AV *av = (AV *)SvRV (sv);
863     int len = av_len (av) + 1;
864     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
865    
866     if (!stash)
867     ERR ("cannot decode perl-object (package does not exist)");
868    
869     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
870    
871     if (!method)
872     ERR ("cannot decode perl-object (package does not have a THAW method)");
873    
874     dSP;
875    
876     ENTER; SAVETMPS; PUSHMARK (SP);
877     EXTEND (SP, len + 1);
878     // we re-bless the reference to get overload and other niceties right
879     PUSHs (*av_fetch (av, 0, 1));
880     PUSHs (sv_cbor);
881    
882     int i;
883    
884     for (i = 1; i < len; ++i)
885     PUSHs (*av_fetch (av, i, 1));
886    
887     PUTBACK;
888     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
889     SPAGAIN;
890    
891     if (SvTRUE (ERRSV))
892     {
893     FREETMPS; LEAVE;
894     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
895     }
896    
897     SvREFCNT_dec (sv);
898     sv = SvREFCNT_inc (POPs);
899 root 1.11
900 root 1.18 PUTBACK;
901 root 1.11
902 root 1.18 FREETMPS; LEAVE;
903     }
904 root 1.20 break;
905 root 1.9
906 root 1.18 default:
907     {
908 root 1.19 sv = decode_sv (dec);
909    
910 root 1.18 AV *av = newAV ();
911     av_push (av, newSVuv (tag));
912     av_push (av, sv);
913    
914     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
915     ? cbor_tagged_stash
916     : gv_stashpv ("CBOR::XS::Tagged" , 1);
917 root 1.7
918 root 1.20 sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
919 root 1.18 }
920 root 1.20 break;
921 root 1.11 }
922 root 1.9
923 root 1.20 return sv;
924    
925 root 1.9 fail:
926     SvREFCNT_dec (sv);
927     return &PL_sv_undef;
928 root 1.3 }
929    
930     static SV *
931 root 1.1 decode_sv (dec_t *dec)
932     {
933     WANT (1);
934    
935     switch (*dec->cur >> 5)
936     {
937     case 0: // unsigned int
938     return newSVuv (decode_uint (dec));
939     case 1: // negative int
940     return newSViv (-1 - (IV)decode_uint (dec));
941     case 2: // octet string
942     return decode_str (dec, 0);
943     case 3: // utf-8 string
944     return decode_str (dec, 1);
945     case 4: // array
946     return decode_av (dec);
947     case 5: // map
948     return decode_hv (dec);
949     case 6: // tag
950 root 1.3 return decode_tagged (dec);
951 root 1.1 case 7: // misc
952     switch (*dec->cur++ & 31)
953     {
954     case 20:
955     #if CBOR_SLOW
956 root 1.10 types_false = get_bool ("Types::Serialiser::false");
957 root 1.1 #endif
958 root 1.10 return newSVsv (types_false);
959 root 1.1 case 21:
960     #if CBOR_SLOW
961 root 1.10 types_true = get_bool ("Types::Serialiser::true");
962 root 1.1 #endif
963 root 1.10 return newSVsv (types_true);
964 root 1.1 case 22:
965     return newSVsv (&PL_sv_undef);
966 root 1.10 case 23:
967     #if CBOR_SLOW
968     types_error = get_bool ("Types::Serialiser::error");
969     #endif
970     return newSVsv (types_error);
971 root 1.1
972     case 25:
973 root 1.2 {
974     WANT (2);
975    
976     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
977     dec->cur += 2;
978    
979     return newSVnv (ecb_binary16_to_float (fp));
980     }
981 root 1.1
982     case 26:
983     {
984     uint32_t fp;
985     WANT (4);
986     memcpy (&fp, dec->cur, 4);
987     dec->cur += 4;
988    
989     if (!ecb_big_endian ())
990     fp = ecb_bswap32 (fp);
991    
992     return newSVnv (ecb_binary32_to_float (fp));
993     }
994    
995     case 27:
996     {
997     uint64_t fp;
998     WANT (8);
999     memcpy (&fp, dec->cur, 8);
1000     dec->cur += 8;
1001    
1002     if (!ecb_big_endian ())
1003     fp = ecb_bswap64 (fp);
1004    
1005     return newSVnv (ecb_binary64_to_double (fp));
1006     }
1007    
1008     // 0..19 unassigned
1009     // 24 reserved + unassigned (reserved values are not encodable)
1010     default:
1011     ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
1012     }
1013    
1014     break;
1015     }
1016    
1017     fail:
1018     return &PL_sv_undef;
1019     }
1020    
1021     static SV *
1022     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1023     {
1024 root 1.18 dec_t dec = { };
1025 root 1.1 SV *sv;
1026 root 1.16 STRLEN len;
1027     char *data = SvPVbyte (string, len);
1028 root 1.1
1029 root 1.16 if (len > cbor->max_size && cbor->max_size)
1030     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1031     (unsigned long)len, (unsigned long)cbor->max_size);
1032 root 1.1
1033     dec.cbor = *cbor;
1034 root 1.16 dec.cur = (U8 *)data;
1035     dec.end = (U8 *)data + len;
1036 root 1.1
1037     sv = decode_sv (&dec);
1038    
1039     if (offset_return)
1040     *offset_return = dec.cur;
1041    
1042     if (!(offset_return || !sv))
1043 root 1.2 if (dec.cur != dec.end && !dec.err)
1044     dec.err = "garbage after CBOR object";
1045    
1046     if (dec.err)
1047 root 1.1 {
1048 root 1.2 SvREFCNT_dec (sv);
1049 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1050 root 1.1 }
1051    
1052     sv = sv_2mortal (sv);
1053    
1054     return sv;
1055     }
1056    
1057     /////////////////////////////////////////////////////////////////////////////
1058     // XS interface functions
1059    
1060     MODULE = CBOR::XS PACKAGE = CBOR::XS
1061    
1062     BOOT:
1063     {
1064     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1065 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1066 root 1.1
1067 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1068     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1069    
1070     types_true = get_bool ("Types::Serialiser::true" );
1071     types_false = get_bool ("Types::Serialiser::false");
1072     types_error = get_bool ("Types::Serialiser::error");
1073 root 1.11
1074     sv_cbor = newSVpv ("CBOR", 0);
1075     SvREADONLY_on (sv_cbor);
1076 root 1.1 }
1077    
1078     PROTOTYPES: DISABLE
1079    
1080     void CLONE (...)
1081     CODE:
1082 root 1.10 cbor_stash = 0;
1083     cbor_tagged_stash = 0;
1084     types_error_stash = 0;
1085     types_boolean_stash = 0;
1086 root 1.1
1087     void new (char *klass)
1088     PPCODE:
1089     {
1090     SV *pv = NEWSV (0, sizeof (CBOR));
1091     SvPOK_only (pv);
1092     cbor_init ((CBOR *)SvPVX (pv));
1093     XPUSHs (sv_2mortal (sv_bless (
1094     newRV_noinc (pv),
1095     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1096     )));
1097     }
1098    
1099     void shrink (CBOR *self, int enable = 1)
1100     ALIAS:
1101     shrink = F_SHRINK
1102     allow_unknown = F_ALLOW_UNKNOWN
1103 root 1.18 allow_sharing = F_ALLOW_SHARING
1104 root 1.24 allow_stringref = F_ALLOW_STRINGREF
1105 root 1.1 PPCODE:
1106     {
1107     if (enable)
1108     self->flags |= ix;
1109     else
1110     self->flags &= ~ix;
1111    
1112     XPUSHs (ST (0));
1113     }
1114    
1115     void get_shrink (CBOR *self)
1116     ALIAS:
1117     get_shrink = F_SHRINK
1118     get_allow_unknown = F_ALLOW_UNKNOWN
1119 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1120 root 1.24 get_allow_stringref = F_ALLOW_STRINGREF
1121 root 1.1 PPCODE:
1122     XPUSHs (boolSV (self->flags & ix));
1123    
1124     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1125     PPCODE:
1126     self->max_depth = max_depth;
1127     XPUSHs (ST (0));
1128    
1129     U32 get_max_depth (CBOR *self)
1130     CODE:
1131     RETVAL = self->max_depth;
1132     OUTPUT:
1133     RETVAL
1134    
1135     void max_size (CBOR *self, U32 max_size = 0)
1136     PPCODE:
1137     self->max_size = max_size;
1138     XPUSHs (ST (0));
1139    
1140     int get_max_size (CBOR *self)
1141     CODE:
1142     RETVAL = self->max_size;
1143     OUTPUT:
1144     RETVAL
1145    
1146     void encode (CBOR *self, SV *scalar)
1147     PPCODE:
1148     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1149     XPUSHs (scalar);
1150    
1151     void decode (CBOR *self, SV *cborstr)
1152     PPCODE:
1153     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1154     XPUSHs (cborstr);
1155    
1156     void decode_prefix (CBOR *self, SV *cborstr)
1157     PPCODE:
1158     {
1159     SV *sv;
1160     char *offset;
1161     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1162     EXTEND (SP, 2);
1163     PUSHs (sv);
1164     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1165     }
1166    
1167     PROTOTYPES: ENABLE
1168    
1169     void encode_cbor (SV *scalar)
1170     PPCODE:
1171     {
1172     CBOR cbor;
1173     cbor_init (&cbor);
1174     PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1175     XPUSHs (scalar);
1176     }
1177    
1178     void decode_cbor (SV *cborstr)
1179     PPCODE:
1180     {
1181     CBOR cbor;
1182     cbor_init (&cbor);
1183     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1184     XPUSHs (cborstr);
1185     }
1186