ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.30
Committed: Thu Nov 28 11:36:53 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.29: +14 -8 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12     #include "ecb.h"
13    
14 root 1.14 // compatibility with perl <5.18
15     #ifndef HvNAMELEN_get
16     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
17     #endif
18     #ifndef HvNAMELEN
19     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
20     #endif
21     #ifndef HvNAMEUTF8
22     # define HvNAMEUTF8(hv) 0
23     #endif
24 root 1.28 #ifndef SvREFCNT_dec_NN
25     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
26     #endif
27 root 1.14
28 root 1.9 // known tags
29 root 1.8 enum cbor_tag
30     {
31 root 1.29 // extensions
32     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
33     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
34     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
35     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
36     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
37     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
38 root 1.18 CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
39 root 1.9
40     // rfc7049
41     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
42     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
43     CBOR_TAG_POS_BIGNUM = 2, // byte string
44     CBOR_TAG_NEG_BIGNUM = 3, // byte string
45     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
46     CBOR_TAG_BIGFLOAT = 5, // array
47    
48     CBOR_TAG_CONV_B64U = 21, // base64url, any
49     CBOR_TAG_CONV_B64 = 22, // base64, any
50     CBOR_TAG_CONV_HEX = 23, // base16, any
51     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
52    
53     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
54     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
55     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
56     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
57     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
58 root 1.8
59 root 1.9 CBOR_TAG_MAGIC = 55799 // self-describe cbor
60 root 1.8 };
61    
62 root 1.24 #define F_SHRINK 0x00000001UL
63     #define F_ALLOW_UNKNOWN 0x00000002UL
64     #define F_ALLOW_SHARING 0x00000004UL //TODO
65     #define F_ALLOW_STRINGREF 0x00000008UL //TODO
66 root 1.1
67     #define INIT_SIZE 32 // initial scalar size to be allocated
68    
69     #define SB do {
70     #define SE } while (0)
71    
72     #define IN_RANGE_INC(type,val,beg,end) \
73     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
74     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
75    
76     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
77    
78     #ifdef USE_ITHREADS
79     # define CBOR_SLOW 1
80     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
81     #else
82     # define CBOR_SLOW 0
83     # define CBOR_STASH cbor_stash
84     #endif
85    
86 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
87 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
88 root 1.1
89     typedef struct {
90     U32 flags;
91     U32 max_depth;
92     STRLEN max_size;
93 root 1.27 SV *filter;
94 root 1.1 } CBOR;
95    
96 root 1.5 ecb_inline void
97 root 1.1 cbor_init (CBOR *cbor)
98     {
99     Zero (cbor, 1, CBOR);
100     cbor->max_depth = 512;
101     }
102    
103 root 1.27 ecb_inline void
104     cbor_free (CBOR *cbor)
105     {
106     SvREFCNT_dec (cbor->filter);
107     }
108    
109 root 1.1 /////////////////////////////////////////////////////////////////////////////
110     // utility functions
111    
112 root 1.5 ecb_inline SV *
113 root 1.1 get_bool (const char *name)
114     {
115     SV *sv = get_sv (name, 1);
116    
117     SvREADONLY_on (sv);
118     SvREADONLY_on (SvRV (sv));
119    
120     return sv;
121     }
122    
123 root 1.5 ecb_inline void
124 root 1.1 shrink (SV *sv)
125     {
126     sv_utf8_downgrade (sv, 1);
127    
128     if (SvLEN (sv) > SvCUR (sv) + 1)
129     {
130     #ifdef SvPV_shrink_to_cur
131     SvPV_shrink_to_cur (sv);
132     #elif defined (SvPV_renew)
133     SvPV_renew (sv, SvCUR (sv) + 1);
134     #endif
135     }
136     }
137    
138 root 1.21 // minimum length of a string to be registered for stringref
139     ecb_inline int
140     minimum_string_length (UV idx)
141     {
142     return idx > 23
143     ? idx > 0xffU
144     ? idx > 0xffffU
145     ? idx > 0xffffffffU
146 root 1.29 ? 11
147     : 7
148 root 1.21 : 5
149     : 4
150     : 3;
151     }
152    
153 root 1.1 /////////////////////////////////////////////////////////////////////////////
154     // encoder
155    
156     // structure used for encoding CBOR
157     typedef struct
158     {
159     char *cur; // SvPVX (sv) + current output position
160     char *end; // SvEND (sv)
161     SV *sv; // result scalar
162     CBOR cbor;
163     U32 depth; // recursion level
164 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
165     UV stringref_idx;
166 root 1.19 HV *shareable; // ptr => index, or 0
167     UV shareable_idx;
168 root 1.1 } enc_t;
169    
170 root 1.5 ecb_inline void
171 root 1.1 need (enc_t *enc, STRLEN len)
172     {
173 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
174 root 1.1 {
175     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
176     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
177     enc->cur = SvPVX (enc->sv) + cur;
178     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
179     }
180     }
181    
182 root 1.5 ecb_inline void
183 root 1.1 encode_ch (enc_t *enc, char ch)
184     {
185     need (enc, 1);
186     *enc->cur++ = ch;
187     }
188    
189     static void
190     encode_uint (enc_t *enc, int major, UV len)
191     {
192     need (enc, 9);
193    
194     if (len < 24)
195     *enc->cur++ = major | len;
196 root 1.4 else if (len <= 0xff)
197 root 1.1 {
198     *enc->cur++ = major | 24;
199     *enc->cur++ = len;
200     }
201 root 1.4 else if (len <= 0xffff)
202 root 1.1 {
203     *enc->cur++ = major | 25;
204     *enc->cur++ = len >> 8;
205     *enc->cur++ = len;
206     }
207 root 1.4 else if (len <= 0xffffffff)
208 root 1.1 {
209     *enc->cur++ = major | 26;
210     *enc->cur++ = len >> 24;
211     *enc->cur++ = len >> 16;
212     *enc->cur++ = len >> 8;
213     *enc->cur++ = len;
214     }
215 root 1.4 else
216 root 1.1 {
217     *enc->cur++ = major | 27;
218     *enc->cur++ = len >> 56;
219     *enc->cur++ = len >> 48;
220     *enc->cur++ = len >> 40;
221     *enc->cur++ = len >> 32;
222     *enc->cur++ = len >> 24;
223     *enc->cur++ = len >> 16;
224     *enc->cur++ = len >> 8;
225     *enc->cur++ = len;
226     }
227     }
228    
229 root 1.21 ecb_inline void
230     encode_tag (enc_t *enc, UV tag)
231     {
232     encode_uint (enc, 0xc0, tag);
233     }
234    
235 root 1.30 ecb_inline void
236     encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
237     {
238     encode_uint (enc, utf8 ? 0x60 : 0x40, len);
239     need (enc, len);
240     memcpy (enc->cur, str, len);
241     enc->cur += len;
242     }
243    
244 root 1.1 static void
245 root 1.30 encode_strref (enc_t *enc, int utf8, char *str, STRLEN len)
246 root 1.1 {
247 root 1.24 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_STRINGREF))
248 root 1.21 {
249 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
250 root 1.21
251     if (SvOK (*svp))
252     {
253     // already registered, use stringref
254     encode_tag (enc, CBOR_TAG_STRINGREF);
255     encode_uint (enc, 0x00, SvUV (*svp));
256     return;
257     }
258     else if (len >= minimum_string_length (enc->stringref_idx))
259     {
260     // register only
261     sv_setuv (*svp, enc->stringref_idx);
262     ++enc->stringref_idx;
263     }
264     }
265    
266 root 1.30 encode_str (enc, utf8, str, len);
267 root 1.1 }
268    
269     static void encode_sv (enc_t *enc, SV *sv);
270    
271     static void
272     encode_av (enc_t *enc, AV *av)
273     {
274     int i, len = av_len (av);
275    
276     if (enc->depth >= enc->cbor.max_depth)
277     croak (ERR_NESTING_EXCEEDED);
278    
279     ++enc->depth;
280    
281     encode_uint (enc, 0x80, len + 1);
282    
283     for (i = 0; i <= len; ++i)
284     {
285     SV **svp = av_fetch (av, i, 0);
286     encode_sv (enc, svp ? *svp : &PL_sv_undef);
287     }
288    
289     --enc->depth;
290     }
291    
292     static void
293     encode_hv (enc_t *enc, HV *hv)
294     {
295     HE *he;
296    
297     if (enc->depth >= enc->cbor.max_depth)
298     croak (ERR_NESTING_EXCEEDED);
299    
300     ++enc->depth;
301    
302     int pairs = hv_iterinit (hv);
303     int mg = SvMAGICAL (hv);
304    
305     if (mg)
306     encode_ch (enc, 0xa0 | 31);
307     else
308     encode_uint (enc, 0xa0, pairs);
309    
310     while ((he = hv_iternext (hv)))
311     {
312 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
313     encode_sv (enc, HeSVKEY (he));
314 root 1.1 else
315 root 1.30 encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
316 root 1.1
317 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
318 root 1.1 }
319    
320     if (mg)
321     encode_ch (enc, 0xe0 | 31);
322    
323     --enc->depth;
324     }
325    
326     // encode objects, arrays and special \0=false and \1=true values.
327     static void
328     encode_rv (enc_t *enc, SV *sv)
329     {
330 root 1.19 SvGETMAGIC (sv);
331 root 1.1
332 root 1.19 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
333     && ecb_expect_false (SvREFCNT (sv) > 1))
334     {
335     if (!enc->shareable)
336     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
337    
338     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
339    
340     if (SvOK (*svp))
341     {
342     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
343     encode_uint (enc, 0x00, SvUV (*svp));
344     return;
345     }
346     else
347     {
348     sv_setuv (*svp, enc->shareable_idx);
349     ++enc->shareable_idx;
350     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
351     }
352     }
353 root 1.1
354 root 1.19 svtype svt = SvTYPE (sv);
355 root 1.18
356 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
357 root 1.1 {
358 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
359     ? types_boolean_stash
360     : gv_stashpv ("Types::Serialiser::Boolean", 1);
361     HV *error_stash = !CBOR_SLOW || types_error_stash
362     ? types_error_stash
363     : gv_stashpv ("Types::Serialiser::Error", 1);
364 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
365     ? cbor_tagged_stash
366     : gv_stashpv ("CBOR::XS::Tagged" , 1);
367 root 1.1
368 root 1.11 HV *stash = SvSTASH (sv);
369     GV *method;
370    
371     if (stash == boolean_stash)
372 root 1.1 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
373 root 1.11 else if (stash == error_stash)
374 root 1.10 encode_ch (enc, 0xe0 | 23);
375 root 1.11 else if (stash == tagged_stash)
376 root 1.6 {
377     if (svt != SVt_PVAV)
378     croak ("encountered CBOR::XS::Tagged object that isn't an array");
379    
380     encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
381     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
382     }
383 root 1.11 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
384 root 1.1 {
385 root 1.11 dSP;
386    
387     ENTER; SAVETMPS; PUSHMARK (SP);
388 root 1.6 // we re-bless the reference to get overload and other niceties right
389 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
390    
391     PUTBACK;
392     // G_SCALAR ensures that return value is 1
393     call_sv ((SV *)GvCV (method), G_SCALAR);
394     SPAGAIN;
395    
396     // catch this surprisingly common error
397     if (SvROK (TOPs) && SvRV (TOPs) == sv)
398     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
399    
400     encode_sv (enc, POPs);
401    
402     PUTBACK;
403    
404     FREETMPS; LEAVE;
405     }
406     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
407     {
408     dSP;
409 root 1.6
410 root 1.11 ENTER; SAVETMPS; PUSHMARK (SP);
411     EXTEND (SP, 2);
412     // we re-bless the reference to get overload and other niceties right
413     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
414     PUSHs (sv_cbor);
415 root 1.1
416 root 1.11 PUTBACK;
417     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
418     SPAGAIN;
419 root 1.6
420 root 1.11 // catch this surprisingly common error
421     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
422     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
423 root 1.6
424 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
425 root 1.11 encode_uint (enc, 0x80, count + 1);
426 root 1.30 encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
427 root 1.6
428 root 1.11 while (count)
429     encode_sv (enc, SP[1 - count--]);
430 root 1.6
431 root 1.11 PUTBACK;
432 root 1.6
433 root 1.11 FREETMPS; LEAVE;
434 root 1.1 }
435 root 1.11 else
436     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
437     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
438 root 1.1 }
439     else if (svt == SVt_PVHV)
440     encode_hv (enc, (HV *)sv);
441     else if (svt == SVt_PVAV)
442     encode_av (enc, (AV *)sv);
443 root 1.18 else
444 root 1.1 {
445 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
446     encode_sv (enc, sv);
447 root 1.1 }
448     }
449    
450     static void
451     encode_nv (enc_t *enc, SV *sv)
452     {
453     double nv = SvNVX (sv);
454    
455     need (enc, 9);
456    
457 root 1.5 if (ecb_expect_false (nv == (U32)nv))
458 root 1.1 encode_uint (enc, 0x00, (U32)nv);
459     //TODO: maybe I32?
460 root 1.5 else if (ecb_expect_false (nv == (float)nv))
461 root 1.1 {
462     uint32_t fp = ecb_float_to_binary32 (nv);
463    
464     *enc->cur++ = 0xe0 | 26;
465    
466     if (!ecb_big_endian ())
467     fp = ecb_bswap32 (fp);
468    
469     memcpy (enc->cur, &fp, 4);
470     enc->cur += 4;
471     }
472     else
473     {
474     uint64_t fp = ecb_double_to_binary64 (nv);
475    
476     *enc->cur++ = 0xe0 | 27;
477    
478     if (!ecb_big_endian ())
479     fp = ecb_bswap64 (fp);
480    
481     memcpy (enc->cur, &fp, 8);
482     enc->cur += 8;
483     }
484     }
485    
486     static void
487     encode_sv (enc_t *enc, SV *sv)
488     {
489     SvGETMAGIC (sv);
490    
491     if (SvPOKp (sv))
492     {
493     STRLEN len;
494     char *str = SvPV (sv, len);
495 root 1.30 encode_strref (enc, SvUTF8 (sv), str, len);
496 root 1.1 }
497     else if (SvNOKp (sv))
498     encode_nv (enc, sv);
499     else if (SvIOKp (sv))
500     {
501     if (SvIsUV (sv))
502     encode_uint (enc, 0x00, SvUVX (sv));
503     else if (SvIVX (sv) >= 0)
504     encode_uint (enc, 0x00, SvIVX (sv));
505     else
506     encode_uint (enc, 0x20, -(SvIVX (sv) + 1));
507     }
508     else if (SvROK (sv))
509     encode_rv (enc, SvRV (sv));
510     else if (!SvOK (sv))
511     encode_ch (enc, 0xe0 | 22);
512     else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
513     encode_ch (enc, 0xe0 | 23);
514     else
515     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
516     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
517     }
518    
519     static SV *
520     encode_cbor (SV *scalar, CBOR *cbor)
521     {
522 root 1.18 enc_t enc = { };
523 root 1.1
524     enc.cbor = *cbor;
525     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
526     enc.cur = SvPVX (enc.sv);
527     enc.end = SvEND (enc.sv);
528    
529     SvPOK_only (enc.sv);
530 root 1.20
531 root 1.24 if (cbor->flags & F_ALLOW_STRINGREF)
532 root 1.20 {
533     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
534     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
535     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
536     }
537    
538 root 1.1 encode_sv (&enc, scalar);
539    
540     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
541     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
542    
543     if (enc.cbor.flags & F_SHRINK)
544     shrink (enc.sv);
545    
546     return enc.sv;
547     }
548    
549     /////////////////////////////////////////////////////////////////////////////
550     // decoder
551    
552     // structure used for decoding CBOR
553     typedef struct
554     {
555     U8 *cur; // current parser pointer
556     U8 *end; // end of input string
557     const char *err; // parse error, if != 0
558     CBOR cbor;
559     U32 depth; // recursion depth
560     U32 maxdepth; // recursion depth limit
561 root 1.19 AV *shareable;
562 root 1.20 AV *stringref;
563 root 1.27 SV *decode_tagged;
564 root 1.1 } dec_t;
565    
566     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
567    
568 root 1.5 #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
569 root 1.1
570     #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED)
571     #define DEC_DEC_DEPTH --dec->depth
572    
573     static UV
574     decode_uint (dec_t *dec)
575     {
576     switch (*dec->cur & 31)
577     {
578     case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
579     case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
580     case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
581     return *dec->cur++ & 31;
582    
583     case 24:
584     WANT (2);
585     dec->cur += 2;
586     return dec->cur[-1];
587    
588     case 25:
589     WANT (3);
590     dec->cur += 3;
591     return (((UV)dec->cur[-2]) << 8)
592     | ((UV)dec->cur[-1]);
593    
594     case 26:
595     WANT (5);
596     dec->cur += 5;
597     return (((UV)dec->cur[-4]) << 24)
598     | (((UV)dec->cur[-3]) << 16)
599     | (((UV)dec->cur[-2]) << 8)
600     | ((UV)dec->cur[-1]);
601    
602     case 27:
603     WANT (9);
604     dec->cur += 9;
605     return (((UV)dec->cur[-8]) << 56)
606     | (((UV)dec->cur[-7]) << 48)
607     | (((UV)dec->cur[-6]) << 40)
608     | (((UV)dec->cur[-5]) << 32)
609     | (((UV)dec->cur[-4]) << 24)
610     | (((UV)dec->cur[-3]) << 16)
611     | (((UV)dec->cur[-2]) << 8)
612     | ((UV)dec->cur[-1]);
613    
614     default:
615     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
616     }
617    
618     fail:
619     return 0;
620     }
621    
622     static SV *decode_sv (dec_t *dec);
623    
624     static SV *
625     decode_av (dec_t *dec)
626     {
627     AV *av = newAV ();
628    
629     DEC_INC_DEPTH;
630    
631     if ((*dec->cur & 31) == 31)
632     {
633     ++dec->cur;
634    
635     for (;;)
636     {
637     WANT (1);
638    
639 root 1.2 if (*dec->cur == (0xe0 | 31))
640 root 1.1 {
641     ++dec->cur;
642     break;
643     }
644    
645     av_push (av, decode_sv (dec));
646     }
647     }
648     else
649     {
650     int i, len = decode_uint (dec);
651    
652     av_fill (av, len - 1);
653    
654     for (i = 0; i < len; ++i)
655     AvARRAY (av)[i] = decode_sv (dec);
656     }
657    
658     DEC_DEC_DEPTH;
659     return newRV_noinc ((SV *)av);
660    
661     fail:
662     SvREFCNT_dec (av);
663     DEC_DEC_DEPTH;
664     return &PL_sv_undef;
665     }
666    
667 root 1.16 static void
668     decode_he (dec_t *dec, HV *hv)
669     {
670     // for speed reasons, we specialcase single-string
671 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
672    
673 root 1.23 if (ecb_expect_true (!dec->stringref))
674 root 1.21 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
675     {
676     I32 len = decode_uint (dec);
677     char *key = (char *)dec->cur;
678 root 1.16
679 root 1.21 dec->cur += len;
680    
681     if (ecb_expect_false (dec->stringref))
682     av_push (dec->stringref, newSVpvn (key, len));
683    
684     hv_store (hv, key, len, decode_sv (dec), 0);
685 root 1.16
686 root 1.21 return;
687     }
688     else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
689     {
690     I32 len = decode_uint (dec);
691     char *key = (char *)dec->cur;
692 root 1.16
693 root 1.21 dec->cur += len;
694 root 1.20
695 root 1.21 if (ecb_expect_false (dec->stringref))
696     av_push (dec->stringref, newSVpvn_utf8 (key, len, 1));
697 root 1.16
698 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
699 root 1.16
700 root 1.21 return;
701     }
702 root 1.20
703 root 1.21 SV *k = decode_sv (dec);
704     SV *v = decode_sv (dec);
705 root 1.16
706 root 1.21 hv_store_ent (hv, k, v, 0);
707     SvREFCNT_dec (k);
708 root 1.16 }
709    
710 root 1.1 static SV *
711     decode_hv (dec_t *dec)
712     {
713     HV *hv = newHV ();
714    
715     DEC_INC_DEPTH;
716    
717     if ((*dec->cur & 31) == 31)
718     {
719     ++dec->cur;
720    
721     for (;;)
722     {
723     WANT (1);
724    
725 root 1.2 if (*dec->cur == (0xe0 | 31))
726 root 1.1 {
727     ++dec->cur;
728     break;
729     }
730    
731 root 1.16 decode_he (dec, hv);
732 root 1.1 }
733     }
734     else
735     {
736 root 1.16 int pairs = decode_uint (dec);
737 root 1.1
738 root 1.16 while (pairs--)
739     decode_he (dec, hv);
740 root 1.1 }
741    
742     DEC_DEC_DEPTH;
743     return newRV_noinc ((SV *)hv);
744    
745     fail:
746     SvREFCNT_dec (hv);
747     DEC_DEC_DEPTH;
748     return &PL_sv_undef;
749     }
750    
751     static SV *
752     decode_str (dec_t *dec, int utf8)
753     {
754 root 1.6 SV *sv = 0;
755 root 1.1
756     if ((*dec->cur & 31) == 31)
757     {
758     ++dec->cur;
759    
760     sv = newSVpvn ("", 0);
761    
762     // not very fast, and certainly not robust against illegal input
763     for (;;)
764     {
765     WANT (1);
766    
767 root 1.2 if (*dec->cur == (0xe0 | 31))
768 root 1.1 {
769     ++dec->cur;
770     break;
771     }
772    
773 root 1.6 sv_catsv (sv, decode_sv (dec));
774 root 1.1 }
775     }
776     else
777     {
778     STRLEN len = decode_uint (dec);
779    
780     WANT (len);
781     sv = newSVpvn (dec->cur, len);
782     dec->cur += len;
783 root 1.25
784     if (ecb_expect_false (dec->stringref)
785     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
786     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
787 root 1.1 }
788    
789     if (utf8)
790     SvUTF8_on (sv);
791    
792     return sv;
793    
794     fail:
795 root 1.6 SvREFCNT_dec (sv);
796 root 1.1 return &PL_sv_undef;
797     }
798    
799     static SV *
800 root 1.3 decode_tagged (dec_t *dec)
801     {
802 root 1.19 SV *sv = 0;
803 root 1.3 UV tag = decode_uint (dec);
804 root 1.19
805     WANT (1);
806 root 1.3
807 root 1.18 switch (tag)
808     {
809     case CBOR_TAG_MAGIC:
810 root 1.20 sv = decode_sv (dec);
811     break;
812 root 1.18
813     case CBOR_TAG_INDIRECTION:
814 root 1.20 sv = newRV_noinc (decode_sv (dec));
815     break;
816    
817     case CBOR_TAG_STRINGREF_NAMESPACE:
818     {
819     ENTER; SAVETMPS;
820    
821     SAVESPTR (dec->stringref);
822     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
823    
824     sv = decode_sv (dec);
825    
826     FREETMPS; LEAVE;
827     }
828     break;
829    
830     case CBOR_TAG_STRINGREF:
831     {
832     if ((*dec->cur >> 5) != 0)
833     ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
834    
835     UV idx = decode_uint (dec);
836    
837     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
838     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
839    
840     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
841     }
842     break;
843 root 1.11
844 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
845     {
846     if (ecb_expect_false (!dec->shareable))
847     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
848    
849     sv = newSV (0);
850     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
851 root 1.18
852 root 1.19 SV *osv = decode_sv (dec);
853     sv_setsv (sv, osv);
854     SvREFCNT_dec_NN (osv);
855     }
856 root 1.20 break;
857 root 1.18
858     case CBOR_TAG_VALUE_SHAREDREF:
859 root 1.17 {
860 root 1.19 if ((*dec->cur >> 5) != 0)
861     ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
862 root 1.18
863 root 1.19 UV idx = decode_uint (dec);
864    
865 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
866 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
867    
868 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
869 root 1.17 }
870 root 1.20 break;
871 root 1.17
872 root 1.18 case CBOR_TAG_PERL_OBJECT:
873     {
874 root 1.19 sv = decode_sv (dec);
875    
876 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
877     ERR ("corrupted CBOR data (non-array perl object)");
878    
879     AV *av = (AV *)SvRV (sv);
880     int len = av_len (av) + 1;
881     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
882    
883     if (!stash)
884     ERR ("cannot decode perl-object (package does not exist)");
885    
886     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
887    
888     if (!method)
889     ERR ("cannot decode perl-object (package does not have a THAW method)");
890    
891     dSP;
892    
893     ENTER; SAVETMPS; PUSHMARK (SP);
894     EXTEND (SP, len + 1);
895     // we re-bless the reference to get overload and other niceties right
896     PUSHs (*av_fetch (av, 0, 1));
897     PUSHs (sv_cbor);
898    
899     int i;
900    
901     for (i = 1; i < len; ++i)
902     PUSHs (*av_fetch (av, i, 1));
903    
904     PUTBACK;
905     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
906     SPAGAIN;
907    
908     if (SvTRUE (ERRSV))
909     {
910     FREETMPS; LEAVE;
911     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
912     }
913    
914     SvREFCNT_dec (sv);
915     sv = SvREFCNT_inc (POPs);
916 root 1.11
917 root 1.18 PUTBACK;
918 root 1.11
919 root 1.18 FREETMPS; LEAVE;
920     }
921 root 1.20 break;
922 root 1.9
923 root 1.18 default:
924     {
925 root 1.19 sv = decode_sv (dec);
926    
927 root 1.27 dSP;
928     ENTER; SAVETMPS; PUSHMARK (SP);
929     EXTEND (SP, 2);
930     PUSHs (newSVuv (tag));
931     PUSHs (sv);
932    
933     PUTBACK;
934     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
935     SPAGAIN;
936    
937     if (SvTRUE (ERRSV))
938     {
939     FREETMPS; LEAVE;
940     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
941     }
942    
943     if (count)
944     {
945     SvREFCNT_dec (sv);
946     sv = SvREFCNT_inc (POPs);
947     }
948     else
949     {
950     AV *av = newAV ();
951     av_push (av, newSVuv (tag));
952     av_push (av, sv);
953    
954     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
955     ? cbor_tagged_stash
956     : gv_stashpv ("CBOR::XS::Tagged" , 1);
957     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
958     }
959 root 1.7
960 root 1.27 PUTBACK;
961    
962     FREETMPS; LEAVE;
963 root 1.18 }
964 root 1.20 break;
965 root 1.11 }
966 root 1.9
967 root 1.20 return sv;
968    
969 root 1.9 fail:
970     SvREFCNT_dec (sv);
971     return &PL_sv_undef;
972 root 1.3 }
973    
974     static SV *
975 root 1.1 decode_sv (dec_t *dec)
976     {
977     WANT (1);
978    
979     switch (*dec->cur >> 5)
980     {
981     case 0: // unsigned int
982     return newSVuv (decode_uint (dec));
983     case 1: // negative int
984     return newSViv (-1 - (IV)decode_uint (dec));
985     case 2: // octet string
986     return decode_str (dec, 0);
987     case 3: // utf-8 string
988     return decode_str (dec, 1);
989     case 4: // array
990     return decode_av (dec);
991     case 5: // map
992     return decode_hv (dec);
993     case 6: // tag
994 root 1.3 return decode_tagged (dec);
995 root 1.1 case 7: // misc
996     switch (*dec->cur++ & 31)
997     {
998     case 20:
999     #if CBOR_SLOW
1000 root 1.10 types_false = get_bool ("Types::Serialiser::false");
1001 root 1.1 #endif
1002 root 1.10 return newSVsv (types_false);
1003 root 1.1 case 21:
1004     #if CBOR_SLOW
1005 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1006 root 1.1 #endif
1007 root 1.10 return newSVsv (types_true);
1008 root 1.1 case 22:
1009     return newSVsv (&PL_sv_undef);
1010 root 1.10 case 23:
1011     #if CBOR_SLOW
1012     types_error = get_bool ("Types::Serialiser::error");
1013     #endif
1014     return newSVsv (types_error);
1015 root 1.1
1016     case 25:
1017 root 1.2 {
1018     WANT (2);
1019    
1020     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1021     dec->cur += 2;
1022    
1023     return newSVnv (ecb_binary16_to_float (fp));
1024     }
1025 root 1.1
1026     case 26:
1027     {
1028     uint32_t fp;
1029     WANT (4);
1030     memcpy (&fp, dec->cur, 4);
1031     dec->cur += 4;
1032    
1033     if (!ecb_big_endian ())
1034     fp = ecb_bswap32 (fp);
1035    
1036     return newSVnv (ecb_binary32_to_float (fp));
1037     }
1038    
1039     case 27:
1040     {
1041     uint64_t fp;
1042     WANT (8);
1043     memcpy (&fp, dec->cur, 8);
1044     dec->cur += 8;
1045    
1046     if (!ecb_big_endian ())
1047     fp = ecb_bswap64 (fp);
1048    
1049     return newSVnv (ecb_binary64_to_double (fp));
1050     }
1051    
1052     // 0..19 unassigned
1053     // 24 reserved + unassigned (reserved values are not encodable)
1054     default:
1055     ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
1056     }
1057    
1058     break;
1059     }
1060    
1061     fail:
1062     return &PL_sv_undef;
1063     }
1064    
1065     static SV *
1066     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1067     {
1068 root 1.18 dec_t dec = { };
1069 root 1.1 SV *sv;
1070 root 1.16 STRLEN len;
1071     char *data = SvPVbyte (string, len);
1072 root 1.1
1073 root 1.16 if (len > cbor->max_size && cbor->max_size)
1074     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1075     (unsigned long)len, (unsigned long)cbor->max_size);
1076 root 1.1
1077     dec.cbor = *cbor;
1078 root 1.16 dec.cur = (U8 *)data;
1079     dec.end = (U8 *)data + len;
1080 root 1.1
1081     sv = decode_sv (&dec);
1082    
1083     if (offset_return)
1084     *offset_return = dec.cur;
1085    
1086     if (!(offset_return || !sv))
1087 root 1.2 if (dec.cur != dec.end && !dec.err)
1088     dec.err = "garbage after CBOR object";
1089    
1090     if (dec.err)
1091 root 1.1 {
1092 root 1.2 SvREFCNT_dec (sv);
1093 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1094 root 1.1 }
1095    
1096     sv = sv_2mortal (sv);
1097    
1098     return sv;
1099     }
1100    
1101     /////////////////////////////////////////////////////////////////////////////
1102     // XS interface functions
1103    
1104     MODULE = CBOR::XS PACKAGE = CBOR::XS
1105    
1106     BOOT:
1107     {
1108     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1109 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1110 root 1.1
1111 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1112     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1113    
1114     types_true = get_bool ("Types::Serialiser::true" );
1115     types_false = get_bool ("Types::Serialiser::false");
1116     types_error = get_bool ("Types::Serialiser::error");
1117 root 1.11
1118 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1119    
1120 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1121     SvREADONLY_on (sv_cbor);
1122 root 1.1 }
1123    
1124     PROTOTYPES: DISABLE
1125    
1126     void CLONE (...)
1127     CODE:
1128 root 1.10 cbor_stash = 0;
1129     cbor_tagged_stash = 0;
1130     types_error_stash = 0;
1131     types_boolean_stash = 0;
1132 root 1.1
1133     void new (char *klass)
1134     PPCODE:
1135     {
1136     SV *pv = NEWSV (0, sizeof (CBOR));
1137     SvPOK_only (pv);
1138     cbor_init ((CBOR *)SvPVX (pv));
1139     XPUSHs (sv_2mortal (sv_bless (
1140     newRV_noinc (pv),
1141     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1142     )));
1143     }
1144    
1145     void shrink (CBOR *self, int enable = 1)
1146     ALIAS:
1147     shrink = F_SHRINK
1148     allow_unknown = F_ALLOW_UNKNOWN
1149 root 1.18 allow_sharing = F_ALLOW_SHARING
1150 root 1.24 allow_stringref = F_ALLOW_STRINGREF
1151 root 1.1 PPCODE:
1152     {
1153     if (enable)
1154     self->flags |= ix;
1155     else
1156     self->flags &= ~ix;
1157    
1158     XPUSHs (ST (0));
1159     }
1160    
1161     void get_shrink (CBOR *self)
1162     ALIAS:
1163     get_shrink = F_SHRINK
1164     get_allow_unknown = F_ALLOW_UNKNOWN
1165 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1166 root 1.24 get_allow_stringref = F_ALLOW_STRINGREF
1167 root 1.1 PPCODE:
1168     XPUSHs (boolSV (self->flags & ix));
1169    
1170     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1171     PPCODE:
1172     self->max_depth = max_depth;
1173     XPUSHs (ST (0));
1174    
1175     U32 get_max_depth (CBOR *self)
1176     CODE:
1177     RETVAL = self->max_depth;
1178     OUTPUT:
1179     RETVAL
1180    
1181     void max_size (CBOR *self, U32 max_size = 0)
1182     PPCODE:
1183     self->max_size = max_size;
1184     XPUSHs (ST (0));
1185    
1186     int get_max_size (CBOR *self)
1187     CODE:
1188     RETVAL = self->max_size;
1189     OUTPUT:
1190     RETVAL
1191    
1192 root 1.27 void filter (CBOR *self, SV *filter = 0)
1193     PPCODE:
1194     SvREFCNT_dec (self->filter);
1195     self->filter = filter ? newSVsv (filter) : filter;
1196     XPUSHs (ST (0));
1197    
1198     SV *get_filter (CBOR *self)
1199     CODE:
1200     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1201     OUTPUT:
1202     RETVAL
1203    
1204 root 1.1 void encode (CBOR *self, SV *scalar)
1205     PPCODE:
1206     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1207     XPUSHs (scalar);
1208    
1209     void decode (CBOR *self, SV *cborstr)
1210     PPCODE:
1211     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1212     XPUSHs (cborstr);
1213    
1214     void decode_prefix (CBOR *self, SV *cborstr)
1215     PPCODE:
1216     {
1217     SV *sv;
1218     char *offset;
1219     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1220     EXTEND (SP, 2);
1221     PUSHs (sv);
1222     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1223     }
1224    
1225 root 1.27 void DESTROY (CBOR *self)
1226     PPCODE:
1227     cbor_free (self);
1228    
1229 root 1.1 PROTOTYPES: ENABLE
1230    
1231     void encode_cbor (SV *scalar)
1232     PPCODE:
1233     {
1234     CBOR cbor;
1235     cbor_init (&cbor);
1236     PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1237     XPUSHs (scalar);
1238     }
1239    
1240     void decode_cbor (SV *cborstr)
1241     PPCODE:
1242     {
1243     CBOR cbor;
1244     cbor_init (&cbor);
1245     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1246     XPUSHs (cborstr);
1247     }
1248