ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
Revision: 1.29
Committed: Thu Nov 28 09:13:12 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.28: +9 -9 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include <assert.h>
6     #include <string.h>
7     #include <stdlib.h>
8     #include <stdio.h>
9     #include <limits.h>
10     #include <float.h>
11    
12     #include "ecb.h"
13    
14 root 1.14 // compatibility with perl <5.18
15     #ifndef HvNAMELEN_get
16     # define HvNAMELEN_get(hv) strlen (HvNAME (hv))
17     #endif
18     #ifndef HvNAMELEN
19     # define HvNAMELEN(hv) HvNAMELEN_get (hv)
20     #endif
21     #ifndef HvNAMEUTF8
22     # define HvNAMEUTF8(hv) 0
23     #endif
24 root 1.28 #ifndef SvREFCNT_dec_NN
25     # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
26     #endif
27 root 1.14
28 root 1.9 // known tags
29 root 1.8 enum cbor_tag
30     {
31 root 1.29 // extensions
32     CBOR_TAG_STRINGREF = 25, // http://cbor.schmorp.de/stringref
33     CBOR_TAG_PERL_OBJECT = 26, // http://cbor.schmorp.de/perl-object
34     CBOR_TAG_GENERIC_OBJECT = 27, // http://cbor.schmorp.de/generic-object
35     CBOR_TAG_VALUE_SHAREABLE = 28, // http://cbor.schmorp.de/value-sharing
36     CBOR_TAG_VALUE_SHAREDREF = 29, // http://cbor.schmorp.de/value-sharing
37     CBOR_TAG_STRINGREF_NAMESPACE = 256, // http://cbor.schmorp.de/stringref
38 root 1.18 CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
39 root 1.9
40     // rfc7049
41     CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
42     CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
43     CBOR_TAG_POS_BIGNUM = 2, // byte string
44     CBOR_TAG_NEG_BIGNUM = 3, // byte string
45     CBOR_TAG_DECIMAL = 4, // decimal fraction, array
46     CBOR_TAG_BIGFLOAT = 5, // array
47    
48     CBOR_TAG_CONV_B64U = 21, // base64url, any
49     CBOR_TAG_CONV_B64 = 22, // base64, any
50     CBOR_TAG_CONV_HEX = 23, // base16, any
51     CBOR_TAG_CBOR = 24, // embedded cbor, byte string
52    
53     CBOR_TAG_URI = 32, // URI rfc3986, utf-8
54     CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
55     CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8
56     CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
57     CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
58 root 1.8
59 root 1.9 CBOR_TAG_MAGIC = 55799 // self-describe cbor
60 root 1.8 };
61    
62 root 1.24 #define F_SHRINK 0x00000001UL
63     #define F_ALLOW_UNKNOWN 0x00000002UL
64     #define F_ALLOW_SHARING 0x00000004UL //TODO
65     #define F_ALLOW_STRINGREF 0x00000008UL //TODO
66 root 1.1
67     #define INIT_SIZE 32 // initial scalar size to be allocated
68    
69     #define SB do {
70     #define SE } while (0)
71    
72     #define IN_RANGE_INC(type,val,beg,end) \
73     ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
74     <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
75    
76     #define ERR_NESTING_EXCEEDED "cbor text or perl structure exceeds maximum nesting level (max_depth set too low?)"
77    
78     #ifdef USE_ITHREADS
79     # define CBOR_SLOW 1
80     # define CBOR_STASH (cbor_stash ? cbor_stash : gv_stashpv ("CBOR::XS", 1))
81     #else
82     # define CBOR_SLOW 0
83     # define CBOR_STASH cbor_stash
84     #endif
85    
86 root 1.10 static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
87 root 1.27 static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
88 root 1.1
89     typedef struct {
90     U32 flags;
91     U32 max_depth;
92     STRLEN max_size;
93 root 1.27 SV *filter;
94 root 1.1 } CBOR;
95    
96 root 1.5 ecb_inline void
97 root 1.1 cbor_init (CBOR *cbor)
98     {
99     Zero (cbor, 1, CBOR);
100     cbor->max_depth = 512;
101     }
102    
103 root 1.27 ecb_inline void
104     cbor_free (CBOR *cbor)
105     {
106     SvREFCNT_dec (cbor->filter);
107     }
108    
109 root 1.1 /////////////////////////////////////////////////////////////////////////////
110     // utility functions
111    
112 root 1.5 ecb_inline SV *
113 root 1.1 get_bool (const char *name)
114     {
115     SV *sv = get_sv (name, 1);
116    
117     SvREADONLY_on (sv);
118     SvREADONLY_on (SvRV (sv));
119    
120     return sv;
121     }
122    
123 root 1.5 ecb_inline void
124 root 1.1 shrink (SV *sv)
125     {
126     sv_utf8_downgrade (sv, 1);
127    
128     if (SvLEN (sv) > SvCUR (sv) + 1)
129     {
130     #ifdef SvPV_shrink_to_cur
131     SvPV_shrink_to_cur (sv);
132     #elif defined (SvPV_renew)
133     SvPV_renew (sv, SvCUR (sv) + 1);
134     #endif
135     }
136     }
137    
138 root 1.21 // minimum length of a string to be registered for stringref
139     ecb_inline int
140     minimum_string_length (UV idx)
141     {
142     return idx > 23
143     ? idx > 0xffU
144     ? idx > 0xffffU
145     ? idx > 0xffffffffU
146 root 1.29 ? 11
147     : 7
148 root 1.21 : 5
149     : 4
150     : 3;
151     }
152    
153 root 1.1 /////////////////////////////////////////////////////////////////////////////
154     // encoder
155    
156     // structure used for encoding CBOR
157     typedef struct
158     {
159     char *cur; // SvPVX (sv) + current output position
160     char *end; // SvEND (sv)
161     SV *sv; // result scalar
162     CBOR cbor;
163     U32 depth; // recursion level
164 root 1.20 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
165     UV stringref_idx;
166 root 1.19 HV *shareable; // ptr => index, or 0
167     UV shareable_idx;
168 root 1.1 } enc_t;
169    
170 root 1.5 ecb_inline void
171 root 1.1 need (enc_t *enc, STRLEN len)
172     {
173 root 1.5 if (ecb_expect_false (enc->cur + len >= enc->end))
174 root 1.1 {
175     STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
176     SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
177     enc->cur = SvPVX (enc->sv) + cur;
178     enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
179     }
180     }
181    
182 root 1.5 ecb_inline void
183 root 1.1 encode_ch (enc_t *enc, char ch)
184     {
185     need (enc, 1);
186     *enc->cur++ = ch;
187     }
188    
189     static void
190     encode_uint (enc_t *enc, int major, UV len)
191     {
192     need (enc, 9);
193    
194     if (len < 24)
195     *enc->cur++ = major | len;
196 root 1.4 else if (len <= 0xff)
197 root 1.1 {
198     *enc->cur++ = major | 24;
199     *enc->cur++ = len;
200     }
201 root 1.4 else if (len <= 0xffff)
202 root 1.1 {
203     *enc->cur++ = major | 25;
204     *enc->cur++ = len >> 8;
205     *enc->cur++ = len;
206     }
207 root 1.4 else if (len <= 0xffffffff)
208 root 1.1 {
209     *enc->cur++ = major | 26;
210     *enc->cur++ = len >> 24;
211     *enc->cur++ = len >> 16;
212     *enc->cur++ = len >> 8;
213     *enc->cur++ = len;
214     }
215 root 1.4 else
216 root 1.1 {
217     *enc->cur++ = major | 27;
218     *enc->cur++ = len >> 56;
219     *enc->cur++ = len >> 48;
220     *enc->cur++ = len >> 40;
221     *enc->cur++ = len >> 32;
222     *enc->cur++ = len >> 24;
223     *enc->cur++ = len >> 16;
224     *enc->cur++ = len >> 8;
225     *enc->cur++ = len;
226     }
227     }
228    
229 root 1.21 ecb_inline void
230     encode_tag (enc_t *enc, UV tag)
231     {
232     encode_uint (enc, 0xc0, tag);
233     }
234    
235 root 1.1 static void
236     encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
237     {
238 root 1.24 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_STRINGREF))
239 root 1.21 {
240 root 1.22 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
241 root 1.21
242     if (SvOK (*svp))
243     {
244     // already registered, use stringref
245     encode_tag (enc, CBOR_TAG_STRINGREF);
246     encode_uint (enc, 0x00, SvUV (*svp));
247     return;
248     }
249     else if (len >= minimum_string_length (enc->stringref_idx))
250     {
251     // register only
252     sv_setuv (*svp, enc->stringref_idx);
253     ++enc->stringref_idx;
254     }
255     }
256    
257 root 1.1 encode_uint (enc, utf8 ? 0x60 : 0x40, len);
258     need (enc, len);
259     memcpy (enc->cur, str, len);
260     enc->cur += len;
261     }
262    
263     static void encode_sv (enc_t *enc, SV *sv);
264    
265     static void
266     encode_av (enc_t *enc, AV *av)
267     {
268     int i, len = av_len (av);
269    
270     if (enc->depth >= enc->cbor.max_depth)
271     croak (ERR_NESTING_EXCEEDED);
272    
273     ++enc->depth;
274    
275     encode_uint (enc, 0x80, len + 1);
276    
277     for (i = 0; i <= len; ++i)
278     {
279     SV **svp = av_fetch (av, i, 0);
280     encode_sv (enc, svp ? *svp : &PL_sv_undef);
281     }
282    
283     --enc->depth;
284     }
285    
286     static void
287     encode_hv (enc_t *enc, HV *hv)
288     {
289     HE *he;
290    
291     if (enc->depth >= enc->cbor.max_depth)
292     croak (ERR_NESTING_EXCEEDED);
293    
294     ++enc->depth;
295    
296     int pairs = hv_iterinit (hv);
297     int mg = SvMAGICAL (hv);
298    
299     if (mg)
300     encode_ch (enc, 0xa0 | 31);
301     else
302     encode_uint (enc, 0xa0, pairs);
303    
304     while ((he = hv_iternext (hv)))
305     {
306 root 1.21 if (HeKLEN (he) == HEf_SVKEY)
307     encode_sv (enc, HeSVKEY (he));
308 root 1.1 else
309 root 1.21 encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
310 root 1.1
311 root 1.5 encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
312 root 1.1 }
313    
314     if (mg)
315     encode_ch (enc, 0xe0 | 31);
316    
317     --enc->depth;
318     }
319    
320     // encode objects, arrays and special \0=false and \1=true values.
321     static void
322     encode_rv (enc_t *enc, SV *sv)
323     {
324 root 1.19 SvGETMAGIC (sv);
325 root 1.1
326 root 1.19 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
327     && ecb_expect_false (SvREFCNT (sv) > 1))
328     {
329     if (!enc->shareable)
330     enc->shareable = (HV *)sv_2mortal ((SV *)newHV ());
331    
332     SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1);
333    
334     if (SvOK (*svp))
335     {
336     encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
337     encode_uint (enc, 0x00, SvUV (*svp));
338     return;
339     }
340     else
341     {
342     sv_setuv (*svp, enc->shareable_idx);
343     ++enc->shareable_idx;
344     encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE);
345     }
346     }
347 root 1.1
348 root 1.19 svtype svt = SvTYPE (sv);
349 root 1.18
350 root 1.5 if (ecb_expect_false (SvOBJECT (sv)))
351 root 1.1 {
352 root 1.10 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
353     ? types_boolean_stash
354     : gv_stashpv ("Types::Serialiser::Boolean", 1);
355     HV *error_stash = !CBOR_SLOW || types_error_stash
356     ? types_error_stash
357     : gv_stashpv ("Types::Serialiser::Error", 1);
358 root 1.6 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
359     ? cbor_tagged_stash
360     : gv_stashpv ("CBOR::XS::Tagged" , 1);
361 root 1.1
362 root 1.11 HV *stash = SvSTASH (sv);
363     GV *method;
364    
365     if (stash == boolean_stash)
366 root 1.1 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
367 root 1.11 else if (stash == error_stash)
368 root 1.10 encode_ch (enc, 0xe0 | 23);
369 root 1.11 else if (stash == tagged_stash)
370 root 1.6 {
371     if (svt != SVt_PVAV)
372     croak ("encountered CBOR::XS::Tagged object that isn't an array");
373    
374     encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
375     encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
376     }
377 root 1.11 else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0)))
378 root 1.1 {
379 root 1.11 dSP;
380    
381     ENTER; SAVETMPS; PUSHMARK (SP);
382 root 1.6 // we re-bless the reference to get overload and other niceties right
383 root 1.11 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
384    
385     PUTBACK;
386     // G_SCALAR ensures that return value is 1
387     call_sv ((SV *)GvCV (method), G_SCALAR);
388     SPAGAIN;
389    
390     // catch this surprisingly common error
391     if (SvROK (TOPs) && SvRV (TOPs) == sv)
392     croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash));
393    
394     encode_sv (enc, POPs);
395    
396     PUTBACK;
397    
398     FREETMPS; LEAVE;
399     }
400     else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0)
401     {
402     dSP;
403 root 1.6
404 root 1.11 ENTER; SAVETMPS; PUSHMARK (SP);
405     EXTEND (SP, 2);
406     // we re-bless the reference to get overload and other niceties right
407     PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
408     PUSHs (sv_cbor);
409 root 1.1
410 root 1.11 PUTBACK;
411     int count = call_sv ((SV *)GvCV (method), G_ARRAY);
412     SPAGAIN;
413 root 1.6
414 root 1.11 // catch this surprisingly common error
415     if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
416     croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
417 root 1.6
418 root 1.18 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
419 root 1.11 encode_uint (enc, 0x80, count + 1);
420     encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
421 root 1.6
422 root 1.11 while (count)
423     encode_sv (enc, SP[1 - count--]);
424 root 1.6
425 root 1.11 PUTBACK;
426 root 1.6
427 root 1.11 FREETMPS; LEAVE;
428 root 1.1 }
429 root 1.11 else
430     croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it",
431     SvPV_nolen (sv_2mortal (newRV_inc (sv))));
432 root 1.1 }
433     else if (svt == SVt_PVHV)
434     encode_hv (enc, (HV *)sv);
435     else if (svt == SVt_PVAV)
436     encode_av (enc, (AV *)sv);
437 root 1.18 else
438 root 1.1 {
439 root 1.18 encode_tag (enc, CBOR_TAG_INDIRECTION);
440     encode_sv (enc, sv);
441 root 1.1 }
442     }
443    
444     static void
445     encode_nv (enc_t *enc, SV *sv)
446     {
447     double nv = SvNVX (sv);
448    
449     need (enc, 9);
450    
451 root 1.5 if (ecb_expect_false (nv == (U32)nv))
452 root 1.1 encode_uint (enc, 0x00, (U32)nv);
453     //TODO: maybe I32?
454 root 1.5 else if (ecb_expect_false (nv == (float)nv))
455 root 1.1 {
456     uint32_t fp = ecb_float_to_binary32 (nv);
457    
458     *enc->cur++ = 0xe0 | 26;
459    
460     if (!ecb_big_endian ())
461     fp = ecb_bswap32 (fp);
462    
463     memcpy (enc->cur, &fp, 4);
464     enc->cur += 4;
465     }
466     else
467     {
468     uint64_t fp = ecb_double_to_binary64 (nv);
469    
470     *enc->cur++ = 0xe0 | 27;
471    
472     if (!ecb_big_endian ())
473     fp = ecb_bswap64 (fp);
474    
475     memcpy (enc->cur, &fp, 8);
476     enc->cur += 8;
477     }
478     }
479    
480     static void
481     encode_sv (enc_t *enc, SV *sv)
482     {
483     SvGETMAGIC (sv);
484    
485     if (SvPOKp (sv))
486     {
487     STRLEN len;
488     char *str = SvPV (sv, len);
489     encode_str (enc, SvUTF8 (sv), str, len);
490     }
491     else if (SvNOKp (sv))
492     encode_nv (enc, sv);
493     else if (SvIOKp (sv))
494     {
495     if (SvIsUV (sv))
496     encode_uint (enc, 0x00, SvUVX (sv));
497     else if (SvIVX (sv) >= 0)
498     encode_uint (enc, 0x00, SvIVX (sv));
499     else
500     encode_uint (enc, 0x20, -(SvIVX (sv) + 1));
501     }
502     else if (SvROK (sv))
503     encode_rv (enc, SvRV (sv));
504     else if (!SvOK (sv))
505     encode_ch (enc, 0xe0 | 22);
506     else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
507     encode_ch (enc, 0xe0 | 23);
508     else
509     croak ("encountered perl type (%s,0x%x) that CBOR cannot handle, check your input data",
510     SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
511     }
512    
513     static SV *
514     encode_cbor (SV *scalar, CBOR *cbor)
515     {
516 root 1.18 enc_t enc = { };
517 root 1.1
518     enc.cbor = *cbor;
519     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
520     enc.cur = SvPVX (enc.sv);
521     enc.end = SvEND (enc.sv);
522    
523     SvPOK_only (enc.sv);
524 root 1.20
525 root 1.24 if (cbor->flags & F_ALLOW_STRINGREF)
526 root 1.20 {
527     encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
528     enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
529     enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
530     }
531    
532 root 1.1 encode_sv (&enc, scalar);
533    
534     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
535     *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
536    
537     if (enc.cbor.flags & F_SHRINK)
538     shrink (enc.sv);
539    
540     return enc.sv;
541     }
542    
543     /////////////////////////////////////////////////////////////////////////////
544     // decoder
545    
546     // structure used for decoding CBOR
547     typedef struct
548     {
549     U8 *cur; // current parser pointer
550     U8 *end; // end of input string
551     const char *err; // parse error, if != 0
552     CBOR cbor;
553     U32 depth; // recursion depth
554     U32 maxdepth; // recursion depth limit
555 root 1.19 AV *shareable;
556 root 1.20 AV *stringref;
557 root 1.27 SV *decode_tagged;
558 root 1.1 } dec_t;
559    
560     #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
561    
562 root 1.5 #define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
563 root 1.1
564     #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED)
565     #define DEC_DEC_DEPTH --dec->depth
566    
567     static UV
568     decode_uint (dec_t *dec)
569     {
570     switch (*dec->cur & 31)
571     {
572     case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
573     case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
574     case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
575     return *dec->cur++ & 31;
576    
577     case 24:
578     WANT (2);
579     dec->cur += 2;
580     return dec->cur[-1];
581    
582     case 25:
583     WANT (3);
584     dec->cur += 3;
585     return (((UV)dec->cur[-2]) << 8)
586     | ((UV)dec->cur[-1]);
587    
588     case 26:
589     WANT (5);
590     dec->cur += 5;
591     return (((UV)dec->cur[-4]) << 24)
592     | (((UV)dec->cur[-3]) << 16)
593     | (((UV)dec->cur[-2]) << 8)
594     | ((UV)dec->cur[-1]);
595    
596     case 27:
597     WANT (9);
598     dec->cur += 9;
599     return (((UV)dec->cur[-8]) << 56)
600     | (((UV)dec->cur[-7]) << 48)
601     | (((UV)dec->cur[-6]) << 40)
602     | (((UV)dec->cur[-5]) << 32)
603     | (((UV)dec->cur[-4]) << 24)
604     | (((UV)dec->cur[-3]) << 16)
605     | (((UV)dec->cur[-2]) << 8)
606     | ((UV)dec->cur[-1]);
607    
608     default:
609     ERR ("corrupted CBOR data (unsupported integer minor encoding)");
610     }
611    
612     fail:
613     return 0;
614     }
615    
616     static SV *decode_sv (dec_t *dec);
617    
618     static SV *
619     decode_av (dec_t *dec)
620     {
621     AV *av = newAV ();
622    
623     DEC_INC_DEPTH;
624    
625     if ((*dec->cur & 31) == 31)
626     {
627     ++dec->cur;
628    
629     for (;;)
630     {
631     WANT (1);
632    
633 root 1.2 if (*dec->cur == (0xe0 | 31))
634 root 1.1 {
635     ++dec->cur;
636     break;
637     }
638    
639     av_push (av, decode_sv (dec));
640     }
641     }
642     else
643     {
644     int i, len = decode_uint (dec);
645    
646     av_fill (av, len - 1);
647    
648     for (i = 0; i < len; ++i)
649     AvARRAY (av)[i] = decode_sv (dec);
650     }
651    
652     DEC_DEC_DEPTH;
653     return newRV_noinc ((SV *)av);
654    
655     fail:
656     SvREFCNT_dec (av);
657     DEC_DEC_DEPTH;
658     return &PL_sv_undef;
659     }
660    
661 root 1.16 static void
662     decode_he (dec_t *dec, HV *hv)
663     {
664     // for speed reasons, we specialcase single-string
665 root 1.21 // byte or utf-8 strings as keys, but only when !stringref
666    
667 root 1.23 if (ecb_expect_true (!dec->stringref))
668 root 1.21 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
669     {
670     I32 len = decode_uint (dec);
671     char *key = (char *)dec->cur;
672 root 1.16
673 root 1.21 dec->cur += len;
674    
675     if (ecb_expect_false (dec->stringref))
676     av_push (dec->stringref, newSVpvn (key, len));
677    
678     hv_store (hv, key, len, decode_sv (dec), 0);
679 root 1.16
680 root 1.21 return;
681     }
682     else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
683     {
684     I32 len = decode_uint (dec);
685     char *key = (char *)dec->cur;
686 root 1.16
687 root 1.21 dec->cur += len;
688 root 1.20
689 root 1.21 if (ecb_expect_false (dec->stringref))
690     av_push (dec->stringref, newSVpvn_utf8 (key, len, 1));
691 root 1.16
692 root 1.21 hv_store (hv, key, -len, decode_sv (dec), 0);
693 root 1.16
694 root 1.21 return;
695     }
696 root 1.20
697 root 1.21 SV *k = decode_sv (dec);
698     SV *v = decode_sv (dec);
699 root 1.16
700 root 1.21 hv_store_ent (hv, k, v, 0);
701     SvREFCNT_dec (k);
702 root 1.16 }
703    
704 root 1.1 static SV *
705     decode_hv (dec_t *dec)
706     {
707     HV *hv = newHV ();
708    
709     DEC_INC_DEPTH;
710    
711     if ((*dec->cur & 31) == 31)
712     {
713     ++dec->cur;
714    
715     for (;;)
716     {
717     WANT (1);
718    
719 root 1.2 if (*dec->cur == (0xe0 | 31))
720 root 1.1 {
721     ++dec->cur;
722     break;
723     }
724    
725 root 1.16 decode_he (dec, hv);
726 root 1.1 }
727     }
728     else
729     {
730 root 1.16 int pairs = decode_uint (dec);
731 root 1.1
732 root 1.16 while (pairs--)
733     decode_he (dec, hv);
734 root 1.1 }
735    
736     DEC_DEC_DEPTH;
737     return newRV_noinc ((SV *)hv);
738    
739     fail:
740     SvREFCNT_dec (hv);
741     DEC_DEC_DEPTH;
742     return &PL_sv_undef;
743     }
744    
745     static SV *
746     decode_str (dec_t *dec, int utf8)
747     {
748 root 1.6 SV *sv = 0;
749 root 1.1
750     if ((*dec->cur & 31) == 31)
751     {
752     ++dec->cur;
753    
754     sv = newSVpvn ("", 0);
755    
756     // not very fast, and certainly not robust against illegal input
757     for (;;)
758     {
759     WANT (1);
760    
761 root 1.2 if (*dec->cur == (0xe0 | 31))
762 root 1.1 {
763     ++dec->cur;
764     break;
765     }
766    
767 root 1.6 sv_catsv (sv, decode_sv (dec));
768 root 1.1 }
769     }
770     else
771     {
772     STRLEN len = decode_uint (dec);
773    
774     WANT (len);
775     sv = newSVpvn (dec->cur, len);
776     dec->cur += len;
777 root 1.25
778     if (ecb_expect_false (dec->stringref)
779     && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
780     av_push (dec->stringref, SvREFCNT_inc_NN (sv));
781 root 1.1 }
782    
783     if (utf8)
784     SvUTF8_on (sv);
785    
786     return sv;
787    
788     fail:
789 root 1.6 SvREFCNT_dec (sv);
790 root 1.1 return &PL_sv_undef;
791     }
792    
793     static SV *
794 root 1.3 decode_tagged (dec_t *dec)
795     {
796 root 1.19 SV *sv = 0;
797 root 1.3 UV tag = decode_uint (dec);
798 root 1.19
799     WANT (1);
800 root 1.3
801 root 1.18 switch (tag)
802     {
803     case CBOR_TAG_MAGIC:
804 root 1.20 sv = decode_sv (dec);
805     break;
806 root 1.18
807     case CBOR_TAG_INDIRECTION:
808 root 1.20 sv = newRV_noinc (decode_sv (dec));
809     break;
810    
811     case CBOR_TAG_STRINGREF_NAMESPACE:
812     {
813     ENTER; SAVETMPS;
814    
815     SAVESPTR (dec->stringref);
816     dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
817    
818     sv = decode_sv (dec);
819    
820     FREETMPS; LEAVE;
821     }
822     break;
823    
824     case CBOR_TAG_STRINGREF:
825     {
826     if ((*dec->cur >> 5) != 0)
827     ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
828    
829     UV idx = decode_uint (dec);
830    
831     if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
832     ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
833    
834     sv = newSVsv (AvARRAY (dec->stringref)[idx]);
835     }
836     break;
837 root 1.11
838 root 1.19 case CBOR_TAG_VALUE_SHAREABLE:
839     {
840     if (ecb_expect_false (!dec->shareable))
841     dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
842    
843     sv = newSV (0);
844     av_push (dec->shareable, SvREFCNT_inc_NN (sv));
845 root 1.18
846 root 1.19 SV *osv = decode_sv (dec);
847     sv_setsv (sv, osv);
848     SvREFCNT_dec_NN (osv);
849     }
850 root 1.20 break;
851 root 1.18
852     case CBOR_TAG_VALUE_SHAREDREF:
853 root 1.17 {
854 root 1.19 if ((*dec->cur >> 5) != 0)
855     ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
856 root 1.18
857 root 1.19 UV idx = decode_uint (dec);
858    
859 root 1.20 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
860 root 1.18 ERR ("corrupted CBOR data (sharedref index out of bounds)");
861    
862 root 1.20 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
863 root 1.17 }
864 root 1.20 break;
865 root 1.17
866 root 1.18 case CBOR_TAG_PERL_OBJECT:
867     {
868 root 1.19 sv = decode_sv (dec);
869    
870 root 1.18 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
871     ERR ("corrupted CBOR data (non-array perl object)");
872    
873     AV *av = (AV *)SvRV (sv);
874     int len = av_len (av) + 1;
875     HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
876    
877     if (!stash)
878     ERR ("cannot decode perl-object (package does not exist)");
879    
880     GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
881    
882     if (!method)
883     ERR ("cannot decode perl-object (package does not have a THAW method)");
884    
885     dSP;
886    
887     ENTER; SAVETMPS; PUSHMARK (SP);
888     EXTEND (SP, len + 1);
889     // we re-bless the reference to get overload and other niceties right
890     PUSHs (*av_fetch (av, 0, 1));
891     PUSHs (sv_cbor);
892    
893     int i;
894    
895     for (i = 1; i < len; ++i)
896     PUSHs (*av_fetch (av, i, 1));
897    
898     PUTBACK;
899     call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
900     SPAGAIN;
901    
902     if (SvTRUE (ERRSV))
903     {
904     FREETMPS; LEAVE;
905     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
906     }
907    
908     SvREFCNT_dec (sv);
909     sv = SvREFCNT_inc (POPs);
910 root 1.11
911 root 1.18 PUTBACK;
912 root 1.11
913 root 1.18 FREETMPS; LEAVE;
914     }
915 root 1.20 break;
916 root 1.9
917 root 1.18 default:
918     {
919 root 1.19 sv = decode_sv (dec);
920    
921 root 1.27 dSP;
922     ENTER; SAVETMPS; PUSHMARK (SP);
923     EXTEND (SP, 2);
924     PUSHs (newSVuv (tag));
925     PUSHs (sv);
926    
927     PUTBACK;
928     int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
929     SPAGAIN;
930    
931     if (SvTRUE (ERRSV))
932     {
933     FREETMPS; LEAVE;
934     ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
935     }
936    
937     if (count)
938     {
939     SvREFCNT_dec (sv);
940     sv = SvREFCNT_inc (POPs);
941     }
942     else
943     {
944     AV *av = newAV ();
945     av_push (av, newSVuv (tag));
946     av_push (av, sv);
947    
948     HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
949     ? cbor_tagged_stash
950     : gv_stashpv ("CBOR::XS::Tagged" , 1);
951     sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
952     }
953 root 1.7
954 root 1.27 PUTBACK;
955    
956     FREETMPS; LEAVE;
957 root 1.18 }
958 root 1.20 break;
959 root 1.11 }
960 root 1.9
961 root 1.20 return sv;
962    
963 root 1.9 fail:
964     SvREFCNT_dec (sv);
965     return &PL_sv_undef;
966 root 1.3 }
967    
968     static SV *
969 root 1.1 decode_sv (dec_t *dec)
970     {
971     WANT (1);
972    
973     switch (*dec->cur >> 5)
974     {
975     case 0: // unsigned int
976     return newSVuv (decode_uint (dec));
977     case 1: // negative int
978     return newSViv (-1 - (IV)decode_uint (dec));
979     case 2: // octet string
980     return decode_str (dec, 0);
981     case 3: // utf-8 string
982     return decode_str (dec, 1);
983     case 4: // array
984     return decode_av (dec);
985     case 5: // map
986     return decode_hv (dec);
987     case 6: // tag
988 root 1.3 return decode_tagged (dec);
989 root 1.1 case 7: // misc
990     switch (*dec->cur++ & 31)
991     {
992     case 20:
993     #if CBOR_SLOW
994 root 1.10 types_false = get_bool ("Types::Serialiser::false");
995 root 1.1 #endif
996 root 1.10 return newSVsv (types_false);
997 root 1.1 case 21:
998     #if CBOR_SLOW
999 root 1.10 types_true = get_bool ("Types::Serialiser::true");
1000 root 1.1 #endif
1001 root 1.10 return newSVsv (types_true);
1002 root 1.1 case 22:
1003     return newSVsv (&PL_sv_undef);
1004 root 1.10 case 23:
1005     #if CBOR_SLOW
1006     types_error = get_bool ("Types::Serialiser::error");
1007     #endif
1008     return newSVsv (types_error);
1009 root 1.1
1010     case 25:
1011 root 1.2 {
1012     WANT (2);
1013    
1014     uint16_t fp = (dec->cur[0] << 8) | dec->cur[1];
1015     dec->cur += 2;
1016    
1017     return newSVnv (ecb_binary16_to_float (fp));
1018     }
1019 root 1.1
1020     case 26:
1021     {
1022     uint32_t fp;
1023     WANT (4);
1024     memcpy (&fp, dec->cur, 4);
1025     dec->cur += 4;
1026    
1027     if (!ecb_big_endian ())
1028     fp = ecb_bswap32 (fp);
1029    
1030     return newSVnv (ecb_binary32_to_float (fp));
1031     }
1032    
1033     case 27:
1034     {
1035     uint64_t fp;
1036     WANT (8);
1037     memcpy (&fp, dec->cur, 8);
1038     dec->cur += 8;
1039    
1040     if (!ecb_big_endian ())
1041     fp = ecb_bswap64 (fp);
1042    
1043     return newSVnv (ecb_binary64_to_double (fp));
1044     }
1045    
1046     // 0..19 unassigned
1047     // 24 reserved + unassigned (reserved values are not encodable)
1048     default:
1049     ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
1050     }
1051    
1052     break;
1053     }
1054    
1055     fail:
1056     return &PL_sv_undef;
1057     }
1058    
1059     static SV *
1060     decode_cbor (SV *string, CBOR *cbor, char **offset_return)
1061     {
1062 root 1.18 dec_t dec = { };
1063 root 1.1 SV *sv;
1064 root 1.16 STRLEN len;
1065     char *data = SvPVbyte (string, len);
1066 root 1.1
1067 root 1.16 if (len > cbor->max_size && cbor->max_size)
1068     croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
1069     (unsigned long)len, (unsigned long)cbor->max_size);
1070 root 1.1
1071     dec.cbor = *cbor;
1072 root 1.16 dec.cur = (U8 *)data;
1073     dec.end = (U8 *)data + len;
1074 root 1.1
1075     sv = decode_sv (&dec);
1076    
1077     if (offset_return)
1078     *offset_return = dec.cur;
1079    
1080     if (!(offset_return || !sv))
1081 root 1.2 if (dec.cur != dec.end && !dec.err)
1082     dec.err = "garbage after CBOR object";
1083    
1084     if (dec.err)
1085 root 1.1 {
1086 root 1.2 SvREFCNT_dec (sv);
1087 root 1.16 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
1088 root 1.1 }
1089    
1090     sv = sv_2mortal (sv);
1091    
1092     return sv;
1093     }
1094    
1095     /////////////////////////////////////////////////////////////////////////////
1096     // XS interface functions
1097    
1098     MODULE = CBOR::XS PACKAGE = CBOR::XS
1099    
1100     BOOT:
1101     {
1102     cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1103 root 1.6 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1104 root 1.1
1105 root 1.10 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1106     types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
1107    
1108     types_true = get_bool ("Types::Serialiser::true" );
1109     types_false = get_bool ("Types::Serialiser::false");
1110     types_error = get_bool ("Types::Serialiser::error");
1111 root 1.11
1112 root 1.27 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1113    
1114 root 1.11 sv_cbor = newSVpv ("CBOR", 0);
1115     SvREADONLY_on (sv_cbor);
1116 root 1.1 }
1117    
1118     PROTOTYPES: DISABLE
1119    
1120     void CLONE (...)
1121     CODE:
1122 root 1.10 cbor_stash = 0;
1123     cbor_tagged_stash = 0;
1124     types_error_stash = 0;
1125     types_boolean_stash = 0;
1126 root 1.1
1127     void new (char *klass)
1128     PPCODE:
1129     {
1130     SV *pv = NEWSV (0, sizeof (CBOR));
1131     SvPOK_only (pv);
1132     cbor_init ((CBOR *)SvPVX (pv));
1133     XPUSHs (sv_2mortal (sv_bless (
1134     newRV_noinc (pv),
1135     strEQ (klass, "CBOR::XS") ? CBOR_STASH : gv_stashpv (klass, 1)
1136     )));
1137     }
1138    
1139     void shrink (CBOR *self, int enable = 1)
1140     ALIAS:
1141     shrink = F_SHRINK
1142     allow_unknown = F_ALLOW_UNKNOWN
1143 root 1.18 allow_sharing = F_ALLOW_SHARING
1144 root 1.24 allow_stringref = F_ALLOW_STRINGREF
1145 root 1.1 PPCODE:
1146     {
1147     if (enable)
1148     self->flags |= ix;
1149     else
1150     self->flags &= ~ix;
1151    
1152     XPUSHs (ST (0));
1153     }
1154    
1155     void get_shrink (CBOR *self)
1156     ALIAS:
1157     get_shrink = F_SHRINK
1158     get_allow_unknown = F_ALLOW_UNKNOWN
1159 root 1.18 get_allow_sharing = F_ALLOW_SHARING
1160 root 1.24 get_allow_stringref = F_ALLOW_STRINGREF
1161 root 1.1 PPCODE:
1162     XPUSHs (boolSV (self->flags & ix));
1163    
1164     void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1165     PPCODE:
1166     self->max_depth = max_depth;
1167     XPUSHs (ST (0));
1168    
1169     U32 get_max_depth (CBOR *self)
1170     CODE:
1171     RETVAL = self->max_depth;
1172     OUTPUT:
1173     RETVAL
1174    
1175     void max_size (CBOR *self, U32 max_size = 0)
1176     PPCODE:
1177     self->max_size = max_size;
1178     XPUSHs (ST (0));
1179    
1180     int get_max_size (CBOR *self)
1181     CODE:
1182     RETVAL = self->max_size;
1183     OUTPUT:
1184     RETVAL
1185    
1186 root 1.27 void filter (CBOR *self, SV *filter = 0)
1187     PPCODE:
1188     SvREFCNT_dec (self->filter);
1189     self->filter = filter ? newSVsv (filter) : filter;
1190     XPUSHs (ST (0));
1191    
1192     SV *get_filter (CBOR *self)
1193     CODE:
1194     RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1195     OUTPUT:
1196     RETVAL
1197    
1198 root 1.1 void encode (CBOR *self, SV *scalar)
1199     PPCODE:
1200     PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN;
1201     XPUSHs (scalar);
1202    
1203     void decode (CBOR *self, SV *cborstr)
1204     PPCODE:
1205     PUTBACK; cborstr = decode_cbor (cborstr, self, 0); SPAGAIN;
1206     XPUSHs (cborstr);
1207    
1208     void decode_prefix (CBOR *self, SV *cborstr)
1209     PPCODE:
1210     {
1211     SV *sv;
1212     char *offset;
1213     PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN;
1214     EXTEND (SP, 2);
1215     PUSHs (sv);
1216     PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1217     }
1218    
1219 root 1.27 void DESTROY (CBOR *self)
1220     PPCODE:
1221     cbor_free (self);
1222    
1223 root 1.1 PROTOTYPES: ENABLE
1224    
1225     void encode_cbor (SV *scalar)
1226     PPCODE:
1227     {
1228     CBOR cbor;
1229     cbor_init (&cbor);
1230     PUTBACK; scalar = encode_cbor (scalar, &cbor); SPAGAIN;
1231     XPUSHs (scalar);
1232     }
1233    
1234     void decode_cbor (SV *cborstr)
1235     PPCODE:
1236     {
1237     CBOR cbor;
1238     cbor_init (&cbor);
1239     PUTBACK; cborstr = decode_cbor (cborstr, &cbor, 0); SPAGAIN;
1240     XPUSHs (cborstr);
1241     }
1242