--- CBOR-XS/XS.xs 2013/10/26 11:08:34 1.3 +++ CBOR-XS/XS.xs 2013/11/22 15:28:38 1.27 @@ -11,25 +11,61 @@ #include "ecb.h" -#define F_SHRINK 0x00000200UL -#define F_ALLOW_UNKNOWN 0x00002000UL +// compatibility with perl <5.18 +#ifndef HvNAMELEN_get +# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) +#endif +#ifndef HvNAMELEN +# define HvNAMELEN(hv) HvNAMELEN_get (hv) +#endif +#ifndef HvNAMEUTF8 +# define HvNAMEUTF8(hv) 0 +#endif + +// known tags +enum cbor_tag +{ + // inofficial extensions (pending iana registration) + CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object + CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object + CBOR_TAG_VALUE_SHAREABLE = 26, // http://cbor.schmorp.de/value-sharing + CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing + CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref + CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref + CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection + + // rfc7049 + CBOR_TAG_DATETIME = 0, // rfc4287, utf-8 + CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any + CBOR_TAG_POS_BIGNUM = 2, // byte string + CBOR_TAG_NEG_BIGNUM = 3, // byte string + CBOR_TAG_DECIMAL = 4, // decimal fraction, array + CBOR_TAG_BIGFLOAT = 5, // array + + CBOR_TAG_CONV_B64U = 21, // base64url, any + CBOR_TAG_CONV_B64 = 22, // base64, any + CBOR_TAG_CONV_HEX = 23, // base16, any + CBOR_TAG_CBOR = 24, // embedded cbor, byte string + + CBOR_TAG_URI = 32, // URI rfc3986, utf-8 + CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8 + CBOR_TAG_B64 = 34, // base6 rfc46484, utf-8 + CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8 + CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 + + CBOR_TAG_MAGIC = 55799 // self-describe cbor +}; + +#define F_SHRINK 0x00000001UL +#define F_ALLOW_UNKNOWN 0x00000002UL +#define F_ALLOW_SHARING 0x00000004UL //TODO +#define F_ALLOW_STRINGREF 0x00000008UL //TODO #define INIT_SIZE 32 // initial scalar size to be allocated #define SB do { #define SE } while (0) -#if __GNUC__ >= 3 -# define expect(expr,value) __builtin_expect ((expr), (value)) -# define INLINE static inline -#else -# define expect(expr,value) (expr) -# define INLINE static -#endif - -#define expect_false(expr) expect ((expr) != 0, 0) -#define expect_true(expr) expect ((expr) != 0, 1) - #define IN_RANGE_INC(type,val,beg,end) \ ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) @@ -44,29 +80,33 @@ # define CBOR_STASH cbor_stash #endif -static HV *cbor_stash, *cbor_boolean_stash; // CBOR::XS:: -static SV *cbor_true, *cbor_false; +static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS:: +static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter; typedef struct { U32 flags; U32 max_depth; STRLEN max_size; - - SV *cb_object; - HV *cb_sk_object; + SV *filter; } CBOR; -INLINE void +ecb_inline void cbor_init (CBOR *cbor) { Zero (cbor, 1, CBOR); cbor->max_depth = 512; } +ecb_inline void +cbor_free (CBOR *cbor) +{ + SvREFCNT_dec (cbor->filter); +} + ///////////////////////////////////////////////////////////////////////////// // utility functions -INLINE SV * +ecb_inline SV * get_bool (const char *name) { SV *sv = get_sv (name, 1); @@ -77,7 +117,7 @@ return sv; } -INLINE void +ecb_inline void shrink (SV *sv) { sv_utf8_downgrade (sv, 1); @@ -92,10 +132,20 @@ } } -///////////////////////////////////////////////////////////////////////////// -// fp hell - -//TODO +// minimum length of a string to be registered for stringref +ecb_inline int +minimum_string_length (UV idx) +{ + return idx > 23 + ? idx > 0xffU + ? idx > 0xffffU + ? idx > 0xffffffffU + ? 7 + : 6 + : 5 + : 4 + : 3; +} ///////////////////////////////////////////////////////////////////////////// // encoder @@ -108,12 +158,16 @@ SV *sv; // result scalar CBOR cbor; U32 depth; // recursion level + HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8) + UV stringref_idx; + HV *shareable; // ptr => index, or 0 + UV shareable_idx; } enc_t; -INLINE void +ecb_inline void need (enc_t *enc, STRLEN len) { - if (expect_false (enc->cur + len >= enc->end)) + if (ecb_expect_false (enc->cur + len >= enc->end)) { STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); @@ -122,7 +176,7 @@ } } -INLINE void +ecb_inline void encode_ch (enc_t *enc, char ch) { need (enc, 1); @@ -136,18 +190,18 @@ if (len < 24) *enc->cur++ = major | len; - else if (len < 0x100) + else if (len <= 0xff) { *enc->cur++ = major | 24; *enc->cur++ = len; } - else if (len < 0x10000) + else if (len <= 0xffff) { *enc->cur++ = major | 25; *enc->cur++ = len >> 8; *enc->cur++ = len; } - else if (len < 0x100000000) + else if (len <= 0xffffffff) { *enc->cur++ = major | 26; *enc->cur++ = len >> 24; @@ -155,7 +209,7 @@ *enc->cur++ = len >> 8; *enc->cur++ = len; } - else if (len) + else { *enc->cur++ = major | 27; *enc->cur++ = len >> 56; @@ -169,9 +223,34 @@ } } +ecb_inline void +encode_tag (enc_t *enc, UV tag) +{ + encode_uint (enc, 0xc0, tag); +} + static void encode_str (enc_t *enc, int utf8, char *str, STRLEN len) { + if (ecb_expect_false (enc->cbor.flags & F_ALLOW_STRINGREF)) + { + SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1); + + if (SvOK (*svp)) + { + // already registered, use stringref + encode_tag (enc, CBOR_TAG_STRINGREF); + encode_uint (enc, 0x00, SvUV (*svp)); + return; + } + else if (len >= minimum_string_length (enc->stringref_idx)) + { + // register only + sv_setuv (*svp, enc->stringref_idx); + ++enc->stringref_idx; + } + } + encode_uint (enc, utf8 ? 0x60 : 0x40, len); need (enc, len); memcpy (enc->cur, str, len); @@ -226,7 +305,7 @@ else encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); - encode_sv (enc, expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); + encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); } if (mg) @@ -239,88 +318,124 @@ static void encode_rv (enc_t *enc, SV *sv) { - svtype svt; - SvGETMAGIC (sv); - svt = SvTYPE (sv); - if (expect_false (SvOBJECT (sv))) + if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING) + && ecb_expect_false (SvREFCNT (sv) > 1)) { - HV *stash = !CBOR_SLOW || cbor_boolean_stash - ? cbor_boolean_stash - : gv_stashpv ("CBOR::XS::Boolean", 1); + if (!enc->shareable) + enc->shareable = (HV *)sv_2mortal ((SV *)newHV ()); - if (SvSTASH (sv) == stash) - encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20); + SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1); + + if (SvOK (*svp)) + { + encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF); + encode_uint (enc, 0x00, SvUV (*svp)); + return; + } else { -#if 0 //TODO - if (enc->cbor.flags & F_CONV_BLESSED) - { - // we re-bless the reference to get overload and other niceties right - GV *to_cbor = gv_fetchmethod_autoload (SvSTASH (sv), "TO_CBOR", 0); - - if (to_cbor) - { - dSP; - - ENTER; SAVETMPS; PUSHMARK (SP); - XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv))); - - // calling with G_SCALAR ensures that we always get a 1 return value - PUTBACK; - call_sv ((SV *)GvCV (to_cbor), G_SCALAR); - SPAGAIN; - - // catch this surprisingly common error - if (SvROK (TOPs) && SvRV (TOPs) == sv) - croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); - - sv = POPs; - PUTBACK; - - encode_sv (enc, sv); - - FREETMPS; LEAVE; - } - else if (enc->cbor.flags & F_ALLOW_BLESSED) - encode_str (enc, "null", 4, 0); - else - croak ("encountered object '%s', but neither allow_blessed enabled nor TO_CBOR method available on it", - SvPV_nolen (sv_2mortal (newRV_inc (sv)))); - } - else if (enc->cbor.flags & F_ALLOW_BLESSED) - encode_str (enc, "null", 4, 0); - else - croak ("encountered object '%s', but neither allow_blessed nor convert_blessed settings are enabled", - SvPV_nolen (sv_2mortal (newRV_inc (sv)))); -#endif + sv_setuv (*svp, enc->shareable_idx); + ++enc->shareable_idx; + encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE); } } - else if (svt == SVt_PVHV) - encode_hv (enc, (HV *)sv); - else if (svt == SVt_PVAV) - encode_av (enc, (AV *)sv); - else if (svt < SVt_PVAV) + + svtype svt = SvTYPE (sv); + + if (ecb_expect_false (SvOBJECT (sv))) { - STRLEN len = 0; - char *pv = svt ? SvPV (sv, len) : 0; + HV *boolean_stash = !CBOR_SLOW || types_boolean_stash + ? types_boolean_stash + : gv_stashpv ("Types::Serialiser::Boolean", 1); + HV *error_stash = !CBOR_SLOW || types_error_stash + ? types_error_stash + : gv_stashpv ("Types::Serialiser::Error", 1); + HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash + ? cbor_tagged_stash + : gv_stashpv ("CBOR::XS::Tagged" , 1); + + HV *stash = SvSTASH (sv); + GV *method; - if (len == 1 && *pv == '1') - encode_ch (enc, 0xe0 | 21); - else if (len == 1 && *pv == '0') - encode_ch (enc, 0xe0 | 20); - else if (enc->cbor.flags & F_ALLOW_UNKNOWN) + if (stash == boolean_stash) + encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20); + else if (stash == error_stash) encode_ch (enc, 0xe0 | 23); + else if (stash == tagged_stash) + { + if (svt != SVt_PVAV) + croak ("encountered CBOR::XS::Tagged object that isn't an array"); + + encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1))); + encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); + } + else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) + { + dSP; + + ENTER; SAVETMPS; PUSHMARK (SP); + // we re-bless the reference to get overload and other niceties right + XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); + + PUTBACK; + // G_SCALAR ensures that return value is 1 + call_sv ((SV *)GvCV (method), G_SCALAR); + SPAGAIN; + + // catch this surprisingly common error + if (SvROK (TOPs) && SvRV (TOPs) == sv) + croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash)); + + encode_sv (enc, POPs); + + PUTBACK; + + FREETMPS; LEAVE; + } + else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) + { + dSP; + + ENTER; SAVETMPS; PUSHMARK (SP); + EXTEND (SP, 2); + // we re-bless the reference to get overload and other niceties right + PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); + PUSHs (sv_cbor); + + PUTBACK; + int count = call_sv ((SV *)GvCV (method), G_ARRAY); + SPAGAIN; + + // catch this surprisingly common error + if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) + croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); + + encode_tag (enc, CBOR_TAG_PERL_OBJECT); + encode_uint (enc, 0x80, count + 1); + encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); + + while (count) + encode_sv (enc, SP[1 - count--]); + + PUTBACK; + + FREETMPS; LEAVE; + } else - croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1", + croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it", SvPV_nolen (sv_2mortal (newRV_inc (sv)))); } - else if (enc->cbor.flags & F_ALLOW_UNKNOWN) - encode_ch (enc, 0xe0 | 23); + else if (svt == SVt_PVHV) + encode_hv (enc, (HV *)sv); + else if (svt == SVt_PVAV) + encode_av (enc, (AV *)sv); else - croak ("encountered %s, but CBOR can only represent references to arrays or hashes", - SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + { + encode_tag (enc, CBOR_TAG_INDIRECTION); + encode_sv (enc, sv); + } } static void @@ -330,10 +445,10 @@ need (enc, 9); - if (expect_false (nv == (U32)nv)) + if (ecb_expect_false (nv == (U32)nv)) encode_uint (enc, 0x00, (U32)nv); //TODO: maybe I32? - else if (expect_false (nv == (float)nv)) + else if (ecb_expect_false (nv == (float)nv)) { uint32_t fp = ecb_float_to_binary32 (nv); @@ -395,15 +510,22 @@ static SV * encode_cbor (SV *scalar, CBOR *cbor) { - enc_t enc; + enc_t enc = { }; enc.cbor = *cbor; enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); enc.cur = SvPVX (enc.sv); enc.end = SvEND (enc.sv); - enc.depth = 0; SvPOK_only (enc.sv); + + if (cbor->flags & F_ALLOW_STRINGREF) + { + encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE); + enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ()); + enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ()); + } + encode_sv (&enc, scalar); SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); @@ -427,11 +549,14 @@ CBOR cbor; U32 depth; // recursion depth U32 maxdepth; // recursion depth limit + AV *shareable; + AV *stringref; + SV *decode_tagged; } dec_t; #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE -#define WANT(len) if (expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data"); +#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") #define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED) #define DEC_DEC_DEPTH --dec->depth @@ -530,6 +655,49 @@ return &PL_sv_undef; } +static void +decode_he (dec_t *dec, HV *hv) +{ + // for speed reasons, we specialcase single-string + // byte or utf-8 strings as keys, but only when !stringref + + if (ecb_expect_true (!dec->stringref)) + if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27) + { + I32 len = decode_uint (dec); + char *key = (char *)dec->cur; + + dec->cur += len; + + if (ecb_expect_false (dec->stringref)) + av_push (dec->stringref, newSVpvn (key, len)); + + hv_store (hv, key, len, decode_sv (dec), 0); + + return; + } + else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27) + { + I32 len = decode_uint (dec); + char *key = (char *)dec->cur; + + dec->cur += len; + + if (ecb_expect_false (dec->stringref)) + av_push (dec->stringref, newSVpvn_utf8 (key, len, 1)); + + hv_store (hv, key, -len, decode_sv (dec), 0); + + return; + } + + SV *k = decode_sv (dec); + SV *v = decode_sv (dec); + + hv_store_ent (hv, k, v, 0); + SvREFCNT_dec (k); +} + static SV * decode_hv (dec_t *dec) { @@ -551,249 +719,250 @@ break; } - SV *k = decode_sv (dec); - SV *v = decode_sv (dec); - - hv_store_ent (hv, k, v, 0); + decode_he (dec, hv); } } else { - int len = decode_uint (dec); + int pairs = decode_uint (dec); - while (len--) - { - SV *k = decode_sv (dec); - SV *v = decode_sv (dec); - - hv_store_ent (hv, k, v, 0); - } + while (pairs--) + decode_he (dec, hv); } DEC_DEC_DEPTH; return newRV_noinc ((SV *)hv); -#if 0 - SV *sv; - HV *hv = newHV (); +fail: + SvREFCNT_dec (hv); + DEC_DEC_DEPTH; + return &PL_sv_undef; +} - DEC_INC_DEPTH; - decode_ws (dec); +static SV * +decode_str (dec_t *dec, int utf8) +{ + SV *sv = 0; - for (;;) + if ((*dec->cur & 31) == 31) { - // heuristic: assume that - // a) decode_str + hv_store_ent are abysmally slow. - // b) most hash keys are short, simple ascii text. - // => try to "fast-match" such strings to avoid - // the overhead of decode_str + hv_store_ent. - { - SV *value; - char *p = dec->cur; - char *e = p + 24; // only try up to 24 bytes + ++dec->cur; - for (;;) - { - // the >= 0x80 is false on most architectures - if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\') - { - // slow path, back up and use decode_str - SV *key = decode_str (dec); - if (!key) - goto fail; - - decode_ws (dec); EXPECT_CH (':'); - - decode_ws (dec); - value = decode_sv (dec); - if (!value) - { - SvREFCNT_dec (key); - goto fail; - } + sv = newSVpvn ("", 0); - hv_store_ent (hv, key, value, 0); - SvREFCNT_dec (key); + // not very fast, and certainly not robust against illegal input + for (;;) + { + WANT (1); - break; - } - else if (*p == '"') - { - // fast path, got a simple key - char *key = dec->cur; - int len = p - key; - dec->cur = p + 1; + if (*dec->cur == (0xe0 | 31)) + { + ++dec->cur; + break; + } - decode_ws (dec); EXPECT_CH (':'); + sv_catsv (sv, decode_sv (dec)); + } + } + else + { + STRLEN len = decode_uint (dec); - decode_ws (dec); - value = decode_sv (dec); - if (!value) - goto fail; + WANT (len); + sv = newSVpvn (dec->cur, len); + dec->cur += len; - hv_store (hv, key, len, value, 0); + if (ecb_expect_false (dec->stringref) + && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1)) + av_push (dec->stringref, SvREFCNT_inc_NN (sv)); + } - break; - } + if (utf8) + SvUTF8_on (sv); - ++p; - } - } + return sv; - decode_ws (dec); +fail: + SvREFCNT_dec (sv); + return &PL_sv_undef; +} - if (*dec->cur == '}') - { - ++dec->cur; - break; - } +static SV * +decode_tagged (dec_t *dec) +{ + SV *sv = 0; + UV tag = decode_uint (dec); - if (*dec->cur != ',') - ERR (", or } expected while parsing object/hash"); + WANT (1); - ++dec->cur; + switch (tag) + { + case CBOR_TAG_MAGIC: + sv = decode_sv (dec); + break; - decode_ws (dec); + case CBOR_TAG_INDIRECTION: + sv = newRV_noinc (decode_sv (dec)); + break; - if (*dec->cur == '}' && dec->cbor.flags & F_RELAXED) + case CBOR_TAG_STRINGREF_NAMESPACE: { - ++dec->cur; - break; + ENTER; SAVETMPS; + + SAVESPTR (dec->stringref); + dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); + + sv = decode_sv (dec); + + FREETMPS; LEAVE; } - } + break; - DEC_DEC_DEPTH; - sv = newRV_noinc ((SV *)hv); + case CBOR_TAG_STRINGREF: + { + if ((*dec->cur >> 5) != 0) + ERR ("corrupted CBOR data (stringref index not an unsigned integer)"); - // check filter callbacks - if (dec->cbor.flags & F_HOOK) - { - if (dec->cbor.cb_sk_object && HvKEYS (hv) == 1) + UV idx = decode_uint (dec); + + if (!dec->stringref || (int)idx > AvFILLp (dec->stringref)) + ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)"); + + sv = newSVsv (AvARRAY (dec->stringref)[idx]); + } + break; + + case CBOR_TAG_VALUE_SHAREABLE: { - HE *cb, *he; + if (ecb_expect_false (!dec->shareable)) + dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); - hv_iterinit (hv); - he = hv_iternext (hv); - hv_iterinit (hv); + sv = newSV (0); + av_push (dec->shareable, SvREFCNT_inc_NN (sv)); - // the next line creates a mortal sv each time its called. - // might want to optimise this for common cases. - cb = hv_fetch_ent (dec->cbor.cb_sk_object, hv_iterkeysv (he), 0, 0); + SV *osv = decode_sv (dec); + sv_setsv (sv, osv); + SvREFCNT_dec_NN (osv); + } + break; - if (cb) - { - dSP; - int count; + case CBOR_TAG_VALUE_SHAREDREF: + { + if ((*dec->cur >> 5) != 0) + ERR ("corrupted CBOR data (sharedref index not an unsigned integer)"); - ENTER; SAVETMPS; PUSHMARK (SP); - XPUSHs (HeVAL (he)); - sv_2mortal (sv); - - PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; - - if (count == 1) - { - sv = newSVsv (POPs); - FREETMPS; LEAVE; - return sv; - } + UV idx = decode_uint (dec); - SvREFCNT_inc (sv); - FREETMPS; LEAVE; - } + if (!dec->shareable || (int)idx > AvFILLp (dec->shareable)) + ERR ("corrupted CBOR data (sharedref index out of bounds)"); + + sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); } + break; - if (dec->cbor.cb_object) + case CBOR_TAG_PERL_OBJECT: { + sv = decode_sv (dec); + + if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) + ERR ("corrupted CBOR data (non-array perl object)"); + + AV *av = (AV *)SvRV (sv); + int len = av_len (av) + 1; + HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0); + + if (!stash) + ERR ("cannot decode perl-object (package does not exist)"); + + GV *method = gv_fetchmethod_autoload (stash, "THAW", 0); + + if (!method) + ERR ("cannot decode perl-object (package does not have a THAW method)"); + dSP; - int count; ENTER; SAVETMPS; PUSHMARK (SP); - XPUSHs (sv_2mortal (sv)); + EXTEND (SP, len + 1); + // we re-bless the reference to get overload and other niceties right + PUSHs (*av_fetch (av, 0, 1)); + PUSHs (sv_cbor); + + int i; + + for (i = 1; i < len; ++i) + PUSHs (*av_fetch (av, i, 1)); - PUTBACK; count = call_sv (dec->cbor.cb_object, G_ARRAY); SPAGAIN; + PUTBACK; + call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL); + SPAGAIN; - if (count == 1) + if (SvTRUE (ERRSV)) { - sv = newSVsv (POPs); FREETMPS; LEAVE; - return sv; + ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); } - SvREFCNT_inc (sv); - FREETMPS; LEAVE; - } - } + SvREFCNT_dec (sv); + sv = SvREFCNT_inc (POPs); - return sv; -#endif - -fail: - SvREFCNT_dec (hv); - DEC_DEC_DEPTH; - return &PL_sv_undef; -} + PUTBACK; -static SV * -decode_str (dec_t *dec, int utf8) -{ - SV *sv; + FREETMPS; LEAVE; + } + break; - if ((*dec->cur & 31) == 31) - { - ++dec->cur; + default: + { + sv = decode_sv (dec); - sv = newSVpvn ("", 0); + dSP; + ENTER; SAVETMPS; PUSHMARK (SP); + EXTEND (SP, 2); + PUSHs (newSVuv (tag)); + PUSHs (sv); + + PUTBACK; + int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL); + SPAGAIN; - // not very fast, and certainly not robust against illegal input - for (;;) - { - WANT (1); + if (SvTRUE (ERRSV)) + { + FREETMPS; LEAVE; + ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); + } - if (*dec->cur == (0xe0 | 31)) + if (count) { - ++dec->cur; - break; + SvREFCNT_dec (sv); + sv = SvREFCNT_inc (POPs); + } + else + { + AV *av = newAV (); + av_push (av, newSVuv (tag)); + av_push (av, sv); + + HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash + ? cbor_tagged_stash + : gv_stashpv ("CBOR::XS::Tagged" , 1); + sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash); } - SV *sv2 = decode_sv (dec); - sv_catsv (sv, sv2); - } - } - else - { - STRLEN len = decode_uint (dec); + PUTBACK; - WANT (len); - sv = newSVpvn (dec->cur, len); - dec->cur += len; + FREETMPS; LEAVE; + } + break; } - if (utf8) - SvUTF8_on (sv); - return sv; fail: + SvREFCNT_dec (sv); return &PL_sv_undef; } static SV * -decode_tagged (dec_t *dec) -{ - UV tag = decode_uint (dec); - SV *sv = decode_sv (dec); - - if (tag == 55799) // 2.4.5 Self-Describe CBOR - return sv; - - AV *av = newAV (); - av_push (av, newSVuv (tag)); - av_push (av, sv); - return newRV_noinc ((SV *)av); -} - -static SV * decode_sv (dec_t *dec) { WANT (1); @@ -801,7 +970,6 @@ switch (*dec->cur >> 5) { case 0: // unsigned int - //TODO: 64 bit values on 3 2bit perls return newSVuv (decode_uint (dec)); case 1: // negative int return newSViv (-1 - (IV)decode_uint (dec)); @@ -820,16 +988,21 @@ { case 20: #if CBOR_SLOW - cbor_false = get_bool ("CBOR::XS::false"); + types_false = get_bool ("Types::Serialiser::false"); #endif - return newSVsv (cbor_false); + return newSVsv (types_false); case 21: #if CBOR_SLOW - cbor_true = get_bool ("CBOR::XS::true"); + types_true = get_bool ("Types::Serialiser::true"); #endif - return newSVsv (cbor_true); + return newSVsv (types_true); case 22: return newSVsv (&PL_sv_undef); + case 23: +#if CBOR_SLOW + types_error = get_bool ("Types::Serialiser::error"); +#endif + return newSVsv (types_error); case 25: { @@ -875,62 +1048,6 @@ break; } -#if 0 - switch (*dec->cur) - { - //case '"': ++dec->cur; return decode_str (dec); - case '[': ++dec->cur; return decode_av (dec); - case '{': ++dec->cur; return decode_hv (dec); - - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - //TODO return decode_num (dec); - - case 't': - if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) - { - dec->cur += 4; -#if CBOR_SLOW - cbor_true = get_bool ("CBOR::XS::true"); -#endif - return newSVsv (cbor_true); - } - else - ERR ("'true' expected"); - - break; - - case 'f': - if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5)) - { - dec->cur += 5; -#if CBOR_SLOW - cbor_false = get_bool ("CBOR::XS::false"); -#endif - return newSVsv (cbor_false); - } - else - ERR ("'false' expected"); - - break; - - case 'n': - if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4)) - { - dec->cur += 4; - return newSVsv (&PL_sv_undef); - } - else - ERR ("'null' expected"); - - break; - - default: - ERR ("malformed CBOR string, neither array, object, number, string or atom"); - break; - } -#endif fail: return &PL_sv_undef; @@ -939,52 +1056,18 @@ static SV * decode_cbor (SV *string, CBOR *cbor, char **offset_return) { - dec_t dec; + dec_t dec = { }; SV *sv; + STRLEN len; + char *data = SvPVbyte (string, len); - /* work around bugs in 5.10 where manipulating magic values - * makes perl ignore the magic in subsequent accesses. - * also make a copy of non-PV values, to get them into a clean - * state (SvPV should do that, but it's buggy, see below). - */ - /*SvGETMAGIC (string);*/ - if (SvMAGICAL (string) || !SvPOK (string)) - string = sv_2mortal (newSVsv (string)); - - SvUPGRADE (string, SVt_PV); - - /* work around a bug in perl 5.10, which causes SvCUR to fail an - * assertion with -DDEBUGGING, although SvCUR is documented to - * return the xpv_cur field which certainly exists after upgrading. - * according to nicholas clark, calling SvPOK fixes this. - * But it doesn't fix it, so try another workaround, call SvPV_nolen - * and hope for the best. - * Damnit, SvPV_nolen still trips over yet another assertion. This - * assertion business is seriously broken, try yet another workaround - * for the broken -DDEBUGGING. - */ - { -#ifdef DEBUGGING - STRLEN offset = SvOK (string) ? sv_len (string) : 0; -#else - STRLEN offset = SvCUR (string); -#endif - - if (offset > cbor->max_size && cbor->max_size) - croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu", - (unsigned long)SvCUR (string), (unsigned long)cbor->max_size); - } - - sv_utf8_downgrade (string, 0); + if (len > cbor->max_size && cbor->max_size) + croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu", + (unsigned long)len, (unsigned long)cbor->max_size); dec.cbor = *cbor; - dec.cur = (U8 *)SvPVX (string); - dec.end = (U8 *)SvEND (string); - dec.err = 0; - dec.depth = 0; - - if (dec.cbor.cb_object || dec.cbor.cb_sk_object) - ;//TODO dec.cbor.flags |= F_HOOK; + dec.cur = (U8 *)data; + dec.end = (U8 *)data + len; sv = decode_sv (&dec); @@ -998,7 +1081,7 @@ if (dec.err) { SvREFCNT_dec (sv); - croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)SvPVX (string), (int)(uint8_t)*dec.cur); + croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); } sv = sv_2mortal (sv); @@ -1014,18 +1097,29 @@ BOOT: { cbor_stash = gv_stashpv ("CBOR::XS" , 1); - cbor_boolean_stash = gv_stashpv ("CBOR::XS::Boolean", 1); + cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1); + + types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1); + types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1); + + types_true = get_bool ("Types::Serialiser::true" ); + types_false = get_bool ("Types::Serialiser::false"); + types_error = get_bool ("Types::Serialiser::error"); + + default_filter = newSVpv ("CBOR::XS::default_filter", 0); - cbor_true = get_bool ("CBOR::XS::true"); - cbor_false = get_bool ("CBOR::XS::false"); + sv_cbor = newSVpv ("CBOR", 0); + SvREADONLY_on (sv_cbor); } PROTOTYPES: DISABLE void CLONE (...) CODE: - cbor_stash = 0; - cbor_boolean_stash = 0; + cbor_stash = 0; + cbor_tagged_stash = 0; + types_error_stash = 0; + types_boolean_stash = 0; void new (char *klass) PPCODE: @@ -1043,6 +1137,8 @@ ALIAS: shrink = F_SHRINK allow_unknown = F_ALLOW_UNKNOWN + allow_sharing = F_ALLOW_SHARING + allow_stringref = F_ALLOW_STRINGREF PPCODE: { if (enable) @@ -1057,6 +1153,8 @@ ALIAS: get_shrink = F_SHRINK get_allow_unknown = F_ALLOW_UNKNOWN + get_allow_sharing = F_ALLOW_SHARING + get_allow_stringref = F_ALLOW_STRINGREF PPCODE: XPUSHs (boolSV (self->flags & ix)); @@ -1082,40 +1180,17 @@ OUTPUT: RETVAL -#if 0 //TODO - -void filter_cbor_object (CBOR *self, SV *cb = &PL_sv_undef) - PPCODE: -{ - SvREFCNT_dec (self->cb_object); - self->cb_object = SvOK (cb) ? newSVsv (cb) : 0; - - XPUSHs (ST (0)); -} - -void filter_cbor_single_key_object (CBOR *self, SV *key, SV *cb = &PL_sv_undef) +void filter (CBOR *self, SV *filter = 0) PPCODE: -{ - if (!self->cb_sk_object) - self->cb_sk_object = newHV (); - - if (SvOK (cb)) - hv_store_ent (self->cb_sk_object, key, newSVsv (cb), 0); - else - { - hv_delete_ent (self->cb_sk_object, key, G_DISCARD, 0); - - if (!HvKEYS (self->cb_sk_object)) - { - SvREFCNT_dec (self->cb_sk_object); - self->cb_sk_object = 0; - } - } - + SvREFCNT_dec (self->filter); + self->filter = filter ? newSVsv (filter) : filter; XPUSHs (ST (0)); -} -#endif +SV *get_filter (CBOR *self) + CODE: + RETVAL = self->filter ? self->filter : NEWSV (0, 0); + OUTPUT: + RETVAL void encode (CBOR *self, SV *scalar) PPCODE: @@ -1139,9 +1214,8 @@ } void DESTROY (CBOR *self) - CODE: - SvREFCNT_dec (self->cb_sk_object); - SvREFCNT_dec (self->cb_object); + PPCODE: + cbor_free (self); PROTOTYPES: ENABLE