--- CBOR-XS/XS.xs 2013/10/27 20:40:25 1.10 +++ CBOR-XS/XS.xs 2013/11/20 02:03:09 1.19 @@ -11,12 +11,28 @@ #include "ecb.h" +// compatibility with perl <5.18 +#ifndef HvNAMELEN_get +# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) +#endif +#ifndef HvNAMELEN +# define HvNAMELEN(hv) HvNAMELEN_get (hv) +#endif +#ifndef HvNAMEUTF8 +# define HvNAMEUTF8(hv) 0 +#endif + // known tags enum cbor_tag { // inofficial extensions (pending iana registration) - CBOR_TAG_PERL_OBJECT = 256, - CBOR_TAG_GENERIC_OBJECT = 257, + CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object + CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object + CBOR_TAG_VALUE_SHAREABLE = 26, // http://cbor.schmorp.de/value-sharing + CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing + CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref + CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref + CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection // rfc7049 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8 @@ -40,8 +56,11 @@ CBOR_TAG_MAGIC = 55799 // self-describe cbor }; -#define F_SHRINK 0x00000200UL -#define F_ALLOW_UNKNOWN 0x00002000UL +#define F_SHRINK 0x00000001UL +#define F_ALLOW_UNKNOWN 0x00000002UL +#define F_ALLOW_SHARING 0x00000004UL //TODO +#define F_DEDUP_STRINGS 0x00000008UL //TODO +#define F_DEDUP_KEYS 0x00000010UL //TODO #define INIT_SIZE 32 // initial scalar size to be allocated @@ -63,7 +82,7 @@ #endif static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS:: -static SV *types_true, *types_false, *types_error; +static SV *types_true, *types_false, *types_error, *sv_cbor; typedef struct { U32 flags; @@ -108,11 +127,6 @@ } ///////////////////////////////////////////////////////////////////////////// -// fp hell - -//TODO - -///////////////////////////////////////////////////////////////////////////// // encoder // structure used for encoding CBOR @@ -123,6 +137,9 @@ SV *sv; // result scalar CBOR cbor; U32 depth; // recursion level + HV *stringref; // string => index, or 0 + HV *shareable; // ptr => index, or 0 + UV shareable_idx; } enc_t; ecb_inline void @@ -193,6 +210,12 @@ enc->cur += len; } +ecb_inline void +encode_tag (enc_t *enc, UV tag) +{ + encode_uint (enc, 0xc0, tag); +} + static void encode_sv (enc_t *enc, SV *sv); static void @@ -254,10 +277,31 @@ static void encode_rv (enc_t *enc, SV *sv) { - svtype svt; - SvGETMAGIC (sv); - svt = SvTYPE (sv); + + if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING) + && ecb_expect_false (SvREFCNT (sv) > 1)) + { + if (!enc->shareable) + enc->shareable = (HV *)sv_2mortal ((SV *)newHV ()); + + SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1); + + if (SvOK (*svp)) + { + encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF); + encode_uint (enc, 0x00, SvUV (*svp)); + return; + } + else + { + sv_setuv (*svp, enc->shareable_idx); + ++enc->shareable_idx; + encode_tag (enc, CBOR_TAG_VALUE_SHAREABLE); + } + } + + svtype svt = SvTYPE (sv); if (ecb_expect_false (SvOBJECT (sv))) { @@ -271,11 +315,14 @@ ? cbor_tagged_stash : gv_stashpv ("CBOR::XS::Tagged" , 1); - if (SvSTASH (sv) == boolean_stash) + HV *stash = SvSTASH (sv); + GV *method; + + if (stash == boolean_stash) encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20); - else if (SvSTASH (sv) == error_stash) + else if (stash == error_stash) encode_ch (enc, 0xe0 | 23); - else if (SvSTASH (sv) == tagged_stash) + else if (stash == tagged_stash) { if (svt != SVt_PVAV) croak ("encountered CBOR::XS::Tagged object that isn't an array"); @@ -283,63 +330,71 @@ encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1))); encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); } - else + else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) { + dSP; + + ENTER; SAVETMPS; PUSHMARK (SP); // we re-bless the reference to get overload and other niceties right - GV *to_cbor = gv_fetchmethod_autoload (SvSTASH (sv), "TO_CBOR", 0); + XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); - if (to_cbor) - { - dSP; + PUTBACK; + // G_SCALAR ensures that return value is 1 + call_sv ((SV *)GvCV (method), G_SCALAR); + SPAGAIN; - ENTER; SAVETMPS; PUSHMARK (SP); - XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv))); + // catch this surprisingly common error + if (SvROK (TOPs) && SvRV (TOPs) == sv) + croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (stash)); - // calling with G_SCALAR ensures that we always get a 1 return value - PUTBACK; - call_sv ((SV *)GvCV (to_cbor), G_SCALAR); - SPAGAIN; + encode_sv (enc, POPs); - // catch this surprisingly common error - if (SvROK (TOPs) && SvRV (TOPs) == sv) - croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); + PUTBACK; - sv = POPs; - PUTBACK; + FREETMPS; LEAVE; + } + else if ((method = gv_fetchmethod_autoload (stash, "FREEZE", 0)) != 0) + { + dSP; - encode_sv (enc, sv); + ENTER; SAVETMPS; PUSHMARK (SP); + EXTEND (SP, 2); + // we re-bless the reference to get overload and other niceties right + PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); + PUSHs (sv_cbor); - FREETMPS; LEAVE; - } - else - croak ("encountered object '%s', but no TO_CBOR method available on it", - SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + PUTBACK; + int count = call_sv ((SV *)GvCV (method), G_ARRAY); + SPAGAIN; + + // catch this surprisingly common error + if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) + croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); + + encode_tag (enc, CBOR_TAG_PERL_OBJECT); + encode_uint (enc, 0x80, count + 1); + encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); + + while (count) + encode_sv (enc, SP[1 - count--]); + + PUTBACK; + + FREETMPS; LEAVE; } + else + croak ("encountered object '%s', but no TO_CBOR or FREEZE methods available on it", + SvPV_nolen (sv_2mortal (newRV_inc (sv)))); } else if (svt == SVt_PVHV) encode_hv (enc, (HV *)sv); else if (svt == SVt_PVAV) encode_av (enc, (AV *)sv); - else if (svt < SVt_PVAV) + else { - STRLEN len = 0; - char *pv = svt ? SvPV (sv, len) : 0; - - if (len == 1 && *pv == '1') - encode_ch (enc, 0xe0 | 21); - else if (len == 1 && *pv == '0') - encode_ch (enc, 0xe0 | 20); - else if (enc->cbor.flags & F_ALLOW_UNKNOWN) - encode_ch (enc, 0xe0 | 23); - else - croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1", - SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + encode_tag (enc, CBOR_TAG_INDIRECTION); + encode_sv (enc, sv); } - else if (enc->cbor.flags & F_ALLOW_UNKNOWN) - encode_ch (enc, 0xe0 | 23); - else - croak ("encountered %s, but CBOR can only represent references to arrays or hashes", - SvPV_nolen (sv_2mortal (newRV_inc (sv)))); } static void @@ -414,13 +469,12 @@ static SV * encode_cbor (SV *scalar, CBOR *cbor) { - enc_t enc; + enc_t enc = { }; enc.cbor = *cbor; enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); enc.cur = SvPVX (enc.sv); enc.end = SvEND (enc.sv); - enc.depth = 0; SvPOK_only (enc.sv); encode_sv (&enc, scalar); @@ -446,6 +500,7 @@ CBOR cbor; U32 depth; // recursion depth U32 maxdepth; // recursion depth limit + AV *shareable; } dec_t; #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE @@ -549,6 +604,40 @@ return &PL_sv_undef; } +static void +decode_he (dec_t *dec, HV *hv) +{ + // for speed reasons, we specialcase single-string + // byte or utf-8 strings as keys. + + if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27) + { + I32 len = decode_uint (dec); + char *key = (char *)dec->cur; + + dec->cur += len; + + hv_store (hv, key, len, decode_sv (dec), 0); + } + else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27) + { + I32 len = decode_uint (dec); + char *key = (char *)dec->cur; + + dec->cur += len; + + hv_store (hv, key, -len, decode_sv (dec), 0); + } + else + { + SV *k = decode_sv (dec); + SV *v = decode_sv (dec); + + hv_store_ent (hv, k, v, 0); + SvREFCNT_dec (k); + } +} + static SV * decode_hv (dec_t *dec) { @@ -570,23 +659,15 @@ break; } - SV *k = decode_sv (dec); - SV *v = decode_sv (dec); - - hv_store_ent (hv, k, v, 0); + decode_he (dec, hv); } } else { - int len = decode_uint (dec); + int pairs = decode_uint (dec); - while (len--) - { - SV *k = decode_sv (dec); - SV *v = decode_sv (dec); - - hv_store_ent (hv, k, v, 0); - } + while (pairs--) + decode_he (dec, hv); } DEC_DEC_DEPTH; @@ -645,29 +726,114 @@ static SV * decode_tagged (dec_t *dec) { + SV *sv = 0; UV tag = decode_uint (dec); - SV *sv = decode_sv (dec); - if (tag == CBOR_TAG_MAGIC) - return sv; + WANT (1); - if (tag == CBOR_TAG_PERL_OBJECT) + switch (tag) { - if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) - ERR ("corrupted CBOR data (non-array perl object)"); - - // TODO - } + case CBOR_TAG_MAGIC: + return decode_sv (dec); - AV *av = newAV (); - av_push (av, newSVuv (tag)); - av_push (av, sv); + case CBOR_TAG_INDIRECTION: + return newRV_noinc (decode_sv (dec)); + + case CBOR_TAG_VALUE_SHAREABLE: + { + if (ecb_expect_false (!dec->shareable)) + dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); + + sv = newSV (0); + av_push (dec->shareable, SvREFCNT_inc_NN (sv)); + + SV *osv = decode_sv (dec); + sv_setsv (sv, osv); + SvREFCNT_dec_NN (osv); + } + + return sv; + + case CBOR_TAG_VALUE_SHAREDREF: + { + if ((*dec->cur >> 5) != 0) + ERR ("corrupted CBOR data (sharedref index not an unsigned integer)"); + + UV idx = decode_uint (dec); + + if (!dec->shareable || idx > AvFILLp (dec->shareable)) + ERR ("corrupted CBOR data (sharedref index out of bounds)"); + + return SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); + } + + case CBOR_TAG_PERL_OBJECT: + { + sv = decode_sv (dec); + + if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) + ERR ("corrupted CBOR data (non-array perl object)"); + + AV *av = (AV *)SvRV (sv); + int len = av_len (av) + 1; + HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0); + + if (!stash) + ERR ("cannot decode perl-object (package does not exist)"); + + GV *method = gv_fetchmethod_autoload (stash, "THAW", 0); + + if (!method) + ERR ("cannot decode perl-object (package does not have a THAW method)"); + + dSP; - HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash - ? cbor_tagged_stash - : gv_stashpv ("CBOR::XS::Tagged" , 1); + ENTER; SAVETMPS; PUSHMARK (SP); + EXTEND (SP, len + 1); + // we re-bless the reference to get overload and other niceties right + PUSHs (*av_fetch (av, 0, 1)); + PUSHs (sv_cbor); + + int i; + + for (i = 1; i < len; ++i) + PUSHs (*av_fetch (av, i, 1)); + + PUTBACK; + call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL); + SPAGAIN; + + if (SvTRUE (ERRSV)) + { + FREETMPS; LEAVE; + ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); + } + + SvREFCNT_dec (sv); + sv = SvREFCNT_inc (POPs); - return sv_bless (newRV_noinc ((SV *)av), tagged_stash); + PUTBACK; + + FREETMPS; LEAVE; + + return sv; + } + + default: + { + sv = decode_sv (dec); + + AV *av = newAV (); + av_push (av, newSVuv (tag)); + av_push (av, sv); + + HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash + ? cbor_tagged_stash + : gv_stashpv ("CBOR::XS::Tagged" , 1); + + return sv_bless (newRV_noinc ((SV *)av), tagged_stash); + } + } fail: SvREFCNT_dec (sv); @@ -768,49 +934,18 @@ static SV * decode_cbor (SV *string, CBOR *cbor, char **offset_return) { - dec_t dec; + dec_t dec = { }; SV *sv; + STRLEN len; + char *data = SvPVbyte (string, len); - /* work around bugs in 5.10 where manipulating magic values - * makes perl ignore the magic in subsequent accesses. - * also make a copy of non-PV values, to get them into a clean - * state (SvPV should do that, but it's buggy, see below). - */ - /*SvGETMAGIC (string);*/ - if (SvMAGICAL (string) || !SvPOK (string)) - string = sv_2mortal (newSVsv (string)); - - SvUPGRADE (string, SVt_PV); - - /* work around a bug in perl 5.10, which causes SvCUR to fail an - * assertion with -DDEBUGGING, although SvCUR is documented to - * return the xpv_cur field which certainly exists after upgrading. - * according to nicholas clark, calling SvPOK fixes this. - * But it doesn't fix it, so try another workaround, call SvPV_nolen - * and hope for the best. - * Damnit, SvPV_nolen still trips over yet another assertion. This - * assertion business is seriously broken, try yet another workaround - * for the broken -DDEBUGGING. - */ - { -#ifdef DEBUGGING - STRLEN offset = SvOK (string) ? sv_len (string) : 0; -#else - STRLEN offset = SvCUR (string); -#endif - - if (offset > cbor->max_size && cbor->max_size) - croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu", - (unsigned long)SvCUR (string), (unsigned long)cbor->max_size); - } - - sv_utf8_downgrade (string, 0); + if (len > cbor->max_size && cbor->max_size) + croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu", + (unsigned long)len, (unsigned long)cbor->max_size); dec.cbor = *cbor; - dec.cur = (U8 *)SvPVX (string); - dec.end = (U8 *)SvEND (string); - dec.err = 0; - dec.depth = 0; + dec.cur = (U8 *)data; + dec.end = (U8 *)data + len; sv = decode_sv (&dec); @@ -824,7 +959,7 @@ if (dec.err) { SvREFCNT_dec (sv); - croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)SvPVX (string), (int)(uint8_t)*dec.cur); + croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); } sv = sv_2mortal (sv); @@ -848,6 +983,9 @@ types_true = get_bool ("Types::Serialiser::true" ); types_false = get_bool ("Types::Serialiser::false"); types_error = get_bool ("Types::Serialiser::error"); + + sv_cbor = newSVpv ("CBOR", 0); + SvREADONLY_on (sv_cbor); } PROTOTYPES: DISABLE @@ -875,6 +1013,9 @@ ALIAS: shrink = F_SHRINK allow_unknown = F_ALLOW_UNKNOWN + allow_sharing = F_ALLOW_SHARING + dedup_keys = F_DEDUP_KEYS + dedup_strings = F_DEDUP_STRINGS PPCODE: { if (enable) @@ -889,6 +1030,9 @@ ALIAS: get_shrink = F_SHRINK get_allow_unknown = F_ALLOW_UNKNOWN + get_allow_sharing = F_ALLOW_SHARING + get_dedup_keys = F_DEDUP_KEYS + get_dedup_strings = F_DEDUP_STRINGS PPCODE: XPUSHs (boolSV (self->flags & ix)); @@ -914,41 +1058,6 @@ OUTPUT: RETVAL -#if 0 //TODO - -void filter_cbor_object (CBOR *self, SV *cb = &PL_sv_undef) - PPCODE: -{ - SvREFCNT_dec (self->cb_object); - self->cb_object = SvOK (cb) ? newSVsv (cb) : 0; - - XPUSHs (ST (0)); -} - -void filter_cbor_single_key_object (CBOR *self, SV *key, SV *cb = &PL_sv_undef) - PPCODE: -{ - if (!self->cb_sk_object) - self->cb_sk_object = newHV (); - - if (SvOK (cb)) - hv_store_ent (self->cb_sk_object, key, newSVsv (cb), 0); - else - { - hv_delete_ent (self->cb_sk_object, key, G_DISCARD, 0); - - if (!HvKEYS (self->cb_sk_object)) - { - SvREFCNT_dec (self->cb_sk_object); - self->cb_sk_object = 0; - } - } - - XPUSHs (ST (0)); -} - -#endif - void encode (CBOR *self, SV *scalar) PPCODE: PUTBACK; scalar = encode_cbor (scalar, self); SPAGAIN; @@ -970,15 +1079,6 @@ PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); } -#if 0 - -void DESTROY (CBOR *self) - CODE: - SvREFCNT_dec (self->cb_sk_object); - SvREFCNT_dec (self->cb_object); - -#endif - PROTOTYPES: ENABLE void encode_cbor (SV *scalar)