--- CBOR-XS/XS.xs 2013/10/29 22:04:52 1.17 +++ CBOR-XS/XS.xs 2013/11/20 01:09:46 1.18 @@ -26,8 +26,13 @@ enum cbor_tag { // inofficial extensions (pending iana registration) - CBOR_TAG_PERL_OBJECT = 256, - CBOR_TAG_GENERIC_OBJECT = 257, + CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object + CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object + CBOR_TAG_VALUE_SHARABLE = 26, // http://cbor.schmorp.de/value-sharing + CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing + CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref + CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref + CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection // rfc7049 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8 @@ -51,8 +56,11 @@ CBOR_TAG_MAGIC = 55799 // self-describe cbor }; -#define F_SHRINK 0x00000200UL -#define F_ALLOW_UNKNOWN 0x00002000UL +#define F_SHRINK 0x00000001UL +#define F_ALLOW_UNKNOWN 0x00000002UL +#define F_ALLOW_SHARING 0x00000004UL //TODO +#define F_DEDUP_STRINGS 0x00000008UL //TODO +#define F_DEDUP_KEYS 0x00000010UL //TODO #define INIT_SIZE 32 // initial scalar size to be allocated @@ -119,11 +127,6 @@ } ///////////////////////////////////////////////////////////////////////////// -// fp hell - -//TODO - -///////////////////////////////////////////////////////////////////////////// // encoder // structure used for encoding CBOR @@ -134,6 +137,9 @@ SV *sv; // result scalar CBOR cbor; U32 depth; // recursion level + HV *stringref; // string => index, or 0 + HV *sharable; // ptr => index, or 0 + HV *sharable_idx; } enc_t; ecb_inline void @@ -204,6 +210,46 @@ enc->cur += len; } +ecb_inline void +encode_tag (enc_t *enc, UV tag) +{ + encode_uint (enc, 0xc0, tag); +} + +static int +encode_sharable2 (enc_t *enc, SV *sv) +{ + if (!enc->sharable) + enc->sharable = (HV *)sv_2mortal ((SV *)newHV ()); + + SV **svp = hv_fetch (enc->sharable, &sv, sizeof (sv), 1); + + if (SvOK (*svp)) + { + encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF); + encode_uint (enc, 0x00, SvUV (*svp)); + + return 1; + } + else + { + sv_setuv (*svp, enc->sharable_idx++); + encode_tag (enc, CBOR_TAG_VALUE_SHARABLE); + + return 0; + } +} + +ecb_inline int +encode_sharable (enc_t *enc, SV *sv) +{ + if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING) + && ecb_expect_false (SvREFCNT (sv) > 1)) + return encode_sharable2 (enc, sv); + + return 0; +} + static void encode_sv (enc_t *enc, SV *sv); static void @@ -270,6 +316,9 @@ SvGETMAGIC (sv); svt = SvTYPE (sv); + if (encode_sharable (enc, sv)) + return; + if (ecb_expect_false (SvOBJECT (sv))) { HV *boolean_stash = !CBOR_SLOW || types_boolean_stash @@ -338,7 +387,7 @@ if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); - encode_uint (enc, 0xc0, CBOR_TAG_PERL_OBJECT); + encode_tag (enc, CBOR_TAG_PERL_OBJECT); encode_uint (enc, 0x80, count + 1); encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); @@ -357,26 +406,11 @@ encode_hv (enc, (HV *)sv); else if (svt == SVt_PVAV) encode_av (enc, (AV *)sv); - else if (svt < SVt_PVAV) + else { - STRLEN len = 0; - char *pv = svt ? SvPV (sv, len) : 0; - - if (len == 1 && *pv == '1') - encode_ch (enc, 0xe0 | 21); - else if (len == 1 && *pv == '0') - encode_ch (enc, 0xe0 | 20); - else if (enc->cbor.flags & F_ALLOW_UNKNOWN) - encode_ch (enc, 0xe0 | 23); - else - croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1", - SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + encode_tag (enc, CBOR_TAG_INDIRECTION); + encode_sv (enc, sv); } - else if (enc->cbor.flags & F_ALLOW_UNKNOWN) - encode_ch (enc, 0xe0 | 23); - else - croak ("encountered %s, but CBOR can only represent references to arrays or hashes", - SvPV_nolen (sv_2mortal (newRV_inc (sv)))); } static void @@ -420,6 +454,9 @@ { SvGETMAGIC (sv); + if (encode_sharable (enc, sv)) + return; + if (SvPOKp (sv)) { STRLEN len; @@ -451,13 +488,12 @@ static SV * encode_cbor (SV *scalar, CBOR *cbor) { - enc_t enc; + enc_t enc = { }; enc.cbor = *cbor; enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); enc.cur = SvPVX (enc.sv); enc.end = SvEND (enc.sv); - enc.depth = 0; SvPOK_only (enc.sv); encode_sv (&enc, scalar); @@ -483,6 +519,7 @@ CBOR cbor; U32 depth; // recursion depth U32 maxdepth; // recursion depth limit + AV *sharable; } dec_t; #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE @@ -711,68 +748,97 @@ UV tag = decode_uint (dec); SV *sv = decode_sv (dec); - if (tag == CBOR_TAG_MAGIC) - return sv; - else if (tag == CBOR_TAG_PERL_OBJECT) - { - if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) - ERR ("corrupted CBOR data (non-array perl object)"); - - AV *av = (AV *)SvRV (sv); - int len = av_len (av) + 1; - HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0); - - if (!stash) - ERR ("cannot decode perl-object (package does not exist)"); - - GV *method = gv_fetchmethod_autoload (stash, "THAW", 0); - - if (!method) - ERR ("cannot decode perl-object (package does not have a THAW method)"); - - dSP; - - ENTER; SAVETMPS; PUSHMARK (SP); - EXTEND (SP, len + 1); - // we re-bless the reference to get overload and other niceties right - PUSHs (*av_fetch (av, 0, 1)); - PUSHs (sv_cbor); - - int i; - - for (i = 1; i < len; ++i) - PUSHs (*av_fetch (av, i, 1)); - - PUTBACK; - call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL); - SPAGAIN; + switch (tag) + { + case CBOR_TAG_MAGIC: + return sv; + + case CBOR_TAG_INDIRECTION: + return newRV_noinc (sv); - if (SvTRUE (ERRSV)) + case CBOR_TAG_VALUE_SHARABLE: + if (ecb_expect_false (!dec->sharable)) + dec->sharable = (AV *)sv_2mortal ((SV *)newAV ()); + + av_push (dec->sharable, SvREFCNT_inc_NN (sv)); + + return sv; + + case CBOR_TAG_VALUE_SHAREDREF: { - FREETMPS; LEAVE; - ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); + // TODO: should verify that the sv atcually was a CBOR unsigned integer + UV idx = SvUV (sv); + + if (!dec->sharable || idx > AvFILLp (dec->sharable)) + ERR ("corrupted CBOR data (sharedref index out of bounds)"); + + SvREFCNT_dec (sv); + + return SvREFCNT_inc_NN (AvARRAY (dec->sharable)[idx]); } - SvREFCNT_dec (sv); - sv = SvREFCNT_inc (POPs); + case CBOR_TAG_PERL_OBJECT: + { + if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) + ERR ("corrupted CBOR data (non-array perl object)"); - PUTBACK; + AV *av = (AV *)SvRV (sv); + int len = av_len (av) + 1; + HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0); + + if (!stash) + ERR ("cannot decode perl-object (package does not exist)"); + + GV *method = gv_fetchmethod_autoload (stash, "THAW", 0); + + if (!method) + ERR ("cannot decode perl-object (package does not have a THAW method)"); + + dSP; - FREETMPS; LEAVE; + ENTER; SAVETMPS; PUSHMARK (SP); + EXTEND (SP, len + 1); + // we re-bless the reference to get overload and other niceties right + PUSHs (*av_fetch (av, 0, 1)); + PUSHs (sv_cbor); - return sv; - } - else - { - AV *av = newAV (); - av_push (av, newSVuv (tag)); - av_push (av, sv); + int i; - HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash - ? cbor_tagged_stash - : gv_stashpv ("CBOR::XS::Tagged" , 1); + for (i = 1; i < len; ++i) + PUSHs (*av_fetch (av, i, 1)); + + PUTBACK; + call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL); + SPAGAIN; + + if (SvTRUE (ERRSV)) + { + FREETMPS; LEAVE; + ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); + } + + SvREFCNT_dec (sv); + sv = SvREFCNT_inc (POPs); + + PUTBACK; + + FREETMPS; LEAVE; + + return sv; + } - return sv_bless (newRV_noinc ((SV *)av), tagged_stash); + default: + { + AV *av = newAV (); + av_push (av, newSVuv (tag)); + av_push (av, sv); + + HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash + ? cbor_tagged_stash + : gv_stashpv ("CBOR::XS::Tagged" , 1); + + return sv_bless (newRV_noinc ((SV *)av), tagged_stash); + } } fail: @@ -874,7 +940,7 @@ static SV * decode_cbor (SV *string, CBOR *cbor, char **offset_return) { - dec_t dec; + dec_t dec = { }; SV *sv; STRLEN len; char *data = SvPVbyte (string, len); @@ -886,8 +952,6 @@ dec.cbor = *cbor; dec.cur = (U8 *)data; dec.end = (U8 *)data + len; - dec.err = 0; - dec.depth = 0; sv = decode_sv (&dec); @@ -955,6 +1019,9 @@ ALIAS: shrink = F_SHRINK allow_unknown = F_ALLOW_UNKNOWN + allow_sharing = F_ALLOW_SHARING + dedup_keys = F_DEDUP_KEYS + dedup_strings = F_DEDUP_STRINGS PPCODE: { if (enable) @@ -969,6 +1036,9 @@ ALIAS: get_shrink = F_SHRINK get_allow_unknown = F_ALLOW_UNKNOWN + get_allow_sharing = F_ALLOW_SHARING + get_dedup_keys = F_DEDUP_KEYS + get_dedup_strings = F_DEDUP_STRINGS PPCODE: XPUSHs (boolSV (self->flags & ix));