--- CBOR-XS/XS.xs 2013/11/20 02:03:09 1.19 +++ CBOR-XS/XS.xs 2013/11/20 16:29:02 1.24 @@ -56,11 +56,10 @@ CBOR_TAG_MAGIC = 55799 // self-describe cbor }; -#define F_SHRINK 0x00000001UL -#define F_ALLOW_UNKNOWN 0x00000002UL -#define F_ALLOW_SHARING 0x00000004UL //TODO -#define F_DEDUP_STRINGS 0x00000008UL //TODO -#define F_DEDUP_KEYS 0x00000010UL //TODO +#define F_SHRINK 0x00000001UL +#define F_ALLOW_UNKNOWN 0x00000002UL +#define F_ALLOW_SHARING 0x00000004UL //TODO +#define F_ALLOW_STRINGREF 0x00000008UL //TODO #define INIT_SIZE 32 // initial scalar size to be allocated @@ -126,6 +125,21 @@ } } +// minimum length of a string to be registered for stringref +ecb_inline int +minimum_string_length (UV idx) +{ + return idx > 23 + ? idx > 0xffU + ? idx > 0xffffU + ? idx > 0xffffffffU + ? 7 + : 6 + : 5 + : 4 + : 3; +} + ///////////////////////////////////////////////////////////////////////////// // encoder @@ -137,7 +151,8 @@ SV *sv; // result scalar CBOR cbor; U32 depth; // recursion level - HV *stringref; // string => index, or 0 + HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8) + UV stringref_idx; HV *shareable; // ptr => index, or 0 UV shareable_idx; } enc_t; @@ -201,21 +216,40 @@ } } +ecb_inline void +encode_tag (enc_t *enc, UV tag) +{ + encode_uint (enc, 0xc0, tag); +} + static void encode_str (enc_t *enc, int utf8, char *str, STRLEN len) { + if (ecb_expect_false (enc->cbor.flags & F_ALLOW_STRINGREF)) + { + SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1); + + if (SvOK (*svp)) + { + // already registered, use stringref + encode_tag (enc, CBOR_TAG_STRINGREF); + encode_uint (enc, 0x00, SvUV (*svp)); + return; + } + else if (len >= minimum_string_length (enc->stringref_idx)) + { + // register only + sv_setuv (*svp, enc->stringref_idx); + ++enc->stringref_idx; + } + } + encode_uint (enc, utf8 ? 0x60 : 0x40, len); need (enc, len); memcpy (enc->cur, str, len); enc->cur += len; } -ecb_inline void -encode_tag (enc_t *enc, UV tag) -{ - encode_uint (enc, 0xc0, tag); -} - static void encode_sv (enc_t *enc, SV *sv); static void @@ -239,6 +273,11 @@ --enc->depth; } +ecb_inline void +encode_he (enc_t *enc, HE *he) +{ +} + static void encode_hv (enc_t *enc, HV *hv) { @@ -477,6 +516,14 @@ enc.end = SvEND (enc.sv); SvPOK_only (enc.sv); + + if (cbor->flags & F_ALLOW_STRINGREF) + { + encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE); + enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ()); + enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ()); + } + encode_sv (&enc, scalar); SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); @@ -501,6 +548,7 @@ U32 depth; // recursion depth U32 maxdepth; // recursion depth limit AV *shareable; + AV *stringref; } dec_t; #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE @@ -608,34 +656,43 @@ decode_he (dec_t *dec, HV *hv) { // for speed reasons, we specialcase single-string - // byte or utf-8 strings as keys. + // byte or utf-8 strings as keys, but only when !stringref - if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27) - { - I32 len = decode_uint (dec); - char *key = (char *)dec->cur; + if (ecb_expect_true (!dec->stringref)) + if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27) + { + I32 len = decode_uint (dec); + char *key = (char *)dec->cur; - dec->cur += len; + dec->cur += len; - hv_store (hv, key, len, decode_sv (dec), 0); - } - else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27) - { - I32 len = decode_uint (dec); - char *key = (char *)dec->cur; + if (ecb_expect_false (dec->stringref)) + av_push (dec->stringref, newSVpvn (key, len)); - dec->cur += len; + hv_store (hv, key, len, decode_sv (dec), 0); - hv_store (hv, key, -len, decode_sv (dec), 0); - } - else - { - SV *k = decode_sv (dec); - SV *v = decode_sv (dec); + return; + } + else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27) + { + I32 len = decode_uint (dec); + char *key = (char *)dec->cur; - hv_store_ent (hv, k, v, 0); - SvREFCNT_dec (k); - } + dec->cur += len; + + if (ecb_expect_false (dec->stringref)) + av_push (dec->stringref, newSVpvn_utf8 (key, len, 1)); + + hv_store (hv, key, -len, decode_sv (dec), 0); + + return; + } + + SV *k = decode_sv (dec); + SV *v = decode_sv (dec); + + hv_store_ent (hv, k, v, 0); + SvREFCNT_dec (k); } static SV * @@ -716,6 +773,10 @@ if (utf8) SvUTF8_on (sv); + if (ecb_expect_false (dec->stringref) + && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1)) + av_push (dec->stringref, SvREFCNT_inc_NN (sv)); + return sv; fail: @@ -734,10 +795,39 @@ switch (tag) { case CBOR_TAG_MAGIC: - return decode_sv (dec); + sv = decode_sv (dec); + break; case CBOR_TAG_INDIRECTION: - return newRV_noinc (decode_sv (dec)); + sv = newRV_noinc (decode_sv (dec)); + break; + + case CBOR_TAG_STRINGREF_NAMESPACE: + { + ENTER; SAVETMPS; + + SAVESPTR (dec->stringref); + dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); + + sv = decode_sv (dec); + + FREETMPS; LEAVE; + } + break; + + case CBOR_TAG_STRINGREF: + { + if ((*dec->cur >> 5) != 0) + ERR ("corrupted CBOR data (stringref index not an unsigned integer)"); + + UV idx = decode_uint (dec); + + if (!dec->stringref || (int)idx > AvFILLp (dec->stringref)) + ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)"); + + sv = newSVsv (AvARRAY (dec->stringref)[idx]); + } + break; case CBOR_TAG_VALUE_SHAREABLE: { @@ -751,8 +841,7 @@ sv_setsv (sv, osv); SvREFCNT_dec_NN (osv); } - - return sv; + break; case CBOR_TAG_VALUE_SHAREDREF: { @@ -761,11 +850,12 @@ UV idx = decode_uint (dec); - if (!dec->shareable || idx > AvFILLp (dec->shareable)) + if (!dec->shareable || (int)idx > AvFILLp (dec->shareable)) ERR ("corrupted CBOR data (sharedref index out of bounds)"); - return SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); + sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); } + break; case CBOR_TAG_PERL_OBJECT: { @@ -815,9 +905,8 @@ PUTBACK; FREETMPS; LEAVE; - - return sv; } + break; default: { @@ -831,10 +920,13 @@ ? cbor_tagged_stash : gv_stashpv ("CBOR::XS::Tagged" , 1); - return sv_bless (newRV_noinc ((SV *)av), tagged_stash); + sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash); } + break; } + return sv; + fail: SvREFCNT_dec (sv); return &PL_sv_undef; @@ -1014,8 +1106,7 @@ shrink = F_SHRINK allow_unknown = F_ALLOW_UNKNOWN allow_sharing = F_ALLOW_SHARING - dedup_keys = F_DEDUP_KEYS - dedup_strings = F_DEDUP_STRINGS + allow_stringref = F_ALLOW_STRINGREF PPCODE: { if (enable) @@ -1031,8 +1122,7 @@ get_shrink = F_SHRINK get_allow_unknown = F_ALLOW_UNKNOWN get_allow_sharing = F_ALLOW_SHARING - get_dedup_keys = F_DEDUP_KEYS - get_dedup_strings = F_DEDUP_STRINGS + get_allow_stringref = F_ALLOW_STRINGREF PPCODE: XPUSHs (boolSV (self->flags & ix));