CBOR-XS/XS.xs 1.19 -> 1.20: stringref support for the encoder and decoder.

Encoder: when F_DEDUP_STRINGS or F_DEDUP_KEYS is set, the output is prefixed
with CBOR_TAG_STRINGREF_NAMESPACE, and a hash key that was already emitted is
written as CBOR_TAG_STRINGREF followed by its index instead of being repeated.
Decoder: while a namespace is active, decoded strings and hash keys are pushed
onto dec->stringref, and CBOR_TAG_STRINGREF resolves an index into that array.
The tag decoder now leaves the switch via "break" and a single "return sv".

Index checks compare the decoded UV against the element count without
narrowing, and the SV-key lookup takes the value slot of the HE returned by
hv_fetch_ent.

Note: leading whitespace of the hunk lines was reconstructed and may not match
the target file byte for byte.

--- CBOR-XS/XS.xs 2013/11/20 02:03:09 1.19
+++ CBOR-XS/XS.xs 2013/11/20 11:06:42 1.20
@@ -137,7 +137,8 @@
   SV *sv; // result scalar
   CBOR cbor;
   U32 depth; // recursion level
-  HV *stringref; // string => index, or 0
+  HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
+  UV stringref_idx; // index assigned to the next newly seen string
   HV *shareable; // ptr => index, or 0
   UV shareable_idx;
 } enc_t;
@@ -239,6 +240,15 @@
   --enc->depth;
 }
 
+ecb_inline void
+encode_he (enc_t *enc, HE *he) // encode one hash key: SV keys via encode_sv, others via encode_str
+{
+  if (HeKLEN (he) == HEf_SVKEY)
+    encode_sv (enc, HeSVKEY (he));
+  else
+    encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
+}
+
 static void
 encode_hv (enc_t *enc, HV *hv)
 {
@@ -259,10 +269,29 @@
 
   while ((he = hv_iternext (hv)))
     {
-      if (HeKLEN (he) == HEf_SVKEY)
-        encode_sv (enc, HeSVKEY (he));
+      if (ecb_expect_false (enc->cbor.flags & (F_DEDUP_STRINGS | F_DEDUP_KEYS)))
+        {
+          SV **svp;
+
+          if (HeKLEN (he) == HEf_SVKEY)
+            svp = &HeVAL (hv_fetch_ent (enc->stringref[!! SvUTF8 (HeSVKEY (he))], HeSVKEY (he), 1, 0)); // hv_fetch_ent returns an HE *, so take its value slot
+          else
+            svp = hv_fetch (enc->stringref[!! HeKUTF8 (he) ], HeKEY (he), HeKLEN (he), 1);
+
+          if (SvOK (*svp))
+            {
+              encode_tag (enc, CBOR_TAG_STRINGREF);
+              encode_uint (enc, 0x00, SvUV (*svp));
+            }
+          else
+            {
+              sv_setuv (*svp, enc->stringref_idx);
+              ++enc->stringref_idx;
+              encode_he (enc, he);
+            }
+        }
       else
-        encode_str (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he));
+        encode_he (enc, he);
 
       encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he));
     }
@@ -477,6 +506,14 @@
   enc.end = SvEND (enc.sv);
 
   SvPOK_only (enc.sv);
+
+  if (cbor->flags & (F_DEDUP_STRINGS | F_DEDUP_KEYS))
+    {
+      encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
+      enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
+      enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
+    }
+
   encode_sv (&enc, scalar);
 
   SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
@@ -501,6 +538,7 @@
   U32 depth; // recursion depth
   U32 maxdepth; // recursion depth limit
   AV *shareable;
+  AV *stringref; // strings seen in the current stringref namespace, or 0
 } dec_t;
 
 #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
@@ -617,6 +655,9 @@
 
               dec->cur += len;
 
+              if (ecb_expect_false (dec->stringref))
+                av_push (dec->stringref, newSVpvn (key, len));
+
               hv_store (hv, key, len, decode_sv (dec), 0);
             }
           else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
@@ -626,6 +667,9 @@
 
               dec->cur += len;
 
+              if (ecb_expect_false (dec->stringref))
+                av_push (dec->stringref, newSVpvn_utf8 (key, len, 1));
+
               hv_store (hv, key, -len, decode_sv (dec), 0);
             }
           else
@@ -716,6 +760,9 @@
   if (utf8)
     SvUTF8_on (sv);
 
+  if (ecb_expect_false (dec->stringref))
+    av_push (dec->stringref, SvREFCNT_inc_NN (sv));
+
   return sv;
 
 fail:
@@ -734,10 +781,39 @@
   switch (tag)
     {
       case CBOR_TAG_MAGIC:
-        return decode_sv (dec);
+        sv = decode_sv (dec);
+        break;
 
       case CBOR_TAG_INDIRECTION:
-        return newRV_noinc (decode_sv (dec));
+        sv = newRV_noinc (decode_sv (dec));
+        break;
+
+      case CBOR_TAG_STRINGREF_NAMESPACE:
+        {
+          ENTER; SAVETMPS;
+
+          SAVESPTR (dec->stringref);
+          dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
+
+          sv = decode_sv (dec);
+
+          FREETMPS; LEAVE;
+        }
+        break;
+
+      case CBOR_TAG_STRINGREF:
+        {
+          if ((*dec->cur >> 5) != 0)
+            ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
+
+          UV idx = decode_uint (dec);
+
+          if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref))) // unsigned compare against the element count, no narrowing of idx
+            ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
+
+          sv = newSVsv (AvARRAY (dec->stringref)[idx]);
+        }
+        break;
 
       case CBOR_TAG_VALUE_SHAREABLE:
         {
@@ -751,8 +827,7 @@
           sv_setsv (sv, osv);
           SvREFCNT_dec_NN (osv);
         }
-
-        return sv;
+        break;
 
       case CBOR_TAG_VALUE_SHAREDREF:
         {
@@ -761,11 +836,12 @@
 
           UV idx = decode_uint (dec);
 
-          if (!dec->shareable || idx > AvFILLp (dec->shareable))
+          if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) // unsigned compare against the element count, no narrowing of idx
             ERR ("corrupted CBOR data (sharedref index out of bounds)");
 
-          return SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
+          sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
         }
+        break;
 
       case CBOR_TAG_PERL_OBJECT:
         {
@@ -815,9 +891,8 @@
           PUTBACK;
           FREETMPS;
           LEAVE;
-
-          return sv;
         }
+        break;
 
       default:
         {
@@ -831,10 +906,13 @@
                              ? cbor_tagged_stash
                              : gv_stashpv ("CBOR::XS::Tagged" , 1);
 
-          return sv_bless (newRV_noinc ((SV *)av), tagged_stash);
+          sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
         }
+        break;
     }
 
+  return sv;
+
 fail:
   SvREFCNT_dec (sv);
   return &PL_sv_undef;