--- CBOR-XS/XS.xs 2014/02/18 22:12:12 1.44 +++ CBOR-XS/XS.xs 2016/11/25 13:27:29 1.58 @@ -102,7 +102,9 @@ #define F_ALLOW_SHARING 0x00000004UL #define F_ALLOW_CYCLES 0x00000008UL #define F_PACK_STRINGS 0x00000010UL -#define F_VALIDATE_UTF8 0x00000020UL +#define F_TEXT_KEYS 0x00000020UL +#define F_TEXT_STRINGS 0x00000040UL +#define F_VALIDATE_UTF8 0x00000080UL #define INIT_SIZE 32 // initial scalar size to be allocated @@ -278,17 +280,47 @@ encode_uint (enc, MAJOR_TAG, tag); } +// exceptional (hopefully) slow path for byte strings that need to be utf8-encoded +ecb_noinline static void +encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len) +{ + STRLEN ulen = len; + U8 *p, *pend = (U8 *)str + len; + + for (p = (U8 *)str; p < pend; ++p) + ulen += *p >> 7; // count set high bits + + encode_uint (enc, MAJOR_TEXT, ulen); + + need (enc, ulen); + for (p = (U8 *)str; p < pend; ++p) + if (*p < 0x80) + *enc->cur++ = *p; + else + { + *enc->cur++ = 0xc0 + (*p >> 6); + *enc->cur++ = 0x80 + (*p & 63); + } +} + ecb_inline void -encode_str (enc_t *enc, int utf8, char *str, STRLEN len) +encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) { + if (ecb_expect_false (upgrade_utf8)) + if (!utf8) + { + encode_str_utf8 (enc, utf8, str, len); + return; + } + encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); need (enc, len); memcpy (enc->cur, str, len); enc->cur += len; } -static void -encode_strref (enc_t *enc, int utf8, char *str, STRLEN len) +ecb_inline void +encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) { if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) { @@ -309,7 +341,7 @@ } } - encode_str (enc, utf8, str, len); + encode_str (enc, upgrade_utf8, utf8, str, len); } static void encode_sv (enc_t *enc, SV *sv); @@ -326,11 +358,18 @@ encode_uint (enc, MAJOR_ARRAY, len + 1); - for (i = 0; i <= len; ++i) - { - SV **svp = av_fetch (av, i, 0); - encode_sv (enc, svp ? 
*svp : &PL_sv_undef); - } + if (SvMAGICAL (av)) + for (i = 0; i <= len; ++i) + { + SV **svp = av_fetch (av, i, 0); + encode_sv (enc, svp ? *svp : &PL_sv_undef); + } + else + for (i = 0; i <= len; ++i) + { + SV *sv = AvARRAY (av)[i]; + encode_sv (enc, sv ? sv : &PL_sv_undef); + } --enc->depth; } @@ -358,7 +397,7 @@ if (HeKLEN (he) == HEf_SVKEY) encode_sv (enc, HeSVKEY (he)); else - encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); + encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he)); encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); } @@ -444,7 +483,8 @@ { dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + PUSHMARK (SP); // we re-bless the reference to get overload and other niceties right XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); @@ -467,7 +507,9 @@ { dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + SAVESTACK_POS (); + PUSHMARK (SP); EXTEND (SP, 2); // we re-bless the reference to get overload and other niceties right PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); @@ -483,7 +525,7 @@ encode_tag (enc, CBOR_TAG_PERL_OBJECT); encode_uint (enc, MAJOR_ARRAY, count + 1); - encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); + encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); while (count) encode_sv (enc, SP[1 - count--]); @@ -552,7 +594,7 @@ { STRLEN len; char *str = SvPV (sv, len); - encode_strref (enc, SvUTF8 (sv), str, len); + encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len); } else if (SvNOKp (sv)) encode_nv (enc, sv); @@ -579,7 +621,7 @@ static SV * encode_cbor (SV *scalar, CBOR *cbor) { - enc_t enc = { }; + enc_t enc = { 0 }; enc.cbor = *cbor; enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); @@ -625,9 +667,9 @@ #define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE -#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR 
("unexpected end of CBOR data") +#define WANT(len) if (ecb_expect_false ((UV)(dec->end - dec->cur) < (UV)len)) ERR ("unexpected end of CBOR data") -#define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED) +#define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED) #define DEC_DEC_DEPTH --dec->depth static UV @@ -714,7 +756,7 @@ } else { - int i, len = decode_uint (dec); + UV i, len = decode_uint (dec); WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays av_fill (av, len - 1); @@ -741,20 +783,22 @@ if (ecb_expect_true (!dec->stringref)) if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) { - I32 len = decode_uint (dec); + STRLEN len = decode_uint (dec); char *key = (char *)dec->cur; + WANT (len); dec->cur += len; - hv_store (hv, key, len, decode_sv (dec), 0); + hv_store (hv, key, len, decode_sv (dec), 0); return; } else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) { - I32 len = decode_uint (dec); + STRLEN len = decode_uint (dec); char *key = (char *)dec->cur; + WANT (len); dec->cur += len; if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) @@ -802,7 +846,9 @@ } else { - int pairs = decode_uint (dec); + UV pairs = decode_uint (dec); + + WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes while (pairs--) decode_he (dec, hv); @@ -900,14 +946,16 @@ case CBOR_TAG_STRINGREF_NAMESPACE: { - ENTER; SAVETMPS; + // do not use SAVETMPS/FREETMPS, as these will + // erase mortalised caches, e.g. 
"shareable" + ENTER; SAVESPTR (dec->stringref); dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); sv = decode_sv (dec); - FREETMPS; LEAVE; + LEAVE; } break; @@ -987,7 +1035,8 @@ dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + PUSHMARK (SP); EXTEND (SP, len + 1); // we re-bless the reference to get overload and other niceties right PUSHs (*av_fetch (av, 0, 1)); @@ -1019,12 +1068,16 @@ default: { + SV *tag_sv = newSVuv (tag); + sv = decode_sv (dec); dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + SAVESTACK_POS (); + PUSHMARK (SP); EXTEND (SP, 2); - PUSHs (newSVuv (tag)); + PUSHs (tag_sv); PUSHs (sv); PUTBACK; @@ -1033,19 +1086,21 @@ if (SvTRUE (ERRSV)) { + SvREFCNT_dec (tag_sv); FREETMPS; LEAVE; ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); } if (count) { + SvREFCNT_dec (tag_sv); SvREFCNT_dec (sv); sv = SvREFCNT_inc (POPs); } else { AV *av = newAV (); - av_push (av, newSVuv (tag)); + av_push (av, tag_sv); av_push (av, sv); HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash @@ -1158,7 +1213,7 @@ static SV * decode_cbor (SV *string, CBOR *cbor, char **offset_return) { - dec_t dec = { }; + dec_t dec = { 0 }; SV *sv; STRLEN len; char *data = SvPVbyte (string, len); @@ -1184,7 +1239,7 @@ { if (dec.shareable) { - // need to break cyclic links, which whould all be in shareable + // need to break cyclic links, which would all be in shareable int i; SV **svp; @@ -1259,6 +1314,10 @@ switch (major) { + case MAJOR_TAG >> MAJOR_SHIFT: + ++count; // tags merely prefix another value + break; + case MAJOR_BYTES >> MAJOR_SHIFT: case MAJOR_TEXT >> MAJOR_SHIFT: case MAJOR_ARRAY >> MAJOR_SHIFT: @@ -1343,6 +1402,8 @@ sv_cbor = newSVpv ("CBOR", 0); SvREADONLY_on (sv_cbor); + + assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1)); } PROTOTYPES: DISABLE @@ -1373,6 +1434,8 @@ allow_sharing = F_ALLOW_SHARING allow_cycles = F_ALLOW_CYCLES pack_strings = F_PACK_STRINGS + text_keys = F_TEXT_KEYS + text_strings = F_TEXT_STRINGS validate_utf8 = 
F_VALIDATE_UTF8 PPCODE: { @@ -1391,6 +1454,8 @@ get_allow_sharing = F_ALLOW_SHARING get_allow_cycles = F_ALLOW_CYCLES get_pack_strings = F_PACK_STRINGS + get_text_keys = F_TEXT_KEYS + get_text_strings = F_TEXT_STRINGS get_validate_utf8 = F_VALIDATE_UTF8 PPCODE: XPUSHs (boolSV (self->flags & ix));