--- CBOR-XS/XS.xs 2016/04/24 19:31:55 1.52 +++ CBOR-XS/XS.xs 2016/11/26 00:47:02 1.60 @@ -101,9 +101,11 @@ #define F_ALLOW_UNKNOWN 0x00000002UL #define F_ALLOW_SHARING 0x00000004UL #define F_ALLOW_CYCLES 0x00000008UL -#define F_PACK_STRINGS 0x00000010UL -#define F_UTF8_STRINGS 0x00000020UL -#define F_VALIDATE_UTF8 0x00000040UL +#define F_FORBID_OBJECTS 0x00000010UL +#define F_PACK_STRINGS 0x00000020UL +#define F_TEXT_KEYS 0x00000040UL +#define F_TEXT_STRINGS 0x00000080UL +#define F_VALIDATE_UTF8 0x00000100UL #define INIT_SIZE 32 // initial scalar size to be allocated @@ -279,31 +281,36 @@ encode_uint (enc, MAJOR_TAG, tag); } +// exceptional (hopefully) slow path for byte strings that need to be utf8-encoded +ecb_noinline static void +encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len) +{ + STRLEN ulen = len; + U8 *p, *pend = (U8 *)str + len; + + for (p = (U8 *)str; p < pend; ++p) + ulen += *p >> 7; // count set high bits + + encode_uint (enc, MAJOR_TEXT, ulen); + + need (enc, ulen); + for (p = (U8 *)str; p < pend; ++p) + if (*p < 0x80) + *enc->cur++ = *p; + else + { + *enc->cur++ = 0xc0 + (*p >> 6); + *enc->cur++ = 0x80 + (*p & 63); + } +} + ecb_inline void -encode_str (enc_t *enc, int utf8, char *str, STRLEN len) +encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) { - if (ecb_expect_false (enc->cbor.flags & F_UTF8_STRINGS)) + if (ecb_expect_false (upgrade_utf8)) if (!utf8) { - // exceptional path for bytze strings that need to be utf8-encoded - STRLEN ulen = len; - U8 *p, *pend = (U8 *)str + len; - - for (p = (U8 *)str; p < pend; ++p) - ulen += *p >> 7; // count set high bits - - encode_uint (enc, MAJOR_TEXT, ulen); - - need (enc, ulen); - for (p = (U8 *)str; p < pend; ++p) - if (*p < 0x80) - *enc->cur++ = *p; - else - { - *enc->cur++ = 0xc0 + (*p >> 6); - *enc->cur++ = 0x80 + (*p & 63); - } - + encode_str_utf8 (enc, utf8, str, len); return; } @@ -313,8 +320,8 @@ enc->cur += len; } -static void -encode_strref (enc_t *enc, int 
utf8, char *str, STRLEN len) +ecb_inline void +encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) { if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) { @@ -335,7 +342,7 @@ } } - encode_str (enc, utf8, str, len); + encode_str (enc, upgrade_utf8, utf8, str, len); } static void encode_sv (enc_t *enc, SV *sv); @@ -391,7 +398,7 @@ if (HeKLEN (he) == HEf_SVKEY) encode_sv (enc, HeSVKEY (he)); else - encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); + encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he)); encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); } @@ -473,7 +480,10 @@ HV *stash = SvSTASH (sv); GV *method; - if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) + if (enc->cbor.flags & F_FORBID_OBJECTS) + croak ("encountered object '%s', but forbid_objects is enabled", + SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) { dSP; @@ -519,7 +529,7 @@ encode_tag (enc, CBOR_TAG_PERL_OBJECT); encode_uint (enc, MAJOR_ARRAY, count + 1); - encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); + encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); while (count) encode_sv (enc, SP[1 - count--]); @@ -588,7 +598,7 @@ { STRLEN len; char *str = SvPV (sv, len); - encode_strref (enc, SvUTF8 (sv), str, len); + encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len); } else if (SvNOKp (sv)) encode_nv (enc, sv); @@ -657,13 +667,52 @@ AV *shareable; AV *stringref; SV *decode_tagged; + SV *err_sv; // optional sv for error, needs to be freed } dec_t; -#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE +// set dec->err to ERRSV +ecb_cold static void +err_errsv (dec_t *dec) +{ + if (!dec->err) + { + dec->err_sv = newSVsv (ERRSV); + + // chop off the trailing \n + SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1); + 
*SvEND (dec->err_sv) = 0; + + dec->err = SvPVutf8_nolen (dec->err_sv); + } +} + +// the following functions are used to reduce code size and help the compiler to optimise +ecb_cold static void +err_set (dec_t *dec, const char *reason) +{ + if (!dec->err) + dec->err = reason; +} + +ecb_cold static void +err_unexpected_end (dec_t *dec) +{ + err_set (dec, "unexpected end of CBOR data"); +} + +ecb_cold static void +err_nesting_exceeded (dec_t *dec) +{ + err_set (dec, ERR_NESTING_EXCEEDED); +} + +#define ERR_DO(do) SB do; goto fail; SE +#define ERR(reason) ERR_DO (err_set (dec, reason)) +#define ERR_ERRSV ERR_DO (err_errsv (dec)) -#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") +#define WANT(len) if (ecb_expect_false ((UV)(dec->end - dec->cur) < (UV)len)) ERR_DO (err_unexpected_end (dec)) -#define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED) +#define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED) #define DEC_DEC_DEPTH --dec->depth static UV @@ -750,7 +799,7 @@ } else { - int i, len = decode_uint (dec); + UV i, len = decode_uint (dec); WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays av_fill (av, len - 1); @@ -777,7 +826,7 @@ if (ecb_expect_true (!dec->stringref)) if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) { - I32 len = decode_uint (dec); + STRLEN len = decode_uint (dec); char *key = (char *)dec->cur; WANT (len); @@ -789,7 +838,7 @@ } else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) { - I32 len = decode_uint (dec); + STRLEN len = decode_uint (dec); char *key = (char *)dec->cur; WANT (len); @@ -807,6 +856,34 @@ SV *k = decode_sv (dec); SV *v = decode_sv (dec); + // we leak memory if uncaught exceptions are thrown by random magical + // methods, and this is hopefully the only place where it can happen, + // so if there is a 
chance of an exception, take the very slow path. + // since catching exceptions is "undocumented/internal/forbidden" by + // the new p5p powers, we need to call out to a perl function :/ + if (ecb_expect_false (SvAMAGIC (k))) + { + dSP; + + ENTER; SAVETMPS; + PUSHMARK (SP); + EXTEND (SP, 3); + PUSHs (sv_2mortal (newRV_inc ((SV *)hv))); + PUSHs (sv_2mortal (k)); + PUSHs (sv_2mortal (v)); + + PUTBACK; + call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL); + SPAGAIN; + + FREETMPS; LEAVE; + + if (SvTRUE (ERRSV)) + ERR_ERRSV; + + return; + } + hv_store_ent (hv, k, v, 0); SvREFCNT_dec (k); @@ -840,7 +917,9 @@ } else { - int pairs = decode_uint (dec); + UV pairs = decode_uint (dec); + + WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes while (pairs--) decode_he (dec, hv); @@ -938,14 +1017,16 @@ case CBOR_TAG_STRINGREF_NAMESPACE: { - ENTER; SAVETMPS; + // do not use SAVETMPS/FREETMPS, as these will + // erase mortalised caches, e.g. "shareable" + ENTER; SAVESPTR (dec->stringref); dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); sv = decode_sv (dec); - FREETMPS; LEAVE; + LEAVE; } break; @@ -1006,6 +1087,9 @@ case CBOR_TAG_PERL_OBJECT: { + if (dec->cbor.flags & F_FORBID_OBJECTS) + goto filter; + sv = decode_sv (dec); if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) @@ -1044,7 +1128,7 @@ if (SvTRUE (ERRSV)) { FREETMPS; LEAVE; - ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); + ERR_ERRSV; } SvREFCNT_dec (sv); @@ -1057,7 +1141,10 @@ break; default: + filter: { + SV *tag_sv = newSVuv (tag); + sv = decode_sv (dec); dSP; @@ -1065,7 +1152,7 @@ SAVESTACK_POS (); PUSHMARK (SP); EXTEND (SP, 2); - PUSHs (newSVuv (tag)); + PUSHs (tag_sv); PUSHs (sv); PUTBACK; @@ -1074,19 +1161,21 @@ if (SvTRUE (ERRSV)) { + SvREFCNT_dec (tag_sv); FREETMPS; LEAVE; - ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); + ERR_ERRSV; } if (count) { + SvREFCNT_dec (tag_sv); SvREFCNT_dec (sv); sv = SvREFCNT_inc (POPs); } else { AV
*av = newAV (); - av_push (av, newSVuv (tag)); + av_push (av, tag_sv); av_push (av, sv); HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash @@ -1225,7 +1314,7 @@ { if (dec.shareable) { - // need to break cyclic links, which whould all be in shareable + // need to break cyclic links, which would all be in shareable int i; SV **svp; @@ -1235,6 +1324,10 @@ } SvREFCNT_dec (sv); + + if (dec.err_sv) + sv_2mortal (dec.err_sv); + croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); } @@ -1388,6 +1481,8 @@ sv_cbor = newSVpv ("CBOR", 0); SvREADONLY_on (sv_cbor); + + assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1)); } PROTOTYPES: DISABLE @@ -1417,8 +1512,10 @@ allow_unknown = F_ALLOW_UNKNOWN allow_sharing = F_ALLOW_SHARING allow_cycles = F_ALLOW_CYCLES + forbid_objects = F_FORBID_OBJECTS pack_strings = F_PACK_STRINGS - utf8_strings = F_UTF8_STRINGS + text_keys = F_TEXT_KEYS + text_strings = F_TEXT_STRINGS validate_utf8 = F_VALIDATE_UTF8 PPCODE: { @@ -1436,7 +1533,10 @@ get_allow_unknown = F_ALLOW_UNKNOWN get_allow_sharing = F_ALLOW_SHARING get_allow_cycles = F_ALLOW_CYCLES + get_forbid_objects = F_FORBID_OBJECTS get_pack_strings = F_PACK_STRINGS + get_text_keys = F_TEXT_KEYS + get_text_strings = F_TEXT_STRINGS get_validate_utf8 = F_VALIDATE_UTF8 PPCODE: XPUSHs (boolSV (self->flags & ix));