--- CBOR-XS/XS.xs 2013/12/01 14:45:03 1.39 +++ CBOR-XS/XS.xs 2020/11/30 18:30:29 1.68 @@ -8,7 +8,9 @@ #include #include #include +#include +#define ECB_NO_THREADS 1 #include "ecb.h" // compatibility with perl <5.18 @@ -21,6 +23,9 @@ #ifndef HvNAMEUTF8 # define HvNAMEUTF8(hv) 0 #endif +#ifndef SvREFCNT_inc_NN +# define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv) +#endif #ifndef SvREFCNT_dec_NN # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) #endif @@ -96,12 +101,28 @@ CBOR_TAG_MAGIC = 55799, // self-describe cbor }; +// known forced types, also hardcoded in CBOR.pm +enum +{ + AS_CBOR = 0, + AS_INT = 1, + AS_BYTES = 2, + AS_TEXT = 3, + AS_FLOAT16 = 4, + AS_FLOAT32 = 5, + AS_FLOAT64 = 6, + // possibly future enhancements: (generic) float, (generic) string +}; + #define F_SHRINK 0x00000001UL #define F_ALLOW_UNKNOWN 0x00000002UL #define F_ALLOW_SHARING 0x00000004UL #define F_ALLOW_CYCLES 0x00000008UL -#define F_PACK_STRINGS 0x00000010UL -#define F_VALIDATE_UTF8 0x00000020UL +#define F_FORBID_OBJECTS 0x00000010UL +#define F_PACK_STRINGS 0x00000020UL +#define F_TEXT_KEYS 0x00000040UL +#define F_TEXT_STRINGS 0x00000080UL +#define F_VALIDATE_UTF8 0x00000100UL #define INIT_SIZE 32 // initial scalar size to be allocated @@ -130,6 +151,11 @@ U32 max_depth; STRLEN max_size; SV *filter; + + // for the incremental parser + STRLEN incr_pos; // the current offset into the text + STRLEN incr_need; // minimum bytes needed to decode + AV *incr_count; // for every nesting level, the number of outstanding values, or -1 for indef. } CBOR; ecb_inline void @@ -143,6 +169,7 @@ cbor_free (CBOR *cbor) { SvREFCNT_dec (cbor->filter); + SvREFCNT_dec (cbor->incr_count); } ///////////////////////////////////////////////////////////////////////////// @@ -178,15 +205,11 @@ ecb_inline int minimum_string_length (UV idx) { - return idx > 23 - ? idx > 0xffU - ? idx > 0xffffU - ? idx > 0xffffffffU - ? 11 - : 7 - : 5 - : 4 - : 3; + return idx <= 23 ? 3 + : idx <= 0xffU ? 4 + : idx <= 0xffffU ? 5 + : idx <= 0xffffffffU ? 7 + : 11; } ///////////////////////////////////////////////////////////////////////////// @@ -209,7 +232,7 @@ ecb_inline void need (enc_t *enc, STRLEN len) { - if (ecb_expect_false (enc->cur + len >= enc->end)) + if (ecb_expect_false ((uintptr_t)(enc->end - enc->cur) < len)) { STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); @@ -225,6 +248,7 @@ *enc->cur++ = ch; } +// used for tags, intregers, element counts and so on static void encode_uint (enc_t *enc, int major, UV len) { @@ -265,23 +289,65 @@ } } +// encodes a perl value into a CBOR integer +ecb_inline void +encode_int (enc_t *enc, SV *sv) +{ + if (SvIsUV (sv)) + encode_uint (enc, MAJOR_POS_INT, SvUVX (sv)); + else if (SvIVX (sv) >= 0) + encode_uint (enc, MAJOR_POS_INT, SvIVX (sv)); + else + encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1)); +} + ecb_inline void encode_tag (enc_t *enc, UV tag) { encode_uint (enc, MAJOR_TAG, tag); } +// exceptional (hopefully) slow path for byte strings that need to be utf8-encoded +ecb_noinline static void +encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len) +{ + STRLEN ulen = len; + U8 *p, *pend = (U8 *)str + len; + + for (p = (U8 *)str; p < pend; ++p) + ulen += *p >> 7; // count set high bits + + encode_uint (enc, MAJOR_TEXT, ulen); + + need (enc, ulen); + for (p = (U8 *)str; p < pend; ++p) + if (*p < 0x80) + *enc->cur++ = *p; + else + { + *enc->cur++ = 0xc0 + (*p >> 6); + *enc->cur++ = 0x80 + (*p & 63); + } +} + ecb_inline void -encode_str (enc_t *enc, int utf8, char *str, STRLEN len) +encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) { + if (ecb_expect_false (upgrade_utf8)) + if (!utf8) + { + encode_str_utf8 (enc, utf8, str, len); + return; + } + encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); need (enc, len); memcpy (enc->cur, str, len); enc->cur += len; } -static void -encode_strref (enc_t *enc, int utf8, char *str, STRLEN len) +ecb_inline void +encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) { if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) { @@ -302,7 +368,97 @@ } } - encode_str (enc, utf8, str, len); + encode_str (enc, upgrade_utf8, utf8, str, len); +} + +ecb_inline void +encode_float16 (enc_t *enc, NV nv) +{ + need (enc, 1+2); + + *enc->cur++ = MAJOR_MISC | MISC_FLOAT16; + + uint16_t fp = ecb_float_to_binary16 (nv); + + if (!ecb_big_endian ()) + fp = ecb_bswap16 (fp); + + memcpy (enc->cur, &fp, 2); + enc->cur += 2; +} + +ecb_inline void +encode_float32 (enc_t *enc, NV nv) +{ + need (enc, 1+4); + + *enc->cur++ = MAJOR_MISC | MISC_FLOAT32; + + uint32_t fp = ecb_float_to_binary32 (nv); + + if (!ecb_big_endian ()) + fp = ecb_bswap32 (fp); + + memcpy (enc->cur, &fp, 4); + enc->cur += 4; +} + +ecb_inline void +encode_float64 (enc_t *enc, NV nv) +{ + need (enc, 1+8); + + *enc->cur++ = MAJOR_MISC | MISC_FLOAT64; + + uint64_t fp = ecb_double_to_binary64 (nv); + + if (!ecb_big_endian ()) + fp = ecb_bswap64 (fp); + + memcpy (enc->cur, &fp, 8); + enc->cur += 8; +} + +ecb_inline void +encode_forced (enc_t *enc, UV type, SV *sv) +{ + switch (type) + { + case AS_CBOR: + { + STRLEN len; + char *str = SvPVbyte (sv, len); + + need (enc, len); + memcpy (enc->cur, str, len); + enc->cur += len; + } + break; + + case AS_BYTES: + { + STRLEN len; + char *str = SvPVbyte (sv, len); + encode_strref (enc, 0, 0, str, len); + } + break; + + case AS_TEXT: + { + STRLEN len; + char *str = SvPVutf8 (sv, len); + encode_strref (enc, 1, 1, str, len); + } + break; + + case AS_INT: encode_int (enc, sv); break; + case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break; + case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break; + case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break; + + default: + croak ("encountered malformed CBOR::XS::Tagged object"); + } } static void encode_sv (enc_t *enc, SV *sv); @@ -319,11 +475,18 @@ encode_uint (enc, MAJOR_ARRAY, len + 1); - for (i = 0; i <= len; ++i) - { - SV **svp = av_fetch (av, i, 0); - encode_sv (enc, svp ? *svp : &PL_sv_undef); - } + if (ecb_expect_false (SvMAGICAL (av))) + for (i = 0; i <= len; ++i) + { + SV **svp = av_fetch (av, i, 0); + encode_sv (enc, svp ? *svp : &PL_sv_undef); + } + else + for (i = 0; i <= len; ++i) + { + SV *sv = AvARRAY (av)[i]; + encode_sv (enc, sv ? sv : &PL_sv_undef); + } --enc->depth; } @@ -341,7 +504,7 @@ int pairs = hv_iterinit (hv); int mg = SvMAGICAL (hv); - if (mg) + if (ecb_expect_false (mg)) encode_ch (enc, MAJOR_MAP | MINOR_INDEF); else encode_uint (enc, MAJOR_MAP, pairs); @@ -351,12 +514,12 @@ if (HeKLEN (he) == HEf_SVKEY) encode_sv (enc, HeSVKEY (he)); else - encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); + encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he)); encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); } - if (mg) + if (ecb_expect_false (mg)) encode_ch (enc, MAJOR_MISC | MINOR_INDEF); --enc->depth; @@ -399,8 +562,22 @@ if (svt != SVt_PVAV) croak ("encountered CBOR::XS::Tagged object that isn't an array"); - encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1))); - encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); + switch (av_len ((AV *)sv)) + { + case 2-1: + // actually a tagged value + encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1))); + encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); + break; + + case 3-1: + // a forced type [value, type, undef] + encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1)); + break; + + default: + croak ("encountered malformed CBOR::XS::Tagged object"); + } return; } @@ -409,7 +586,7 @@ if (ecb_expect_false (SvREFCNT (sv) > 1) && ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)) { - if (!enc->shareable) + if (ecb_expect_false (!enc->shareable)) enc->shareable = (HV *)sv_2mortal ((SV *)newHV ()); SV **svp = hv_fetch (enc->shareable, (char *)&sv, sizeof (sv), 1); @@ -433,11 +610,15 @@ HV *stash = SvSTASH (sv); GV *method; - if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) + if (enc->cbor.flags & F_FORBID_OBJECTS) + croak ("encountered object '%s', but forbid_objects is enabled", + SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + else if ((method = gv_fetchmethod_autoload (stash, "TO_CBOR", 0))) { dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + PUSHMARK (SP); // we re-bless the reference to get overload and other niceties right XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); @@ -460,7 +641,8 @@ { dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + PUSHMARK (SP); EXTEND (SP, 2); // we re-bless the reference to get overload and other niceties right PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); @@ -476,10 +658,16 @@ encode_tag (enc, CBOR_TAG_PERL_OBJECT); encode_uint (enc, MAJOR_ARRAY, count + 1); - encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); + encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); - while (count) - encode_sv (enc, SP[1 - count--]); + { + int i; + + for (i = 0; i < count; ++i) + encode_sv (enc, SP[i + 1 - count]); + + SP -= count; + } PUTBACK; @@ -511,29 +699,9 @@ encode_uint (enc, MAJOR_POS_INT, (U32)nv); //TODO: maybe I32? else if (ecb_expect_false (nv == (float)nv)) - { - uint32_t fp = ecb_float_to_binary32 (nv); - - *enc->cur++ = MAJOR_MISC | MISC_FLOAT32; - - if (!ecb_big_endian ()) - fp = ecb_bswap32 (fp); - - memcpy (enc->cur, &fp, 4); - enc->cur += 4; - } + encode_float32 (enc, nv); else - { - uint64_t fp = ecb_double_to_binary64 (nv); - - *enc->cur++ = MAJOR_MISC | MISC_FLOAT64; - - if (!ecb_big_endian ()) - fp = ecb_bswap64 (fp); - - memcpy (enc->cur, &fp, 8); - enc->cur += 8; - } + encode_float64 (enc, nv); } static void @@ -545,19 +713,12 @@ { STRLEN len; char *str = SvPV (sv, len); - encode_strref (enc, SvUTF8 (sv), str, len); + encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len); } else if (SvNOKp (sv)) encode_nv (enc, sv); else if (SvIOKp (sv)) - { - if (SvIsUV (sv)) - encode_uint (enc, MAJOR_POS_INT, SvUVX (sv)); - else if (SvIVX (sv) >= 0) - encode_uint (enc, MAJOR_POS_INT, SvIVX (sv)); - else - encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1)); - } + encode_int (enc, sv); else if (SvROK (sv)) encode_rv (enc, SvRV (sv)); else if (!SvOK (sv)) @@ -572,12 +733,12 @@ static SV * encode_cbor (SV *scalar, CBOR *cbor) { - enc_t enc = { }; + enc_t enc = { 0 }; - enc.cbor = *cbor; - enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); - enc.cur = SvPVX (enc.sv); - enc.end = SvEND (enc.sv); + enc.cbor = *cbor; + enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); + enc.cur = SvPVX (enc.sv); + enc.end = SvEND (enc.sv); SvPOK_only (enc.sv); @@ -614,13 +775,46 @@ AV *shareable; AV *stringref; SV *decode_tagged; + SV *err_sv; // optional sv for error, needs to be freed } dec_t; -#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE +// set dec->err to ERRSV +ecb_cold static void +err_errsv (dec_t *dec) +{ + if (!dec->err) + { + dec->err_sv = newSVsv (ERRSV); -#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") + // chop off the trailing \n + SvCUR_set (dec->err_sv, SvCUR (dec->err_sv) - 1); + *SvEND (dec->err_sv) = 0; -#define DEC_INC_DEPTH if (++dec->depth > dec->cbor.max_depth) ERR (ERR_NESTING_EXCEEDED) + dec->err = SvPVutf8_nolen (dec->err_sv); + } +} + +// the following functions are used to reduce code size and help the compiler to optimise +ecb_cold static void +err_set (dec_t *dec, const char *reason) +{ + if (!dec->err) + dec->err = reason; +} + +ecb_cold static void +err_unexpected_end (dec_t *dec) +{ + err_set (dec, "unexpected end of CBOR data"); +} + +#define ERR_DO(do) SB do; goto fail; SE +#define ERR(reason) ERR_DO (err_set (dec, reason)) +#define ERR_ERRSV ERR_DO (err_errsv (dec)) + +#define WANT(len) if (ecb_expect_false ((uintptr_t)(dec->end - dec->cur) < (STRLEN)len)) ERR_DO (err_unexpected_end (dec)) + +#define DEC_INC_DEPTH if (ecb_expect_false (++dec->depth > dec->cbor.max_depth)) ERR (ERR_NESTING_EXCEEDED) #define DEC_DEC_DEPTH --dec->depth static UV @@ -696,7 +890,7 @@ { WANT (1); - if (*dec->cur == (MAJOR_MISC | MINOR_INDEF)) + if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err) { ++dec->cur; break; @@ -707,7 +901,7 @@ } else { - int i, len = decode_uint (dec); + UV i, len = decode_uint (dec); WANT (len); // complexity check for av_fill - need at least one byte per value, do not allow supersize arrays av_fill (av, len - 1); @@ -720,7 +914,7 @@ return newRV_noinc ((SV *)av); fail: - SvREFCNT_dec (av); + SvREFCNT_dec_NN (av); DEC_DEC_DEPTH; return &PL_sv_undef; } @@ -732,22 +926,24 @@ // byte or utf-8 strings as keys, but only when !stringref if (ecb_expect_true (!dec->stringref)) - if (ecb_expect_true ((*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) + if (ecb_expect_true ((U8)(*dec->cur - MAJOR_BYTES) <= LENGTH_EXT8)) { - I32 len = decode_uint (dec); + STRLEN len = decode_uint (dec); char *key = (char *)dec->cur; + WANT (len); dec->cur += len; - hv_store (hv, key, len, decode_sv (dec), 0); + hv_store (hv, key, len, decode_sv (dec), 0); return; } - else if (ecb_expect_true ((*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) + else if (ecb_expect_true ((U8)(*dec->cur - MAJOR_TEXT) <= LENGTH_EXT8)) { - I32 len = decode_uint (dec); + STRLEN len = decode_uint (dec); char *key = (char *)dec->cur; + WANT (len); dec->cur += len; if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) @@ -762,8 +958,36 @@ SV *k = decode_sv (dec); SV *v = decode_sv (dec); + // we leak memory if uncaught exceptions are thrown by random magical + // methods, and this is hopefully the only place where it can happen, + // so if there is a chance of an exception, take the very slow path. + // since catching exceptions is "undocumented/internal/forbidden" by + // the new p5p powers, we need to call out to a perl function :/ + if (ecb_expect_false (SvAMAGIC (k))) + { + dSP; + + ENTER; SAVETMPS; + PUSHMARK (SP); + EXTEND (SP, 3); + PUSHs (sv_2mortal (newRV_inc ((SV *)hv))); + PUSHs (sv_2mortal (k)); + PUSHs (sv_2mortal (v)); + + PUTBACK; + call_pv ("CBOR::XS::_hv_store", G_VOID | G_DISCARD | G_EVAL); + SPAGAIN; + + FREETMPS; LEAVE; + + if (SvTRUE (ERRSV)) + ERR_ERRSV; + + return; + } + hv_store_ent (hv, k, v, 0); - SvREFCNT_dec (k); + SvREFCNT_dec_NN (k); fail: ; @@ -784,7 +1008,7 @@ { WANT (1); - if (*dec->cur == (MAJOR_MISC | MINOR_INDEF)) + if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err) { ++dec->cur; break; @@ -795,7 +1019,9 @@ } else { - int pairs = decode_uint (dec); + UV pairs = decode_uint (dec); + + WANT (pairs); // complexity check - need at least one byte per value, do not allow supersize hashes while (pairs--) decode_he (dec, hv); @@ -805,7 +1031,7 @@ return newRV_noinc ((SV *)hv); fail: - SvREFCNT_dec (hv); + SvREFCNT_dec_NN (hv); DEC_DEC_DEPTH; return &PL_sv_undef; } @@ -815,7 +1041,7 @@ { SV *sv = 0; - if ((*dec->cur & MINOR_MASK) == MINOR_INDEF) + if (ecb_expect_false ((*dec->cur & MINOR_MASK) == MINOR_INDEF)) { // indefinite length strings ++dec->cur; @@ -893,14 +1119,16 @@ case CBOR_TAG_STRINGREF_NAMESPACE: { - ENTER; SAVETMPS; + // do not use SAVETMPS/FREETMPS, as these will + // erase mortalised caches, e.g. "shareable" + ENTER; SAVESPTR (dec->stringref); dec->stringref = (AV *)sv_2mortal ((SV *)newAV ()); sv = decode_sv (dec); - FREETMPS; LEAVE; + LEAVE; } break; @@ -911,7 +1139,7 @@ UV idx = decode_uint (dec); - if (!dec->stringref || (int)idx > AvFILLp (dec->stringref)) + if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref))) ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)"); sv = newSVsv (AvARRAY (dec->stringref)[idx]); @@ -949,7 +1177,7 @@ UV idx = decode_uint (dec); - if (!dec->shareable || (int)idx > AvFILLp (dec->shareable)) + if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) ERR ("corrupted CBOR data (sharedref index out of bounds)"); sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); @@ -961,6 +1189,9 @@ case CBOR_TAG_PERL_OBJECT: { + if (dec->cbor.flags & F_FORBID_OBJECTS) + goto filter; + sv = decode_sv (dec); if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) @@ -980,7 +1211,8 @@ dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + PUSHMARK (SP); EXTEND (SP, len + 1); // we re-bless the reference to get overload and other niceties right PUSHs (*av_fetch (av, 0, 1)); @@ -998,10 +1230,10 @@ if (SvTRUE (ERRSV)) { FREETMPS; LEAVE; - ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); + ERR_ERRSV; } - SvREFCNT_dec (sv); + SvREFCNT_dec_NN (sv); sv = SvREFCNT_inc (POPs); PUTBACK; @@ -1011,13 +1243,17 @@ break; default: + filter: { + SV *tag_sv = newSVuv (tag); + sv = decode_sv (dec); dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + PUSHMARK (SP); EXTEND (SP, 2); - PUSHs (newSVuv (tag)); + PUSHs (tag_sv); PUSHs (sv); PUTBACK; @@ -1026,19 +1262,22 @@ if (SvTRUE (ERRSV)) { + SvREFCNT_dec_NN (tag_sv); FREETMPS; LEAVE; - ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); + ERR_ERRSV; } if (count) { - SvREFCNT_dec (sv); - sv = SvREFCNT_inc (POPs); + SvREFCNT_dec_NN (tag_sv); + SvREFCNT_dec_NN (sv); + sv = SvREFCNT_inc_NN (TOPs); + SP -= count; } else { AV *av = newAV (); - av_push (av, newSVuv (tag)); + av_push (av, tag_sv); av_push (av, sv); HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash @@ -1134,9 +1373,11 @@ } // 0..19 unassigned simple - // 24 reserved + unassigned (reserved values are not encodable) + // 24 reserved + unassigned simple (reserved values are not encodable) + // 28-30 unassigned misc + // 31 break code default: - ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); + ERR ("corrupted CBOR data (reserved/unassigned/unexpected major 7 value)"); } break; @@ -1149,7 +1390,7 @@ static SV * decode_cbor (SV *string, CBOR *cbor, char **offset_return) { - dec_t dec = { }; + dec_t dec = { 0 }; SV *sv; STRLEN len; char *data = SvPVbyte (string, len); @@ -1175,7 +1416,7 @@ { if (dec.shareable) { - // need to break cyclic links, which whould all be in shareable + // need to break cyclic links, which would all be in shareable int i; SV **svp; @@ -1184,7 +1425,11 @@ sv_setsv (*svp, &PL_sv_undef); } - SvREFCNT_dec (sv); + SvREFCNT_dec_NN (sv); + + if (dec.err_sv) + sv_2mortal (dec.err_sv); + croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur); } @@ -1194,6 +1439,130 @@ } ///////////////////////////////////////////////////////////////////////////// +// incremental parser + +#define INCR_DONE(cbor) (AvFILLp (cbor->incr_count) < 0) + +// returns 0 for notyet, 1 for success or error +static int +incr_parse (CBOR *self, SV *cborstr) +{ + STRLEN cur; + SvPV (cborstr, cur); + + while (ecb_expect_true (self->incr_need <= cur)) + { + // table of integer count bytes + static I8 incr_len[MINOR_MASK + 1] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 4, 8,-1,-1,-1,-2 + }; + + const U8 *p = SvPVX (cborstr) + self->incr_pos; + U8 m = *p & MINOR_MASK; + IV count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]); + I8 ilen = incr_len[m]; + + self->incr_need = self->incr_pos + 1; + + if (ecb_expect_false (ilen < 0)) + { + if (m != MINOR_INDEF) + return 1; // error + + if (*p == (MAJOR_MISC | MINOR_INDEF)) + { + if (count >= 0) + return 1; // error + + count = 1; + } + else + { + av_push (self->incr_count, newSViv (-1)); //TODO: nest + count = -1; + } + } + else + { + self->incr_need += ilen; + if (ecb_expect_false (self->incr_need > cur)) + return 0; + + int major = *p >> MAJOR_SHIFT; + + switch (major) + { + case MAJOR_TAG >> MAJOR_SHIFT: + ++count; // tags merely prefix another value + break; + + case MAJOR_BYTES >> MAJOR_SHIFT: + case MAJOR_TEXT >> MAJOR_SHIFT: + case MAJOR_ARRAY >> MAJOR_SHIFT: + case MAJOR_MAP >> MAJOR_SHIFT: + { + UV len; + + if (ecb_expect_false (ilen)) + { + len = 0; + + do { + len = (len << 8) | *++p; + } while (--ilen); + } + else + len = m; + + switch (major) + { + case MAJOR_BYTES >> MAJOR_SHIFT: + case MAJOR_TEXT >> MAJOR_SHIFT: + self->incr_need += len; + if (ecb_expect_false (self->incr_need > cur)) + return 0; + + break; + + case MAJOR_MAP >> MAJOR_SHIFT: + len <<= 1; + case MAJOR_ARRAY >> MAJOR_SHIFT: + if (len) + { + av_push (self->incr_count, newSViv (len + 1)); //TODO: nest + count = len + 1; + } + break; + } + } + } + } + + self->incr_pos = self->incr_need; + + if (count > 0) + { + while (!--count) + { + if (!AvFILLp (self->incr_count)) + return 1; // done + + SvREFCNT_dec_NN (av_pop (self->incr_count)); + count = SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]); + } + + SvIVX (AvARRAY (self->incr_count)[AvFILLp (self->incr_count)]) = count; + } + } + + return 0; +} + + +///////////////////////////////////////////////////////////////////////////// // XS interface functions MODULE = CBOR::XS PACKAGE = CBOR::XS @@ -1214,6 +1583,8 @@ sv_cbor = newSVpv ("CBOR", 0); SvREADONLY_on (sv_cbor); + + assert (("STRLEN must be an unsigned type", 0 <= (STRLEN)-1)); } PROTOTYPES: DISABLE @@ -1243,7 +1614,10 @@ allow_unknown = F_ALLOW_UNKNOWN allow_sharing = F_ALLOW_SHARING allow_cycles = F_ALLOW_CYCLES + forbid_objects = F_FORBID_OBJECTS pack_strings = F_PACK_STRINGS + text_keys = F_TEXT_KEYS + text_strings = F_TEXT_STRINGS validate_utf8 = F_VALIDATE_UTF8 PPCODE: { @@ -1261,7 +1635,10 @@ get_allow_unknown = F_ALLOW_UNKNOWN get_allow_sharing = F_ALLOW_SHARING get_allow_cycles = F_ALLOW_CYCLES + get_forbid_objects = F_FORBID_OBJECTS get_pack_strings = F_PACK_STRINGS + get_text_keys = F_TEXT_KEYS + get_text_strings = F_TEXT_STRINGS get_validate_utf8 = F_VALIDATE_UTF8 PPCODE: XPUSHs (boolSV (self->flags & ix)); @@ -1321,6 +1698,58 @@ PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); } +void incr_parse (CBOR *self, SV *cborstr) + ALIAS: + incr_parse_multiple = 1 + PPCODE: +{ + if (SvUTF8 (cborstr)) + sv_utf8_downgrade (cborstr, 0); + + if (!self->incr_count) + { + self->incr_count = newAV (); + self->incr_pos = 0; + self->incr_need = 1; + + av_push (self->incr_count, newSViv (1)); + } + + do + { + if (!incr_parse (self, cborstr)) + { + if (self->incr_need > self->max_size && self->max_size) + croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu", + (unsigned long)self->incr_need, (unsigned long)self->max_size); + + break; + } + + SV *sv; + char *offset; + + PUTBACK; sv = decode_cbor (cborstr, self, &offset); SPAGAIN; + XPUSHs (sv); + + sv_chop (cborstr, offset); + + av_clear (self->incr_count); + av_push (self->incr_count, newSViv (1)); + + self->incr_pos = 0; + self->incr_need = self->incr_pos + 1; + } + while (ix); +} + +void incr_reset (CBOR *self) + CODE: +{ + SvREFCNT_dec (self->incr_count); + self->incr_count = 0; +} + void DESTROY (CBOR *self) PPCODE: cbor_free (self); @@ -1349,3 +1778,19 @@ XPUSHs (cborstr); } +#ifdef __AFL_COMPILER + +void +afl_init () + CODE: + __AFL_INIT (); + +int +afl_loop (unsigned int count = 10000) + CODE: + RETVAL = __AFL_LOOP (count); + OUTPUT: + RETVAL + +#endif +