// Unified diff of CBOR-XS/XS.xs, revision 1.63 (2016/11/26) -> 1.73 (2021/10/23).
// Visible changes: adds the cbor_is_utf8_string wrapper (treats len 0 as an empty string),
// adds the AS_* forced-type enum together with encode_forced and encode_array_as_map,
// factors integer/float16/float32/float64/bool encoding into encode_* helpers,
// replaces the (int) casts in the stringref/sharedref index bounds checks with UV comparisons,
// makes the indefinite-length decode loops also stop when dec->err is set,
// drops the SAVESTACK_POS () calls, and adds afl_init/afl_loop under __AFL_COMPILER.
// NOTE(review): hunk line breaks appear to have been collapsed into spaces; they would
// need to be restored before this can be applied as a patch -- TODO confirm against upstream.
--- CBOR-XS/XS.xs 2016/11/26 04:50:58 1.63 +++ CBOR-XS/XS.xs 2021/10/23 03:00:31 1.73 @@ -30,6 +30,9 @@ # define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv) #endif +// perl's is_utf8_string interprets len=0 as "calculate len", but we want it to mean 0 +#define cbor_is_utf8_string(str,len) (!(len) || is_utf8_string ((str), (len))) + // known major and minor types enum cbor_type { @@ -101,6 +104,20 @@ CBOR_TAG_MAGIC = 55799, // self-describe cbor }; +// known forced types, also hardcoded in CBOR.pm +enum +{ + AS_CBOR = 0, + AS_INT = 1, + AS_BYTES = 2, + AS_TEXT = 3, + AS_FLOAT16 = 4, + AS_FLOAT32 = 5, + AS_FLOAT64 = 6, + AS_MAP = 7, + // possibly future enhancements: (generic) float, (generic) string +}; + #define F_SHRINK 0x00000001UL #define F_ALLOW_UNKNOWN 0x00000002UL #define F_ALLOW_SHARING 0x00000004UL @@ -189,7 +206,7 @@ } // minimum length of a string to be registered for stringref -ecb_inline int +ecb_inline STRLEN minimum_string_length (UV idx) { return idx <= 23 ? 3 @@ -228,6 +245,8 @@ } } +static void encode_sv (enc_t *enc, SV *sv); + ecb_inline void encode_ch (enc_t *enc, char ch) { @@ -235,6 +254,7 @@ *enc->cur++ = ch; } +// used for tags, integers, element counts and so on static void encode_uint (enc_t *enc, int major, UV len) { @@ -275,6 +295,18 @@ } } +// encodes a perl value into a CBOR integer +ecb_inline void +encode_int (enc_t *enc, SV *sv) +{ + if (SvIsUV (sv)) + encode_uint (enc, MAJOR_POS_INT, SvUVX (sv)); + else if (SvIVX (sv) >= 0) + encode_uint (enc, MAJOR_POS_INT, SvIVX (sv)); + else + encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1)); +} + ecb_inline void encode_tag (enc_t *enc, UV tag) { @@ -345,7 +377,136 @@ encode_str (enc, upgrade_utf8, utf8, str, len); } -static void encode_sv (enc_t *enc, SV *sv); +ecb_inline void +encode_float16 (enc_t *enc, NV nv) +{ + need (enc, 1+2); + + *enc->cur++ = MAJOR_MISC | MISC_FLOAT16; + + uint16_t fp = ecb_float_to_binary16 (nv); + + if (!ecb_big_endian ()) + fp = ecb_bswap16 (fp); + + memcpy (enc->cur, 
&fp, 2); + enc->cur += 2; +} + +ecb_inline void +encode_float32 (enc_t *enc, NV nv) +{ + need (enc, 1+4); + + *enc->cur++ = MAJOR_MISC | MISC_FLOAT32; + + uint32_t fp = ecb_float_to_binary32 (nv); + + if (!ecb_big_endian ()) + fp = ecb_bswap32 (fp); + + memcpy (enc->cur, &fp, 4); + enc->cur += 4; +} + +ecb_inline void +encode_float64 (enc_t *enc, NV nv) +{ + need (enc, 1+8); + + *enc->cur++ = MAJOR_MISC | MISC_FLOAT64; + + uint64_t fp = ecb_double_to_binary64 (nv); + + if (!ecb_big_endian ()) + fp = ecb_bswap64 (fp); + + memcpy (enc->cur, &fp, 8); + enc->cur += 8; +} + +ecb_inline void +encode_bool (enc_t *enc, int istrue) +{ + encode_ch (enc, istrue ? MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE); +} + +// encodes an arrayref containing key-value pairs as CBOR map +ecb_inline void +encode_array_as_map (enc_t *enc, SV *sv) +{ + if (enc->depth >= enc->cbor.max_depth) + croak (ERR_NESTING_EXCEEDED); + + ++enc->depth; + + // as_map does error checking for us, but we re-check in case + // things have changed. + + if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) + croak ("CBOR::XS::as_map requires an array reference (did you change the array after calling as_map?)"); + + AV *av = (AV *)SvRV (sv); + int i, len = av_len (av); + + if (!(len & 1)) + croak ("CBOR::XS::as_map requires an even number of elements (did you change the array after calling as_map?)"); + + encode_uint (enc, MAJOR_MAP, (len + 1) >> 1); + + for (i = 0; i <= len; ++i) + { + SV **svp = av_fetch (av, i, 0); + encode_sv (enc, svp ? 
*svp : &PL_sv_undef); + } + + --enc->depth; +} + +ecb_inline void +encode_forced (enc_t *enc, UV type, SV *sv) +{ + switch (type) + { + case AS_CBOR: + { + STRLEN len; + char *str = SvPVbyte (sv, len); + + need (enc, len); + memcpy (enc->cur, str, len); + enc->cur += len; + } + break; + + case AS_BYTES: + { + STRLEN len; + char *str = SvPVbyte (sv, len); + encode_strref (enc, 0, 0, str, len); + } + break; + + case AS_TEXT: + { + STRLEN len; + char *str = SvPVutf8 (sv, len); + encode_strref (enc, 1, 1, str, len); + } + break; + + case AS_INT: encode_int (enc, sv); break; + + case AS_FLOAT16: encode_float16 (enc, SvNV (sv)); break; + case AS_FLOAT32: encode_float32 (enc, SvNV (sv)); break; + case AS_FLOAT64: encode_float64 (enc, SvNV (sv)); break; + + case AS_MAP: encode_array_as_map (enc, sv); break; + + default: + croak ("encountered malformed CBOR::XS::Tagged object"); + } +} static void encode_av (enc_t *enc, AV *av) @@ -433,7 +594,7 @@ if (stash == boolean_stash) { - encode_ch (enc, SvIV (sv) ? 
MAJOR_MISC | SIMPLE_TRUE : MAJOR_MISC | SIMPLE_FALSE); + encode_bool (enc, SvIV (sv)); return; } else if (stash == error_stash) @@ -446,8 +607,22 @@ if (svt != SVt_PVAV) croak ("encountered CBOR::XS::Tagged object that isn't an array"); - encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1))); - encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); + switch (av_len ((AV *)sv)) + { + case 2-1: + // actually a tagged value + encode_uint (enc, MAJOR_TAG, SvUV (*av_fetch ((AV *)sv, 0, 1))); + encode_sv (enc, *av_fetch ((AV *)sv, 1, 1)); + break; + + case 3-1: + // a forced type [value, type, undef] + encode_forced (enc, SvUV (*av_fetch ((AV *)sv, 1, 1)), *av_fetch ((AV *)sv, 0, 1)); + break; + + default: + croak ("encountered malformed CBOR::XS::Tagged object"); + } return; } @@ -512,7 +687,6 @@ dSP; ENTER; SAVETMPS; - SAVESTACK_POS (); PUSHMARK (SP); EXTEND (SP, 2); // we re-bless the reference to get overload and other niceties right @@ -531,8 +705,14 @@ encode_uint (enc, MAJOR_ARRAY, count + 1); encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); - while (count) - encode_sv (enc, SP[1 - count--]); + { + int i; + + for (i = 0; i < count; ++i) + encode_sv (enc, SP[i + 1 - count]); + + SP -= count; + } PUTBACK; @@ -564,29 +744,9 @@ encode_uint (enc, MAJOR_POS_INT, (U32)nv); //TODO: maybe I32? 
else if (ecb_expect_false (nv == (float)nv)) - { - *enc->cur++ = MAJOR_MISC | MISC_FLOAT32; - - uint32_t fp = ecb_float_to_binary32 (nv); - - if (!ecb_big_endian ()) - fp = ecb_bswap32 (fp); - - memcpy (enc->cur, &fp, 4); - enc->cur += 4; - } + encode_float32 (enc, nv); else - { - *enc->cur++ = MAJOR_MISC | MISC_FLOAT64; - - uint64_t fp = ecb_double_to_binary64 (nv); - - if (!ecb_big_endian ()) - fp = ecb_bswap64 (fp); - - memcpy (enc->cur, &fp, 8); - enc->cur += 8; - } + encode_float64 (enc, nv); } static void @@ -603,14 +763,7 @@ else if (SvNOKp (sv)) encode_nv (enc, sv); else if (SvIOKp (sv)) - { - if (SvIsUV (sv)) - encode_uint (enc, MAJOR_POS_INT, SvUVX (sv)); - else if (SvIVX (sv) >= 0) - encode_uint (enc, MAJOR_POS_INT, SvIVX (sv)); - else - encode_uint (enc, MAJOR_NEG_INT, -(SvIVX (sv) + 1)); - } + encode_int (enc, sv); else if (SvROK (sv)) encode_rv (enc, SvRV (sv)); else if (!SvOK (sv)) @@ -782,7 +935,7 @@ { WANT (1); - if (*dec->cur == (MAJOR_MISC | MINOR_INDEF)) + if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err) { ++dec->cur; break; @@ -839,7 +992,7 @@ dec->cur += len; if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) - if (!is_utf8_string (key, len)) + if (!cbor_is_utf8_string ((U8 *)key, len)) ERR ("corrupted CBOR data (invalid UTF-8 in map key)"); hv_store (hv, key, -len, decode_sv (dec), 0); @@ -900,7 +1053,7 @@ { WANT (1); - if (*dec->cur == (MAJOR_MISC | MINOR_INDEF)) + if (*dec->cur == (MAJOR_MISC | MINOR_INDEF) || dec->err) { ++dec->cur; break; @@ -978,7 +1131,7 @@ if (utf8) { if (ecb_expect_false (dec->cbor.flags & F_VALIDATE_UTF8)) - if (!is_utf8_string (SvPVX (sv), SvCUR (sv))) + if (!cbor_is_utf8_string (SvPVX (sv), SvCUR (sv))) ERR ("corrupted CBOR data (invalid UTF-8 in text string)"); SvUTF8_on (sv); @@ -1031,7 +1184,7 @@ UV idx = decode_uint (dec); - if (!dec->stringref || (int)idx > AvFILLp (dec->stringref)) + if (!dec->stringref || idx >= (UV)(1 + AvFILLp (dec->stringref))) ERR ("corrupted CBOR data (stringref index out 
of bounds or outside namespace)"); sv = newSVsv (AvARRAY (dec->stringref)[idx]); @@ -1069,7 +1222,7 @@ UV idx = decode_uint (dec); - if (!dec->shareable || (int)idx > AvFILLp (dec->shareable)) + if (!dec->shareable || idx >= (UV)(1 + AvFILLp (dec->shareable))) ERR ("corrupted CBOR data (sharedref index out of bounds)"); sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); @@ -1143,7 +1296,6 @@ dSP; ENTER; SAVETMPS; - SAVESTACK_POS (); PUSHMARK (SP); EXTEND (SP, 2); PUSHs (tag_sv); @@ -1164,7 +1316,8 @@ { SvREFCNT_dec_NN (tag_sv); SvREFCNT_dec_NN (sv); - sv = SvREFCNT_inc_NN (POPs); + sv = SvREFCNT_inc_NN (TOPs); + SP -= count; } else { @@ -1421,6 +1574,7 @@ case MAJOR_MAP >> MAJOR_SHIFT: len <<= 1; + /* FALLTHROUGH */ case MAJOR_ARRAY >> MAJOR_SHIFT: if (len) { @@ -1670,3 +1824,19 @@ XPUSHs (cborstr); } +#ifdef __AFL_COMPILER + +void +afl_init () + CODE: + __AFL_INIT (); + +int +afl_loop (unsigned int count = 10000) + CODE: + RETVAL = __AFL_LOOP (count); + OUTPUT: + RETVAL + +#endif +