--- JSON-XS/XS.xs 2013/10/28 23:23:10 1.120 +++ JSON-XS/XS.xs 2017/09/05 13:07:09 1.132 @@ -8,6 +8,7 @@ #include #include #include +#include #if defined(__BORLANDC__) || defined(_MSC_VER) # define snprintf _snprintf // C compilers have this in stdio.h @@ -19,6 +20,17 @@ # define UTF8_MAXBYTES 13 #endif +// compatibility with perl <5.18 +#ifndef HvNAMELEN_get +# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) +#endif +#ifndef HvNAMELEN +# define HvNAMELEN(hv) HvNAMELEN_get (hv) +#endif +#ifndef HvNAMEUTF8 +# define HvNAMEUTF8(hv) 0 +#endif + // three extra for rounding, sign, and end of string #define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 3) @@ -70,16 +82,18 @@ #ifdef USE_ITHREADS # define JSON_SLOW 1 # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) +# define BOOL_STASH (bool_stash ? bool_stash : gv_stashpv ("Types::Serialiser::Boolean", 1)) #else # define JSON_SLOW 0 # define JSON_STASH json_stash +# define BOOL_STASH bool_stash #endif // the amount of HEs to allocate on the stack, when sorting keys #define STACK_HES 64 -static HV *json_stash, *types_boolean_stash; // JSON::XS:: -static SV *types_true, *types_false, *sv_json; +static HV *json_stash, *bool_stash; // JSON::XS::, Types::Serialiser::Boolean:: +static SV *bool_true, *bool_false, *sv_json; enum { INCR_M_WS = 0, // initial whitespace skipping, must be 0 @@ -143,7 +157,32 @@ } } -// decode an utf-8 character and return it, or (UV)-1 in +/* adds two STRLENs together, slow, and with paranoia: croaks on overflow instead of wrapping; static because it is a file-local helper */ +static STRLEN +strlen_sum (STRLEN l1, STRLEN l2) +{ + size_t sum = l1 + l2; + + if (sum < (size_t)l2 || sum != (size_t)(STRLEN)sum) + croak ("JSON::XS: string size overflow"); + + return sum; +} + +/* similar to SvGROW, but somewhat safer and guarantees exponential realloc strategy */ +static char * +json_sv_grow (SV *sv, size_t len1, size_t len2) +{ + len1 = strlen_sum (len1, len2); + len1 = strlen_sum (len1, len1 >> 1); + + if (len1 > 4096 - 24) + len1 = (len1 | 4095) - 24; + + return
SvGROW (sv, len1); +} + +// decode a utf-8 character and return it, or (UV)-1 in // case of an error. // we special-case "safe" characters from U+80 .. U+7FF, // but use the very good perl function to parse anything else. @@ -290,6 +329,49 @@ return neg ? -accum : accum; } + +// target of scalar reference is bool? -1 == nope, 0 == false, 1 == true +static int +ref_bool_type (SV *sv) +{ + svtype svt = SvTYPE (sv); + + if (svt < SVt_PVAV) + { + STRLEN len = 0; + char *pv = svt ? SvPV (sv, len) : 0; + + if (len == 1) + if (*pv == '1') + return 1; + else if (*pv == '0') + return 0; + } + + return -1; +} + +// returns whether scalar is not a reference in the sense of allow_nonref; compares against BOOL_STASH (not bool_stash) because CLONE resets bool_stash to 0 under ithreads +static int +json_nonref (SV *scalar) +{ + if (!SvROK (scalar)) + return 1; + + scalar = SvRV (scalar); + + if (SvTYPE (scalar) >= SVt_PVMG) + { + if (SvSTASH (scalar) == BOOL_STASH) + return 1; + + if (!SvOBJECT (scalar) && ref_bool_type (scalar) >= 0) + return 1; + } + + return 0; +} + ///////////////////////////////////////////////////////////////////////////// // encoder @@ -307,12 +389,12 @@ INLINE void need (enc_t *enc, STRLEN len) { - if (expect_false (enc->cur + len >= enc->end)) + if (expect_false ((uintptr_t)(enc->end - enc->cur) < len)) { STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); - SvGROW (enc->sv, cur + (len < (cur >> 2) ? 
cur >> 2 : len) + 1); - enc->cur = SvPVX (enc->sv) + cur; - enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; + char *buf = json_sv_grow (enc->sv, cur, len); + enc->cur = buf + cur; + enc->end = buf + SvLEN (enc->sv) - 1; } } @@ -338,13 +420,13 @@ { if (expect_false (ch == '"')) // but with slow exceptions { - need (enc, len += 1); + need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = '"'; } else if (expect_false (ch == '\\')) { - need (enc, len += 1); + need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = '\\'; } @@ -357,11 +439,11 @@ { switch (ch) { - case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break; - case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break; - case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break; - case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break; - case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break; + case '\010': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break; + case '\011': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break; + case '\012': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break; + case '\014': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break; + case '\015': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break; default: { @@ -387,7 +469,7 @@ if (uch >= 0x110000UL) croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); - need (enc, len += 11); + need (enc, len + 11); sprintf (enc->cur, "\\u%04x\\u%04x", (int)((uch - 0x10000) / 0x400 + 0xD800), (int)((uch - 0x10000) % 0x400 + 0xDC00)); @@ -395,7 +477,7 @@ } else { - need (enc, len += 5); + need (enc, len + 5); *enc->cur++ = '\\'; *enc->cur++ = 'u'; *enc->cur++ = PL_hexdigit [ uch >> 12 ]; @@ -413,7 +495,7 @@ } else if (is_utf8) { - need (enc, len += 
clen); + need (enc, len + clen); do { *enc->cur++ = *str++; @@ -422,7 +504,7 @@ } else { - need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed + need (enc, len + UTF8_MAXBYTES - 1); // never more than 11 bytes needed enc->cur = encode_utf8 (enc->cur, uch); ++str; } @@ -692,12 +774,9 @@ if (expect_false (SvOBJECT (sv))) { - HV *boolean_stash = !JSON_SLOW || types_boolean_stash - ? types_boolean_stash - : gv_stashpv ("Types::Serialiser::Boolean", 1); HV *stash = SvSTASH (sv); - if (stash == boolean_stash) + if (stash == bool_stash) { if (SvIV (sv)) encode_str (enc, "true", 4, 0); @@ -709,7 +788,9 @@ int count; dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + SAVESTACK_POS (); + PUSHMARK (SP); EXTEND (SP, 2); // we re-bless the reference to get overload and other niceties right PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); @@ -721,7 +802,7 @@ // catch this surprisingly common error if (SvROK (TOPs) && SvRV (TOPs) == sv) - croak ("%s::TO_JSON method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); + croak ("%s::FREEZE method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); encode_ch (enc, '('); encode_ch (enc, '"'); @@ -740,15 +821,14 @@ encode_ch (enc, ']'); - PUTBACK; - FREETMPS; LEAVE; } else if ((enc->json.flags & F_CONV_BLESSED) && (method = gv_fetchmethod_autoload (stash, "TO_JSON", 0))) { dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + PUSHMARK (SP); // we re-bless the reference to get overload and other niceties right XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); @@ -780,12 +860,11 @@ encode_av (enc, (AV *)sv); else if (svt < SVt_PVAV) { - STRLEN len = 0; - char *pv = svt ? 
SvPV (sv, len) : 0; + int bool_type = ref_bool_type (sv); - if (len == 1 && *pv == '1') + if (bool_type == 1) encode_str (enc, "true", 4, 0); - else if (len == 1 && *pv == '0') + else if (bool_type == 0) encode_str (enc, "false", 5, 0); else if (enc->json.flags & F_ALLOW_UNKNOWN) encode_str (enc, "null", 4, 0); @@ -878,7 +957,7 @@ { enc_t enc; - if (!(json->flags & F_ALLOW_NONREF) && !SvROK (scalar)) + if (!(json->flags & F_ALLOW_NONREF) && json_nonref (scalar)) croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)"); enc.json = *json; @@ -1095,6 +1174,8 @@ utf8 = 1; } + else if (ch == '\t' && dec->json.flags & F_RELAXED) + *cur++ = ch; else { --dec_cur; @@ -1114,8 +1195,8 @@ { STRLEN cur = SvCUR (sv); - if (SvLEN (sv) <= cur + len) - SvGROW (sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); + if (SvLEN (sv) - cur <= len) + json_sv_grow (sv, cur, len); memcpy (SvPVX (sv) + SvCUR (sv), buf, len); SvCUR_set (sv, SvCUR (sv) + len); @@ -1426,7 +1507,7 @@ he = hv_iternext (hv); hv_iterinit (hv); - // the next line creates a mortal sv each time its called. + // the next line creates a mortal sv each time it's called. // might want to optimise this for common cases. 
cb = hv_fetch_ent (dec->json.cb_sk_object, hv_iterkeysv (he), 0, 0); @@ -1435,7 +1516,9 @@ dSP; int count; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + SAVESTACK_POS (); + PUSHMARK (SP); XPUSHs (HeVAL (he)); sv_2mortal (sv); @@ -1458,7 +1541,9 @@ dSP; int count; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + SAVESTACK_POS (); + PUSHMARK (SP); XPUSHs (sv_2mortal (sv)); PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; @@ -1494,6 +1579,8 @@ ++dec->cur; + decode_ws (dec); + tag = decode_sv (dec); if (!tag) goto fail; @@ -1501,11 +1588,15 @@ if (!SvPOK (tag)) ERR ("malformed JSON string, (tag) must be a string"); + decode_ws (dec); + if (*dec->cur != ')') ERR (") expected after tag"); ++dec->cur; + decode_ws (dec); + val = decode_sv (dec); if (!val) goto fail; @@ -1529,7 +1620,8 @@ dSP; - ENTER; SAVETMPS; PUSHMARK (SP); + ENTER; SAVETMPS; + PUSHMARK (SP); EXTEND (SP, len + 2); // we re-bless the reference to get overload and other niceties right PUSHs (tag); @@ -1581,9 +1673,9 @@ { dec->cur += 4; #if JSON_SLOW - types_true = get_bool ("Types::Serialiser::true"); + bool_true = get_bool ("Types::Serialiser::true"); #endif - return newSVsv (types_true); + return newSVsv (bool_true); } else ERR ("'true' expected"); @@ -1595,9 +1687,9 @@ { dec->cur += 5; #if JSON_SLOW - types_false = get_bool ("Types::Serialiser::false"); + bool_false = get_bool ("Types::Serialiser::false"); #endif - return newSVsv (types_false); + return newSVsv (bool_false); } else ERR ("'false' expected"); @@ -1625,7 +1717,7 @@ } static SV * -decode_json (SV *string, JSON *json, char **offset_return) +decode_json (SV *string, JSON *json, STRLEN *offset_return) { dec_t dec; SV *sv; @@ -1634,9 +1726,12 @@ * makes perl ignore the magic in subsequent accesses. * also make a copy of non-PV values, to get them into a clean * state (SvPV should do that, but it's buggy, see below). 
+ * + * SvIsCOW_shared_hash works around a bug in perl (possibly 5.16), + * as reported by Reini Urban. */ /*SvGETMAGIC (string);*/ - if (SvMAGICAL (string) || !SvPOK (string)) + if (SvMAGICAL (string) || !SvPOK (string) || SvIsCOW_shared_hash (string)) string = sv_2mortal (newSVsv (string)); SvUPGRADE (string, SVt_PV); @@ -1685,9 +1780,8 @@ sv = decode_sv (&dec); if (offset_return) - *offset_return = dec.cur; - - if (!(offset_return || !sv)) + *offset_return = dec.cur - SvPVX (string); + else if (sv) { // check for trailing garbage decode_ws (&dec); @@ -1721,7 +1815,7 @@ sv = sv_2mortal (sv); - if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv)) + if (!(dec.json.flags & F_ALLOW_NONREF) && json_nonref (sv)) croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)"); return sv; @@ -1901,11 +1995,10 @@ : i >= 'A' && i <= 'F' ? i - 'A' + 10 : -1; - json_stash = gv_stashpv ("JSON::XS" , 1); - types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1); - - types_true = get_bool ("Types::Serialiser::true"); - types_false = get_bool ("Types::Serialiser::false"); + json_stash = gv_stashpv ("JSON::XS" , 1); + bool_stash = gv_stashpv ("Types::Serialiser::Boolean", 1); + bool_true = get_bool ("Types::Serialiser::true"); + bool_false = get_bool ("Types::Serialiser::false"); sv_json = newSVpv ("JSON", 0); SvREADONLY_on (sv_json); @@ -1917,8 +2010,8 @@ void CLONE (...) 
CODE: - json_stash = 0; - types_boolean_stash = 0; + json_stash = 0; + bool_stash = 0; void new (char *klass) PPCODE: @@ -2045,11 +2138,11 @@ PPCODE: { SV *sv; - char *offset; + STRLEN offset; PUTBACK; sv = decode_json (jsonstr, self, &offset); SPAGAIN; EXTEND (SP, 2); PUSHs (sv); - PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset)))); + PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, SvPV_nolen (jsonstr) + offset)))); } void incr_parse (JSON *self, SV *jsonstr = 0) @@ -2093,8 +2186,8 @@ const char *str = SvPV (jsonstr, len); STRLEN cur = SvCUR (self->incr_text); - if (SvLEN (self->incr_text) <= cur + len) - SvGROW (self->incr_text, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); + if (SvLEN (self->incr_text) - cur <= len) + json_sv_grow (self->incr_text, cur, len); Move (str, SvEND (self->incr_text), len, char); SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len); @@ -2106,7 +2199,7 @@ do { SV *sv; - char *offset; + STRLEN offset; if (!INCR_DONE (self)) { @@ -2132,11 +2225,11 @@ PUTBACK; sv = decode_json (self->incr_text, self, &offset); SPAGAIN; XPUSHs (sv); - self->incr_pos -= offset - SvPVX (self->incr_text); + self->incr_pos -= offset; self->incr_nest = 0; self->incr_mode = 0; - sv_chop (self->incr_text, offset); + sv_chop (self->incr_text, SvPVX (self->incr_text) + offset); } while (GIMME_V == G_ARRAY); }