--- JSON-XS/XS.xs 2016/02/21 16:18:19 1.126 +++ JSON-XS/XS.xs 2019/02/24 04:21:05 1.137 @@ -8,6 +8,7 @@ #include #include #include +#include #if defined(__BORLANDC__) || defined(_MSC_VER) # define snprintf _snprintf // C compilers have this in stdio.h @@ -51,7 +52,7 @@ #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER -#define INIT_SIZE 32 // initial scalar size to be allocated +#define INIT_SIZE 64 // initial scalar size to be allocated #define INDENT_STEP 3 // spaces per indentation level #define SHORT_STRING_LEN 16384 // special-case strings of up to this size @@ -79,23 +80,26 @@ #define ERR_NESTING_EXCEEDED "json text or perl structure exceeds maximum nesting level (max_depth set too low?)" #ifdef USE_ITHREADS -# define JSON_SLOW 1 -# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) -# define BOOL_STASH (bool_stash ? bool_stash : gv_stashpv ("Types::Serialiser::Boolean", 1)) +# define JSON_STASH (expect_true (json_stash) ? json_stash : gv_stashpv ("JSON::XS", 1)) +# define BOOL_STASH (expect_true (bool_stash) ? bool_stash : gv_stashpv ("Types::Serialiser::Boolean", 1)) +# define GET_BOOL(value) (expect_true (bool_ ## value) ? 
bool_ ## value : get_bool ("Types::Serialiser::" # value)) #else -# define JSON_SLOW 0 # define JSON_STASH json_stash # define BOOL_STASH bool_stash +# define GET_BOOL(value) bool_ ## value #endif // the amount of HEs to allocate on the stack, when sorting keys #define STACK_HES 64 static HV *json_stash, *bool_stash; // JSON::XS::, Types::Serialiser::Boolean:: -static SV *bool_true, *bool_false, *sv_json; +static SV *bool_false, *bool_true; +static SV *sv_json; enum { INCR_M_WS = 0, // initial whitespace skipping, must be 0 + INCR_M_TFN, // inside true/false/null + INCR_M_NUM, // inside number INCR_M_STR, // inside string INCR_M_BS, // inside backslash INCR_M_C0, // inside comment in initial whitespace sequence @@ -118,13 +122,16 @@ STRLEN incr_pos; // the current offset into the text int incr_nest; // {[]}-nesting level unsigned char incr_mode; + + SV *v_false, *v_true; } JSON; INLINE void json_init (JSON *json) { - Zero (json, 1, JSON); - json->max_depth = 512; + static const JSON init = { F_ALLOW_NONREF, 512 }; + + *json = init; } ///////////////////////////////////////////////////////////////////////////// @@ -156,7 +163,32 @@ } } -// decode an utf-8 character and return it, or (UV)-1 in +/* adds two STRLENs together, slow, and with paranoia */ +STRLEN +strlen_sum (STRLEN l1, STRLEN l2) +{ + size_t sum = l1 + l2; + + if (sum < (size_t)l2 || sum != (size_t)(STRLEN)sum) + croak ("JSON::XS: string size overflow"); + + return sum; +} + +/* similar to SvGROW, but somewhat safer and guarantees exponential realloc strategy */ +static char * +json_sv_grow (SV *sv, size_t len1, size_t len2) +{ + len1 = strlen_sum (len1, len2); + len1 = strlen_sum (len1, len1 >> 1); + + if (len1 > 4096 - 24) + len1 = (len1 | 4095) - 24; + + return SvGROW (sv, len1); +} + +// decode a utf-8 character and return it, or (UV)-1 in // case of an error. // we special-case "safe" characters from U+80 .. U+7FF, // but use the very good perl function to parse anything else. 
@@ -320,7 +352,6 @@ return 1; else if (*pv == '0') return 0; - } return -1; @@ -335,11 +366,14 @@ scalar = SvRV (scalar); - if (SvSTASH (scalar) == bool_stash) - return 1; + if (SvTYPE (scalar) >= SVt_PVMG) + { + if (SvSTASH (scalar) == bool_stash) + return 1; - if (!SvOBJECT (scalar) && ref_bool_type (scalar) >= 0) - return 1; + if (!SvOBJECT (scalar) && ref_bool_type (scalar) >= 0) + return 1; + } return 0; } @@ -361,12 +395,12 @@ INLINE void need (enc_t *enc, STRLEN len) { - if (expect_false (enc->cur + len >= enc->end)) + if (expect_false ((uintptr_t)(enc->end - enc->cur) < len)) { STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); - SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); - enc->cur = SvPVX (enc->sv) + cur; - enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; + char *buf = json_sv_grow (enc->sv, cur, len); + enc->cur = buf + cur; + enc->end = buf + SvLEN (enc->sv) - 1; } } @@ -392,13 +426,13 @@ { if (expect_false (ch == '"')) // but with slow exceptions { - need (enc, len += 1); + need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = '"'; } else if (expect_false (ch == '\\')) { - need (enc, len += 1); + need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = '\\'; } @@ -411,11 +445,11 @@ { switch (ch) { - case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break; - case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break; - case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break; - case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break; - case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break; + case '\010': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break; + case '\011': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break; + case '\012': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break; + case '\014': need (enc, len + 
1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break; + case '\015': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break; default: { @@ -441,7 +475,7 @@ if (uch >= 0x110000UL) croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); - need (enc, len += 11); + need (enc, len + 11); sprintf (enc->cur, "\\u%04x\\u%04x", (int)((uch - 0x10000) / 0x400 + 0xD800), (int)((uch - 0x10000) % 0x400 + 0xDC00)); @@ -449,7 +483,7 @@ } else { - need (enc, len += 5); + need (enc, len + 5); *enc->cur++ = '\\'; *enc->cur++ = 'u'; *enc->cur++ = PL_hexdigit [ uch >> 12 ]; @@ -467,7 +501,7 @@ } else if (is_utf8) { - need (enc, len += clen); + need (enc, len + clen); do { *enc->cur++ = *str++; @@ -476,7 +510,7 @@ } else { - need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed + need (enc, len + UTF8_MAXBYTES - 1); // never more than 11 bytes needed enc->cur = encode_utf8 (enc->cur, uch); ++str; } @@ -750,10 +784,8 @@ if (stash == bool_stash) { - if (SvIV (sv)) - encode_str (enc, "true", 4, 0); - else - encode_str (enc, "false", 5, 0); + if (SvIV (sv)) encode_str (enc, "true" , 4, 0); + else encode_str (enc, "false", 5, 0); } else if ((enc->json.flags & F_ALLOW_TAGS) && (method = gv_fetchmethod_autoload (stash, "FREEZE", 0))) { @@ -761,7 +793,6 @@ dSP; ENTER; SAVETMPS; - SAVESTACK_POS (); PUSHMARK (SP); EXTEND (SP, 2); // we re-bless the reference to get overload and other niceties right @@ -783,12 +814,18 @@ encode_ch (enc, ')'); encode_ch (enc, '['); - while (count) + if (count) { - encode_sv (enc, SP[1 - count--]); + int i; - if (count) - encode_ch (enc, ','); + for (i = 0; i < count - 1; ++i) + { + encode_sv (enc, SP[i + 1 - count]); + encode_ch (enc, ','); + } + + encode_sv (enc, TOPs); + SP -= count; } encode_ch (enc, ']'); @@ -1167,8 +1204,8 @@ { STRLEN cur = SvCUR (sv); - if (SvLEN (sv) <= cur + len) - SvGROW (sv, cur + (len < (cur >> 2) ? 
cur >> 2 : len) + 1); + if (SvLEN (sv) - cur <= len) + json_sv_grow (sv, cur, len); memcpy (SvPVX (sv) + SvCUR (sv), buf, len); SvCUR_set (sv, SvCUR (sv) + len); @@ -1469,7 +1506,7 @@ sv = newRV_noinc ((SV *)hv); // check filter callbacks - if (dec->json.flags & F_HOOK) + if (expect_false (dec->json.flags & F_HOOK)) { if (dec->json.cb_sk_object && HvKEYS (hv) == 1) { @@ -1489,7 +1526,6 @@ int count; ENTER; SAVETMPS; - SAVESTACK_POS (); PUSHMARK (SP); XPUSHs (HeVAL (he)); sv_2mortal (sv); @@ -1502,6 +1538,8 @@ FREETMPS; LEAVE; return sv; } + else if (count) + croak ("filter_json_single_key_object callbacks must not return more than one scalar"); SvREFCNT_inc (sv); FREETMPS; LEAVE; @@ -1514,20 +1552,18 @@ int count; ENTER; SAVETMPS; - SAVESTACK_POS (); PUSHMARK (SP); XPUSHs (sv_2mortal (sv)); PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; if (count == 1) - { - sv = newSVsv (POPs); - FREETMPS; LEAVE; - return sv; - } + sv = newSVsv (POPs); + else if (count == 0) + SvREFCNT_inc (sv); + else + croak ("filter_json_object callbacks must not return more than one scalar"); - SvREFCNT_inc (sv); FREETMPS; LEAVE; } } @@ -1640,31 +1676,33 @@ case '5': case '6': case '7': case '8': case '9': return decode_num (dec); - case 't': - if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) + case 'f': + if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5)) { - dec->cur += 4; -#if JSON_SLOW - bool_true = get_bool ("Types::Serialiser::true"); -#endif - return newSVsv (bool_true); + dec->cur += 5; + + if (expect_false (!dec->json.v_false)) + dec->json.v_false = GET_BOOL (false); + + return newSVsv (dec->json.v_false); } else - ERR ("'true' expected"); + ERR ("'false' expected"); break; - case 'f': - if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5)) + case 't': + if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) { - dec->cur += 5; -#if JSON_SLOW - bool_false = get_bool ("Types::Serialiser::false"); -#endif - return newSVsv 
(bool_false); + dec->cur += 4; + + if (expect_false (!dec->json.v_true)) + dec->json.v_true = GET_BOOL (true); + + return newSVsv (dec->json.v_true); } else - ERR ("'false' expected"); + ERR ("'true' expected"); break; @@ -1689,7 +1727,7 @@ } static SV * -decode_json (SV *string, JSON *json, char **offset_return) +decode_json (SV *string, JSON *json, STRLEN *offset_return) { dec_t dec; SV *sv; @@ -1698,9 +1736,12 @@ * makes perl ignore the magic in subsequent accesses. * also make a copy of non-PV values, to get them into a clean * state (SvPV should do that, but it's buggy, see below). + * + * SvIsCOW_shared_hash works around a bug in perl (possibly 5.16), + * as reported by Reini Urban. */ /*SvGETMAGIC (string);*/ - if (SvMAGICAL (string) || !SvPOK (string)) + if (SvMAGICAL (string) || !SvPOK (string) || SvIsCOW_shared_hash (string)) string = sv_2mortal (newSVsv (string)); SvUPGRADE (string, SVt_PV); @@ -1749,14 +1790,13 @@ sv = decode_sv (&dec); if (offset_return) - *offset_return = dec.cur; - - if (!(offset_return || !sv)) + *offset_return = dec.cur - SvPVX (string); + else if (sv) { // check for trailing garbage decode_ws (&dec); - if (*dec.cur) + if (dec.cur != dec.end) { dec.err = "garbage after JSON object"; SvREFCNT_dec (sv); @@ -1804,9 +1844,17 @@ for (;;) { - //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D switch (self->incr_mode) { + // reached end of a scalar, see if we are inside a nested structure or not + end_of_scalar: + self->incr_mode = INCR_M_JSON; + + if (self->incr_nest) // end of a scalar inside array, object or tag + goto incr_m_json; + else // end of scalar outside structure, json text ends here + goto interrupt; + // only used for initial whitespace skipping case INCR_M_WS: for (;;) @@ -1858,6 +1906,40 @@ break; + // inside true/false/null + case INCR_M_TFN: + incr_m_tfn: + for (;;) + switch (*p++) + { + case 'r': case 'u': case 'e': // tRUE, falsE, nUll + 
case 'a': case 'l': case 's': // fALSe, nuLL + // allowed + break; + + default: + --p; + goto end_of_scalar; + } + + // inside a number + case INCR_M_NUM: + incr_m_num: + for (;;) + switch (*p++) + { + case 'e': case 'E': case '.': case '+': + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + // allowed + break; + + default: + --p; + goto end_of_scalar; + } + // inside a string case INCR_M_STR: incr_m_str: @@ -1866,12 +1948,7 @@ if (*p == '"') { ++p; - self->incr_mode = INCR_M_JSON; - - if (!self->incr_nest) - goto interrupt; - - goto incr_m_json; + goto end_of_scalar; } else if (*p == '\\') { @@ -1911,6 +1988,21 @@ } break; + // the following three blocks handle scalars. this makes the parser + // more strict than required inside arrays or objects, and could + // be moved to a special case on the toplevel (except strings) + case 't': + case 'f': + case 'n': + self->incr_mode = INCR_M_TFN; + goto incr_m_tfn; + + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + self->incr_mode = INCR_M_NUM; + goto incr_m_num; + case '"': self->incr_mode = INCR_M_STR; goto incr_m_str; @@ -1967,8 +2059,8 @@ json_stash = gv_stashpv ("JSON::XS" , 1); bool_stash = gv_stashpv ("Types::Serialiser::Boolean", 1); - bool_true = get_bool ("Types::Serialiser::true"); bool_false = get_bool ("Types::Serialiser::false"); + bool_true = get_bool ("Types::Serialiser::true"); sv_json = newSVpv ("JSON", 0); SvREADONLY_on (sv_json); @@ -1980,8 +2072,13 @@ void CLONE (...) CODE: + // as long as these writes are atomic, the race should not matter + // as existing threads either already use 0, or use the old value, + // which is still correct for the initial thread. 
json_stash = 0; bool_stash = 0; + bool_false = 0; + bool_true = 0; void new (char *klass) PPCODE: @@ -1995,6 +2092,21 @@ ))); } +void boolean_values (JSON *self, SV *v_false = 0, SV *v_true = 0) + PPCODE: + self->v_false = newSVsv (v_false); + self->v_true = newSVsv (v_true); + XPUSHs (ST (0)); + +void get_boolean_values (JSON *self) + PPCODE: + if (self->v_false && self->v_true) + { + EXTEND (SP, 2); + PUSHs (self->v_false); + PUSHs (self->v_true); + } + void ascii (JSON *self, int enable = 1) ALIAS: ascii = F_ASCII @@ -2096,23 +2208,21 @@ void encode (JSON *self, SV *scalar) PPCODE: - PUTBACK; scalar = encode_json (scalar, self); SPAGAIN; - XPUSHs (scalar); + PUTBACK; XPUSHs (encode_json (scalar, self)); void decode (JSON *self, SV *jsonstr) PPCODE: - PUTBACK; jsonstr = decode_json (jsonstr, self, 0); SPAGAIN; - XPUSHs (jsonstr); + PUTBACK; XPUSHs (decode_json (jsonstr, self, 0)); void decode_prefix (JSON *self, SV *jsonstr) PPCODE: { SV *sv; - char *offset; - PUTBACK; sv = decode_json (jsonstr, self, &offset); SPAGAIN; + STRLEN offset; + PUTBACK; sv = decode_json (jsonstr, self, &offset); EXTEND (SP, 2); PUSHs (sv); - PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset)))); + PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, SvPV_nolen (jsonstr) + offset)))); } void incr_parse (JSON *self, SV *jsonstr = 0) @@ -2156,8 +2266,8 @@ const char *str = SvPV (jsonstr, len); STRLEN cur = SvCUR (self->incr_text); - if (SvLEN (self->incr_text) <= cur + len) - SvGROW (self->incr_text, cur + (len < (cur >> 2) ? 
cur >> 2 : len) + 1); + if (SvLEN (self->incr_text) - cur <= len) + json_sv_grow (self->incr_text, cur, len); Move (str, SvEND (self->incr_text), len, char); SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len); @@ -2169,7 +2279,7 @@ do { SV *sv; - char *offset; + STRLEN offset; if (!INCR_DONE (self)) { @@ -2192,14 +2302,14 @@ } } - PUTBACK; sv = decode_json (self->incr_text, self, &offset); SPAGAIN; + PUTBACK; sv = decode_json (self->incr_text, self, &offset); XPUSHs (sv); - self->incr_pos -= offset - SvPVX (self->incr_text); + self->incr_pos -= offset; self->incr_nest = 0; self->incr_mode = 0; - sv_chop (self->incr_text, offset); + sv_chop (self->incr_text, SvPVX (self->incr_text) + offset); } while (GIMME_V == G_ARRAY); } @@ -2240,6 +2350,8 @@ void DESTROY (JSON *self) CODE: + SvREFCNT_dec (self->v_false); + SvREFCNT_dec (self->v_true); SvREFCNT_dec (self->cb_sk_object); SvREFCNT_dec (self->cb_object); SvREFCNT_dec (self->incr_text); @@ -2252,8 +2364,7 @@ JSON json; json_init (&json); json.flags |= F_UTF8; - PUTBACK; scalar = encode_json (scalar, &json); SPAGAIN; - XPUSHs (scalar); + PUTBACK; XPUSHs (encode_json (scalar, &json)); } void decode_json (SV *jsonstr) @@ -2262,7 +2373,6 @@ JSON json; json_init (&json); json.flags |= F_UTF8; - PUTBACK; jsonstr = decode_json (jsonstr, &json, 0); SPAGAIN; - XPUSHs (jsonstr); + PUTBACK; XPUSHs (decode_json (jsonstr, &json, 0)); }