--- JSON-XS/XS.xs 2010/01/19 01:07:27 1.105 +++ JSON-XS/XS.xs 2013/10/25 20:27:57 1.118 @@ -44,6 +44,8 @@ #define SHORT_STRING_LEN 16384 // special-case strings of up to this size +#define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8) + #define SB do { #define SE } while (0) @@ -72,6 +74,9 @@ # define JSON_STASH json_stash #endif +// the amount of HEs to allocate on the stack, when sorting keys +#define STACK_HES 64 + static HV *json_stash, *json_boolean_stash; // JSON::XS:: static SV *json_true, *json_false; @@ -194,11 +199,17 @@ // scan a group of digits, and a trailing exponent static void -json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp) +json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp, int maxdepth) { UV uaccum = 0; int eaccum = 0; + // if we recurse too deep, skip all remaining digits + // to avoid a stack overflow attack + if (expect_false (--maxdepth <= 0)) + while (((U8)*s - '0') < 10) + ++s; + for (;;) { U8 dig = (U8)*s - '0'; @@ -208,7 +219,7 @@ if (dig == (U8)((U8)'.' - (U8)'0')) { ++s; - json_atof_scan1 (s, accum, expo, 1); + json_atof_scan1 (s, accum, expo, 1, maxdepth); } else if ((dig | ' ') == 'e' - '0') { @@ -244,7 +255,7 @@ if (uaccum >= (UV_MAX - 9) / 10) { if (postdp) *expo -= eaccum; - json_atof_scan1 (s, accum, expo, postdp); + json_atof_scan1 (s, accum, expo, postdp, maxdepth); if (postdp) *expo += eaccum; break; @@ -254,6 +265,7 @@ // this relies greatly on the quality of the pow () // implementation of the platform, but a good // implementation is hard to beat. + // (IEEE 754 conformant ones are required to be exact) if (postdp) *expo -= eaccum; *accum += uaccum * Perl_pow (10., *expo); *expo += eaccum; @@ -272,7 +284,8 @@ neg = 1; } - json_atof_scan1 (s, &accum, &expo, 0); + // a recursion depth of ten gives us >>500 bits + json_atof_scan1 (s, &accum, &expo, 0, 10); return neg ? -accum : accum; } @@ -472,7 +485,7 @@ croak (ERR_NESTING_EXCEEDED); encode_ch (enc, '['); - + if (len >= 0) { encode_nl (enc); ++enc->indent; @@ -494,7 +507,7 @@ encode_nl (enc); --enc->indent; encode_indent (enc); } - + encode_ch (enc, ']'); } @@ -508,7 +521,7 @@ SV *sv = HeSVKEY (he); STRLEN len; char *str; - + SvGETMAGIC (sv); str = SvPV (sv, len); @@ -584,11 +597,15 @@ if (count) { int i, fast = 1; -#if defined(__BORLANDC__) || defined(_MSC_VER) - HE **hes = _alloca (count * sizeof (HE)); -#else - HE *hes [count]; // if your compiler dies here, you need to enable C99 mode -#endif + HE *hes_stack [STACK_HES]; + HE **hes = hes_stack; + + // allocate larger arrays on the heap + if (count > STACK_HES) + { + SV *sv = sv_2mortal (NEWSV (0, count * sizeof (*hes))); + hes = (HE **)SvPVX (sv); + } i = 0; while ((he = hv_iternext (hv))) @@ -817,7 +834,7 @@ { // large integer, use the (rather slow) snprintf way. need (enc, IVUV_MAXCHARS); - enc->cur += + enc->cur += SvIsUV(sv) ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv)) : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv)); @@ -828,8 +845,8 @@ else if (!SvOK (sv) || enc->json.flags & F_ALLOW_UNKNOWN) encode_str (enc, "null", 4, 0); else - croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this", - SvPV_nolen (sv), SvFLAGS (sv)); + croak ("encountered perl type (%s,0x%x) that JSON cannot handle, check your input data", + SvPV_nolen (sv), (unsigned int)SvFLAGS (sv)); } static SV * @@ -1251,7 +1268,7 @@ ++dec->cur; break; } - + if (*dec->cur != ',') ERR (", or ] expected while parsing array"); @@ -1396,6 +1413,7 @@ ENTER; SAVETMPS; PUSHMARK (SP); XPUSHs (HeVAL (he)); + sv_2mortal (sv); PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; @@ -1406,6 +1424,7 @@ return sv; } + SvREFCNT_inc (sv); FREETMPS; LEAVE; } } @@ -1447,8 +1466,8 @@ // to parse everything. switch (*dec->cur) { - case '"': ++dec->cur; return decode_str (dec); - case '[': ++dec->cur; return decode_av (dec); + case '"': ++dec->cur; return decode_str (dec); + case '[': ++dec->cur; return decode_av (dec); case '{': ++dec->cur; return decode_hv (dec); case '-': @@ -1511,10 +1530,12 @@ SV *sv; /* work around bugs in 5.10 where manipulating magic values - * will perl ignore the magic in subsequent accesses + * makes perl ignore the magic in subsequent accesses. + * also make a copy of non-PV values, to get them into a clean + * state (SvPV should do that, but it's buggy, see below). */ /*SvGETMAGIC (string);*/ - if (SvMAGICAL (string)) + if (SvMAGICAL (string) || !SvPOK (string)) string = sv_2mortal (newSVsv (string)); SvUPGRADE (string, SVt_PV); @@ -1541,7 +1562,7 @@ (unsigned long)SvCUR (string), (unsigned long)json->max_size); } - if (json->flags & F_UTF8) + if (DECODE_WANTS_OCTETS (json)) sv_utf8_downgrade (string, 0); else sv_utf8_upgrade (string); @@ -1593,7 +1614,7 @@ croak ("%s, at character offset %d (before \"%s\")", dec.err, - ptr_to_index (string, dec.cur), + (int)ptr_to_index (string, dec.cur), dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); } @@ -1793,7 +1814,7 @@ void new (char *klass) PPCODE: { - SV *pv = NEWSV (0, sizeof (JSON)); + SV *pv = NEWSV (0, sizeof (JSON)); SvPOK_only (pv); json_init ((JSON *)SvPVX (pv)); XPUSHs (sv_2mortal (sv_bless ( @@ -1880,7 +1901,7 @@ void filter_json_single_key_object (JSON *self, SV *key, SV *cb = &PL_sv_undef) PPCODE: { - if (!self->cb_sk_object) + if (!self->cb_sk_object) self->cb_sk_object = newHV (); if (SvOK (cb)) @@ -1901,18 +1922,22 @@ void encode (JSON *self, SV *scalar) PPCODE: - XPUSHs (encode_json (scalar, self)); + PUTBACK; scalar = encode_json (scalar, self); SPAGAIN; + XPUSHs (scalar); void decode (JSON *self, SV *jsonstr) PPCODE: - XPUSHs (decode_json (jsonstr, self, 0)); + PUTBACK; jsonstr = decode_json (jsonstr, self, 0); SPAGAIN; + XPUSHs (jsonstr); void decode_prefix (JSON *self, SV *jsonstr) PPCODE: { + SV *sv; char *offset; + PUTBACK; sv = decode_json (jsonstr, self, &offset); SPAGAIN; EXTEND (SP, 2); - PUSHs (decode_json (jsonstr, self, &offset)); + PUSHs (sv); PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset)))); } @@ -1922,24 +1947,36 @@ if (!self->incr_text) self->incr_text = newSVpvn ("", 0); + /* if utf8-ness doesn't match the decoder, need to upgrade/downgrade */ + if (!DECODE_WANTS_OCTETS (self) == !SvUTF8 (self->incr_text)) + if (DECODE_WANTS_OCTETS (self)) + { + if (self->incr_pos) + self->incr_pos = utf8_length ((U8 *)SvPVX (self->incr_text), + (U8 *)SvPVX (self->incr_text) + self->incr_pos); + + sv_utf8_downgrade (self->incr_text, 0); + } + else + { + sv_utf8_upgrade (self->incr_text); + + if (self->incr_pos) + self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) + - (U8 *)SvPVX (self->incr_text); + } + // append data, if any if (jsonstr) { - if (SvUTF8 (jsonstr)) - { - if (!SvUTF8 (self->incr_text)) - { - /* utf-8-ness differs, need to upgrade */ - sv_utf8_upgrade (self->incr_text); - - if (self->incr_pos) - self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) - - (U8 *)SvPVX (self->incr_text); - } - } - else if (SvUTF8 (self->incr_text)) - sv_utf8_upgrade (jsonstr); + /* make sure both strings have same encoding */ + if (SvUTF8 (jsonstr) != SvUTF8 (self->incr_text)) + if (SvUTF8 (jsonstr)) + sv_utf8_downgrade (jsonstr, 0); + else + sv_utf8_upgrade (jsonstr); + /* and then just blindly append */ { STRLEN len; const char *str = SvPV (jsonstr, len); @@ -1957,6 +1994,7 @@ if (GIMME_V != G_VOID) do { + SV *sv; char *offset; if (!INCR_DONE (self)) @@ -1968,10 +2006,20 @@ (unsigned long)self->incr_pos, (unsigned long)self->max_size); if (!INCR_DONE (self)) - break; + { + // as an optimisation, do not accumulate white space in the incr buffer + if (self->incr_mode == INCR_M_WS && self->incr_pos) + { + self->incr_pos = 0; + SvCUR_set (self->incr_text, 0); + } + + break; + } } - XPUSHs (decode_json (self->incr_text, self, &offset)); + PUTBACK; sv = decode_json (self->incr_text, self, &offset); SPAGAIN; + XPUSHs (sv); self->incr_pos -= offset - SvPVX (self->incr_text); self->incr_nest = 0; @@ -2025,26 +2073,22 @@ PROTOTYPES: ENABLE void encode_json (SV *scalar) - ALIAS: - to_json_ = 0 - encode_json = F_UTF8 PPCODE: { JSON json; json_init (&json); - json.flags |= ix; - XPUSHs (encode_json (scalar, &json)); + json.flags |= F_UTF8; + PUTBACK; scalar = encode_json (scalar, &json); SPAGAIN; + XPUSHs (scalar); } void decode_json (SV *jsonstr) - ALIAS: - from_json_ = 0 - decode_json = F_UTF8 PPCODE: { JSON json; json_init (&json); - json.flags |= ix; - XPUSHs (decode_json (jsonstr, &json, 0)); + json.flags |= F_UTF8; + PUTBACK; jsonstr = decode_json (jsonstr, &json, 0); SPAGAIN; + XPUSHs (jsonstr); }