--- JSON-XS/XS.xs 2009/09/08 18:00:03 1.101 +++ JSON-XS/XS.xs 2013/10/25 20:19:57 1.117 @@ -19,7 +19,8 @@ # define UTF8_MAXBYTES 13 #endif -#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2) +// three extra for rounding, sign, and end of string +#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 3) #define F_ASCII 0x00000001UL #define F_LATIN1 0x00000002UL @@ -43,6 +44,8 @@ #define SHORT_STRING_LEN 16384 // special-case strings of up to this size +#define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8) + #define SB do { #define SE } while (0) @@ -71,6 +74,9 @@ # define JSON_STASH json_stash #endif +// the amount of HEs to allocate on the stack, when sorting keys +#define STACK_HES 64 + static HV *json_stash, *json_boolean_stash; // JSON::XS:: static SV *json_true, *json_false; @@ -78,6 +84,8 @@ INCR_M_WS = 0, // initial whitespace skipping, must be 0 INCR_M_STR, // inside string INCR_M_BS, // inside backslash + INCR_M_C0, // inside comment in initial whitespace sequence + INCR_M_C1, // inside comment in other places INCR_M_JSON // outside anything, count nesting }; @@ -187,6 +195,101 @@ } ///////////////////////////////////////////////////////////////////////////// +// fp hell + +// scan a group of digits, and a trailing exponent +static void +json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp, int maxdepth) +{ + UV uaccum = 0; + int eaccum = 0; + + // if we recurse too deep, skip all remaining digits + // to avoid a stack overflow attack + if (expect_false (--maxdepth <= 0)) + while (((U8)*s - '0') < 10) + ++s; + + for (;;) + { + U8 dig = (U8)*s - '0'; + + if (expect_false (dig >= 10)) + { + if (dig == (U8)((U8)'.' - (U8)'0')) + { + ++s; + json_atof_scan1 (s, accum, expo, 1, maxdepth); + } + else if ((dig | ' ') == 'e' - '0') + { + int exp2 = 0; + int neg = 0; + + ++s; + + if (*s == '-') + { + ++s; + neg = 1; + } + else if (*s == '+') + ++s; + + while ((dig = (U8)*s - '0') < 10) + exp2 = exp2 * 10 + *s++ - '0'; + + *expo += neg ? -exp2 : exp2; + } + + break; + } + + ++s; + + uaccum = uaccum * 10 + dig; + ++eaccum; + + // if we have too many digits, then recurse for more + // we actually do this for rather few digits + if (uaccum >= (UV_MAX - 9) / 10) + { + if (postdp) *expo -= eaccum; + json_atof_scan1 (s, accum, expo, postdp, maxdepth); + if (postdp) *expo += eaccum; + + break; + } + } + + // this relies greatly on the quality of the pow () + // implementation of the platform, but a good + // implementation is hard to beat. + // (IEEE 754 conformant ones are required to be exact) + if (postdp) *expo -= eaccum; + *accum += uaccum * Perl_pow (10., *expo); + *expo += eaccum; +} + +static NV +json_atof (const char *s) +{ + NV accum = 0.; + int expo = 0; + int neg = 0; + + if (*s == '-') + { + ++s; + neg = 1; + } + + // a recursion depth of ten gives us >>500 bits + json_atof_scan1 (s, &accum, &expo, 0, 10); + + return neg ? -accum : accum; +} +///////////////////////////////////////////////////////////////////////////// // encoder // structure used for encoding JSON @@ -382,7 +485,7 @@ croak (ERR_NESTING_EXCEEDED); encode_ch (enc, '['); - + if (len >= 0) { encode_nl (enc); ++enc->indent; @@ -404,7 +507,7 @@ encode_nl (enc); --enc->indent; encode_indent (enc); } - + encode_ch (enc, ']'); } @@ -418,7 +521,7 @@ SV *sv = HeSVKEY (he); STRLEN len; char *str; - + SvGETMAGIC (sv); str = SvPV (sv, len); @@ -494,11 +597,15 @@ if (count) { int i, fast = 1; -#if defined(__BORLANDC__) || defined(_MSC_VER) - HE **hes = _alloca (count * sizeof (HE)); -#else - HE *hes [count]; // if your compiler dies here, you need to enable C99 mode -#endif + HE *hes_stack [STACK_HES]; + HE **hes = hes_stack; + + // allocate larger arrays on the heap + if (count > STACK_HES) + { + SV *sv = sv_2mortal (NEWSV (0, count * sizeof (*hes))); + hes = (HE **)SvPVX (sv); + } i = 0; while ((he = hv_iternext (hv))) @@ -727,7 +834,7 @@ { // large integer, use the (rather slow) snprintf way. need (enc, IVUV_MAXCHARS); - enc->cur += + enc->cur += SvIsUV(sv) ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv)) : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv)); @@ -738,8 +845,8 @@ else if (!SvOK (sv) || enc->json.flags & F_ALLOW_UNKNOWN) encode_str (enc, "null", 4, 0); else - croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this", - SvPV_nolen (sv), SvFLAGS (sv)); + croak ("encountered perl type (%s,0x%x) that JSON cannot handle, check your input data", + SvPV_nolen (sv), (unsigned int)SvFLAGS (sv)); } static SV * @@ -1118,20 +1225,16 @@ len -= *start == '-' ? 1 : 0; // does not fit into IV or UV, try NV - if ((sizeof (NV) == sizeof (double) && DBL_DIG >= len) - #if defined (LDBL_DIG) - || (sizeof (NV) == sizeof (long double) && LDBL_DIG >= len) - #endif - ) + if (len <= NV_DIG) // fits into NV without loss of precision - return newSVnv (Atof (start)); + return newSVnv (json_atof (start)); // everything else fails, convert it to a string return newSVpvn (start, dec->cur - start); } // loss of precision here - return newSVnv (Atof (start)); + return newSVnv (json_atof (start)); fail: return 0; @@ -1165,7 +1268,7 @@ ++dec->cur; break; } - + if (*dec->cur != ',') ERR (", or ] expected while parsing array"); @@ -1310,6 +1413,7 @@ ENTER; SAVETMPS; PUSHMARK (SP); XPUSHs (HeVAL (he)); + sv_2mortal (sv); PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; @@ -1320,6 +1424,7 @@ return sv; } + SvREFCNT_inc (sv); FREETMPS; LEAVE; } } @@ -1361,8 +1466,8 @@ // to parse everything. switch (*dec->cur) { - case '"': ++dec->cur; return decode_str (dec); - case '[': ++dec->cur; return decode_av (dec); + case '"': ++dec->cur; return decode_str (dec); + case '[': ++dec->cur; return decode_av (dec); case '{': ++dec->cur; return decode_hv (dec); case '-': @@ -1425,10 +1530,12 @@ SV *sv; /* work around bugs in 5.10 where manipulating magic values - * will perl ignore the magic in subsequent accesses + * makes perl ignore the magic in subsequent accesses. + * also make a copy of non-PV values, to get them into a clean + * state (SvPV should do that, but it's buggy, see below). */ /*SvGETMAGIC (string);*/ - if (SvMAGICAL (string)) + if (SvMAGICAL (string) || !SvPOK (string)) string = sv_2mortal (newSVsv (string)); SvUPGRADE (string, SVt_PV); @@ -1455,7 +1562,7 @@ (unsigned long)SvCUR (string), (unsigned long)json->max_size); } - if (json->flags & F_UTF8) + if (DECODE_WANTS_OCTETS (json)) sv_utf8_downgrade (string, 0); else sv_utf8_upgrade (string); @@ -1507,7 +1614,7 @@ croak ("%s, at character offset %d (before \"%s\")", dec.err, - ptr_to_index (string, dec.cur), + (int)ptr_to_index (string, dec.cur), dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); } @@ -1527,19 +1634,30 @@ { const char *p = SvPVX (self->incr_text) + self->incr_pos; + // the state machine here is a bit convoluted and could be simplified a lot + // but this would make it slower, so... + for (;;) { //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D switch (self->incr_mode) { - // only used for intiial whitespace skipping + // only used for initial whitespace skipping case INCR_M_WS: for (;;) { if (*p > 0x20) { - self->incr_mode = INCR_M_JSON; - goto incr_m_json; + if (*p == '#') + { + self->incr_mode = INCR_M_C0; + goto incr_m_c; + } + else + { + self->incr_mode = INCR_M_JSON; + goto incr_m_json; + } } else if (!*p) goto interrupt; @@ -1556,6 +1674,25 @@ self->incr_mode = INCR_M_STR; goto incr_m_str; + // inside #-style comments + case INCR_M_C0: + case INCR_M_C1: + incr_m_c: + for (;;) + { + if (*p == '\n') + { + self->incr_mode = self->incr_mode == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON; + break; + } + else if (!*p) + goto interrupt; + + ++p; + } + + break; + // inside a string case INCR_M_STR: incr_m_str: @@ -1623,6 +1760,11 @@ case '}': if (--self->incr_nest <= 0) goto interrupt; + break; + + case '#': + self->incr_mode = INCR_M_C1; + goto incr_m_c; } } } @@ -1633,6 +1775,7 @@ interrupt: self->incr_pos = p - SvPVX (self->incr_text); + //printf ("interrupt<%.*s>\n", self->incr_pos, SvPVX(self->incr_text));//D //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D } @@ -1671,7 +1814,7 @@ void new (char *klass) PPCODE: { - SV *pv = NEWSV (0, sizeof (JSON)); + SV *pv = NEWSV (0, sizeof (JSON)); SvPOK_only (pv); json_init ((JSON *)SvPVX (pv)); XPUSHs (sv_2mortal (sv_bless ( @@ -1758,7 +1901,7 @@ void filter_json_single_key_object (JSON *self, SV *key, SV *cb = &PL_sv_undef) PPCODE: { - if (!self->cb_sk_object) + if (!self->cb_sk_object) self->cb_sk_object = newHV (); if (SvOK (cb)) @@ -1779,18 +1922,22 @@ void encode (JSON *self, SV *scalar) PPCODE: - XPUSHs (encode_json (scalar, self)); + PUTBACK; scalar = encode_json (scalar, self); SPAGAIN; + XPUSHs (scalar); void decode (JSON *self, SV *jsonstr) PPCODE: - XPUSHs (decode_json (jsonstr, self, 0)); + PUTBACK; jsonstr = decode_json (jsonstr, self, 0); SPAGAIN; + XPUSHs (jsonstr); void decode_prefix (JSON *self, SV *jsonstr) PPCODE: { + SV *sv; char *offset; + PUTBACK; sv = decode_json (jsonstr, self, &offset); SPAGAIN; EXTEND (SP, 2); - PUSHs (decode_json (jsonstr, self, &offset)); + PUSHs (sv); PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset)))); } @@ -1800,24 +1947,36 @@ if (!self->incr_text) self->incr_text = newSVpvn ("", 0); + /* if utf8-ness doesn't match the decoder, need to upgrade/downgrade */ + if (!DECODE_WANTS_OCTETS (self) == !SvUTF8 (self->incr_text)) + if (DECODE_WANTS_OCTETS (self)) + { + if (self->incr_pos) + self->incr_pos = utf8_length ((U8 *)SvPVX (self->incr_text), + (U8 *)SvPVX (self->incr_text) + self->incr_pos); + + sv_utf8_downgrade (self->incr_text, 0); + } + else + { + sv_utf8_upgrade (self->incr_text); + + if (self->incr_pos) + self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) + - (U8 *)SvPVX (self->incr_text); + } + // append data, if any if (jsonstr) { - if (SvUTF8 (jsonstr)) - { - if (!SvUTF8 (self->incr_text)) - { - /* utf-8-ness differs, need to upgrade */ - sv_utf8_upgrade (self->incr_text); - - if (self->incr_pos) - self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) - - (U8 *)SvPVX (self->incr_text); - } - } - else if (SvUTF8 (self->incr_text)) - sv_utf8_upgrade (jsonstr); + /* make sure both strings have same encoding */ + if (SvUTF8 (jsonstr) != SvUTF8 (self->incr_text)) + if (SvUTF8 (jsonstr)) + sv_utf8_downgrade (jsonstr, 0); + else + sv_utf8_upgrade (jsonstr); + /* and then just blindly append */ { STRLEN len; const char *str = SvPV (jsonstr, len); @@ -1835,6 +1994,7 @@ if (GIMME_V != G_VOID) do { + SV *sv; char *offset; if (!INCR_DONE (self)) @@ -1846,10 +2006,20 @@ (unsigned long)self->incr_pos, (unsigned long)self->max_size); if (!INCR_DONE (self)) - break; + { + // as an optimisation, do not accumulate white space in the incr buffer + if (self->incr_mode == INCR_M_WS && self->incr_pos) + { + self->incr_pos = 0; + SvCUR_set (self->incr_text, 0); + } + + break; + } } - XPUSHs (decode_json (self->incr_text, self, &offset)); + PUTBACK; sv = decode_json (self->incr_text, self, &offset); SPAGAIN; + XPUSHs (sv); self->incr_pos -= offset - SvPVX (self->incr_text); self->incr_nest = 0; @@ -1911,7 +2081,8 @@ JSON json; json_init (&json); json.flags |= ix; - XPUSHs (encode_json (scalar, &json)); + PUTBACK; scalar = encode_json (scalar, &json); SPAGAIN; + XPUSHs (scalar); } void decode_json (SV *jsonstr) @@ -1923,7 +2094,7 @@ JSON json; json_init (&json); json.flags |= ix; - XPUSHs (decode_json (jsonstr, &json, 0)); + PUTBACK; jsonstr = decode_json (jsonstr, &json, 0); SPAGAIN; + XPUSHs (jsonstr); } -