--- JSON-XS/XS.xs 2009/05/30 06:26:05 1.96 +++ JSON-XS/XS.xs 2010/01/19 01:36:34 1.106 @@ -14,12 +14,13 @@ #endif // some old perls do not have this, try to make it work, no -// guarentees, though. if it breaks, you get to keep the pieces. +// guarantees, though. if it breaks, you get to keep the pieces. #ifndef UTF8_MAXBYTES # define UTF8_MAXBYTES 13 #endif -#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2) +// three extra for rounding, sign, and end of string +#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 3) #define F_ASCII 0x00000001UL #define F_LATIN1 0x00000002UL @@ -78,6 +79,8 @@ INCR_M_WS = 0, // initial whitespace skipping, must be 0 INCR_M_STR, // inside string INCR_M_BS, // inside backslash + INCR_M_C0, // inside comment in initial whitespace sequence + INCR_M_C1, // inside comment in other places INCR_M_JSON // outside anything, count nesting }; @@ -187,6 +190,100 @@ } ///////////////////////////////////////////////////////////////////////////// +// fp hell + +// scan a group of digits, and a trailing exponent +static void +json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp, int maxdepth) +{ + UV uaccum = 0; + int eaccum = 0; + + // if we recurse too deep, skip all remaining digits + // to avoid a stack overflow attack + if (expect_false (--maxdepth <= 0)) + while (((U8)*s - '0') < 10) + ++s; + + for (;;) + { + U8 dig = (U8)*s - '0'; + + if (expect_false (dig >= 10)) + { + if (dig == (U8)((U8)'.' - (U8)'0')) + { + ++s; + json_atof_scan1 (s, accum, expo, 1, maxdepth); + } + else if ((dig | ' ') == 'e' - '0') + { + int exp2 = 0; + int neg = 0; + + ++s; + + if (*s == '-') + { + ++s; + neg = 1; + } + else if (*s == '+') + ++s; + + while ((dig = (U8)*s - '0') < 10) + exp2 = exp2 * 10 + *s++ - '0'; + + *expo += neg ? -exp2 : exp2; + } + + break; + } + + ++s; + + uaccum = uaccum * 10 + dig; + ++eaccum; + + // if we have too many digits, then recurse for more + // we actually do this for rather few digits + if (uaccum >= (UV_MAX - 9) / 10) + { + if (postdp) *expo -= eaccum; + json_atof_scan1 (s, accum, expo, postdp, maxdepth); + if (postdp) *expo += eaccum; + + break; + } + } + + // this relies greatly on the quality of the pow () + // implementation of the platform, but a good + // implementation is hard to beat. + if (postdp) *expo -= eaccum; + *accum += uaccum * Perl_pow (10., *expo); + *expo += eaccum; +} + +static NV +json_atof (const char *s) +{ + NV accum = 0.; + int expo = 0; + int neg = 0; + + if (*s == '-') + { + ++s; + neg = 1; + } + + // a recursion depth of ten gives us >>500 bits + json_atof_scan1 (s, &accum, &expo, 0, 10); + + return neg ? -accum : accum; +} +///////////////////////////////////////////////////////////////////////////// // encoder // structure used for encoding JSON @@ -473,7 +570,7 @@ // actually, this is mostly due to the stupid so-called // security workaround added somewhere in 5.8.x // that randomises hash orderings - if (enc->json.flags & F_CANONICAL) + if (enc->json.flags & F_CANONICAL && !SvRMAGICAL (hv)) { int count = hv_iterinit (hv); @@ -761,6 +858,7 @@ SvPOK_only (enc.sv); encode_sv (&enc, scalar); + encode_nl (&enc); SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings @@ -950,11 +1048,10 @@ else if (ch >= 0x80) { STRLEN clen; - UV uch; --dec_cur; - uch = decode_utf8 (dec_cur, dec->end - dec_cur, &clen); + decode_utf8 (dec_cur, dec->end - dec_cur, &clen); if (clen == (STRLEN)-1) ERR ("malformed UTF-8 character in JSON string"); @@ -1086,20 +1183,20 @@ if (*start == '-') switch (len) { - case 2: return newSViv (-( start [1] - '0' * 1)); - case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); - case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); - case 5: return newSViv (-( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); - case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); + case 2: return newSViv (-(IV)( start [1] - '0' * 1)); + case 3: return newSViv (-(IV)( start [1] * 10 + start [2] - '0' * 11)); + case 4: return newSViv (-(IV)( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); + case 5: return newSViv (-(IV)( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); + case 6: return newSViv (-(IV)(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); } else switch (len) { - case 1: return newSViv ( start [0] - '0' * 1); - case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); - case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); - case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); - case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); + case 1: return newSViv ( start [0] - '0' * 1); + case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); + case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); + case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); + case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); } { @@ -1118,20 +1215,16 @@ len -= *start == '-' ? 1 : 0; // does not fit into IV or UV, try NV - if ((sizeof (NV) == sizeof (double) && DBL_DIG >= len) - #if defined (LDBL_DIG) - || (sizeof (NV) == sizeof (long double) && LDBL_DIG >= len) - #endif - ) + if (len <= NV_DIG) // fits into NV without loss of precision - return newSVnv (Atof (start)); + return newSVnv (json_atof (start)); // everything else fails, convert it to a string return newSVpvn (start, dec->cur - start); } // loss of precision here - return newSVnv (Atof (start)); + return newSVnv (json_atof (start)); fail: return 0; @@ -1527,19 +1620,30 @@ { const char *p = SvPVX (self->incr_text) + self->incr_pos; + // the state machine here is a bit convoluted and could be simplified a lot + // but this would make it slower, so... + for (;;) { //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D switch (self->incr_mode) { - // only used for intiial whitespace skipping + // only used for initial whitespace skipping case INCR_M_WS: for (;;) { if (*p > 0x20) { - self->incr_mode = INCR_M_JSON; - goto incr_m_json; + if (*p == '#') + { + self->incr_mode = INCR_M_C0; + goto incr_m_c; + } + else + { + self->incr_mode = INCR_M_JSON; + goto incr_m_json; + } } else if (!*p) goto interrupt; @@ -1556,6 +1660,25 @@ self->incr_mode = INCR_M_STR; goto incr_m_str; + // inside #-style comments + case INCR_M_C0: + case INCR_M_C1: + incr_m_c: + for (;;) + { + if (*p == '\n') + { + self->incr_mode = self->incr_mode == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON; + break; + } + else if (!*p) + goto interrupt; + + ++p; + } + + break; + // inside a string case INCR_M_STR: incr_m_str: @@ -1623,6 +1746,11 @@ case '}': if (--self->incr_nest <= 0) goto interrupt; + break; + + case '#': + self->incr_mode = INCR_M_C1; + goto incr_m_c; } } } @@ -1633,6 +1761,7 @@ interrupt: self->incr_pos = p - SvPVX (self->incr_text); + //printf ("interrupt<%.*s>\n", self->incr_pos, SvPVX(self->incr_text));//D //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D } @@ -1657,6 +1786,8 @@ json_true = get_bool ("JSON::XS::true"); json_false = get_bool ("JSON::XS::false"); + + CvNODEBUG_on (get_cv ("JSON::XS::incr_text", 0)); /* the debugger completely breaks lvalue subs */ } PROTOTYPES: DISABLE @@ -1924,4 +2055,3 @@ XPUSHs (decode_json (jsonstr, &json, 0)); } -