# JSON-XS/XS.xs, revision 1.99 -> 1.104 (unified diff).
#
# Summary of the changes below:
#  * IVUV_MAXCHARS reserves one more character (+2 becomes +3).
#  * New INCR_M_C0 / INCR_M_C1 states let the INCR_M_* state machine skip
#    '#'-to-end-of-line comments, both in the initial whitespace run and later.
#  * New json_atof / json_atof_scan1 replace the two Atof () calls used when
#    building an NV; the DBL_DIG / LDBL_DIG test is replaced by NV_DIG.
#  * encode_nl (&enc) is now called right after encode_sv (&enc, scalar).
#  * The unused local "uch" is dropped from the UTF-8 validation branch.
#  * A comment typo is fixed, a commented-out debug printf is added, and a
#    trailing blank line at end of file is removed.
#
# NOTE(review): json_atof_scan1 calls itself once per accumulator-full of
#   digits and no limit on the digit count is visible in this patch, so the
#   recursion depth grows with the length of the number -- confirm the caller
#   bounds it.
# NOTE(review): "exp2 = exp2 * 10 + *s++ - '0'" accumulates into a plain int
#   with no range check; a long exponent overflows it (undefined behaviour).
#
# The line structure of this patch was rebuilt from a whitespace-collapsed
# copy. Blank context lines were placed to satisfy the hunk line counts;
# indentation is a best-effort reconstruction, so apply with
# "patch -l" (--ignore-whitespace).
--- JSON-XS/XS.xs 2009/08/08 10:06:02 1.99
+++ JSON-XS/XS.xs 2010/01/19 00:31:13 1.104
@@ -19,7 +19,8 @@
 # define UTF8_MAXBYTES 13
 #endif
 
-#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2)
+// three extra for rounding, sign, and end of string
+#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 3)
 
 #define F_ASCII 0x00000001UL
 #define F_LATIN1 0x00000002UL
@@ -78,6 +79,8 @@
   INCR_M_WS = 0, // initial whitespace skipping, must be 0
   INCR_M_STR, // inside string
   INCR_M_BS, // inside backslash
+  INCR_M_C0, // inside comment in initial whitespace sequence
+  INCR_M_C1, // inside comment in other places
   INCR_M_JSON // outside anything, count nesting
 };
 
@@ -187,6 +190,90 @@
 }
 
 /////////////////////////////////////////////////////////////////////////////
+// fp hell
+
+// scan a group of digits, and a trailing exponent
+static void
+json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp)
+{
+  UV uaccum = 0;
+  int eaccum = 0;
+
+  for (;;)
+    {
+      U8 dig = (U8)*s - '0';
+
+      if (expect_false (dig >= 10))
+        {
+          if (dig == (U8)((U8)'.' - (U8)'0'))
+            {
+              ++s;
+              json_atof_scan1 (s, accum, expo, 1);
+            }
+          else if ((dig | ' ') == 'e' - '0')
+            {
+              int exp2 = 0;
+              int neg = 0;
+
+              ++s;
+
+              if (*s == '-')
+                {
+                  ++s;
+                  neg = 1;
+                }
+              else if (*s == '+')
+                ++s;
+
+              while ((dig = (U8)*s - '0') < 10)
+                exp2 = exp2 * 10 + *s++ - '0';
+
+              *expo += neg ? -exp2 : exp2;
+            }
+
+          break;
+        }
+
+      ++s;
+
+      uaccum = uaccum * 10 + dig;
+      ++eaccum;
+
+      // if we have too many digits, then recurse for more
+      // we actually do this for rather few digits
+      if (uaccum >= (UV_MAX - 9) / 10)
+        {
+          if (postdp) *expo -= eaccum;
+          json_atof_scan1 (s, accum, expo, postdp);
+          if (postdp) *expo += eaccum;
+
+          break;
+        }
+    }
+
+  if (postdp) *expo -= eaccum;
+  *accum += uaccum * pow (10., *expo);
+  *expo += eaccum;
+}
+
+static NV
+json_atof (const char *s)
+{
+  NV accum = 0.;
+  int expo = 0;
+  int neg = 0;
+
+  if (*s == '-')
+    {
+      ++s;
+      neg = 1;
+    }
+
+  json_atof_scan1 (s, &accum, &expo, 0);
+
+  return neg ? -accum : accum;
+}
+/////////////////////////////////////////////////////////////////////////////
 // encoder
 
 // structure used for encoding JSON
@@ -761,6 +848,7 @@
 
   SvPOK_only (enc.sv);
   encode_sv (&enc, scalar);
+  encode_nl (&enc);
 
   SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
   *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
@@ -950,11 +1038,10 @@
           else if (ch >= 0x80)
             {
               STRLEN clen;
-              UV uch;
 
               --dec_cur;
 
-              uch = decode_utf8 (dec_cur, dec->end - dec_cur, &clen);
+              decode_utf8 (dec_cur, dec->end - dec_cur, &clen);
               if (clen == (STRLEN)-1)
                 ERR ("malformed UTF-8 character in JSON string");
 
@@ -1118,20 +1205,16 @@
       len -= *start == '-' ? 1 : 0;
 
       // does not fit into IV or UV, try NV
-      if ((sizeof (NV) == sizeof (double) && DBL_DIG >= len)
-          #if defined (LDBL_DIG)
-          || (sizeof (NV) == sizeof (long double) && LDBL_DIG >= len)
-          #endif
-         )
+      if (len <= NV_DIG)
         // fits into NV without loss of precision
-        return newSVnv (Atof (start));
+        return newSVnv (json_atof (start));
 
       // everything else fails, convert it to a string
       return newSVpvn (start, dec->cur - start);
     }
 
   // loss of precision here
-  return newSVnv (Atof (start));
+  return newSVnv (json_atof (start));
 
 fail:
   return 0;
@@ -1527,19 +1610,30 @@
 {
   const char *p = SvPVX (self->incr_text) + self->incr_pos;
 
+  // the state machine here is a bit convoluted and could be simplified a lot
+  // but this would make it slower, so...
+
   for (;;)
     {
       //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D
       switch (self->incr_mode)
         {
-          // only used for intiial whitespace skipping
+          // only used for initial whitespace skipping
           case INCR_M_WS:
             for (;;)
               {
                 if (*p > 0x20)
                   {
-                    self->incr_mode = INCR_M_JSON;
-                    goto incr_m_json;
+                    if (*p == '#')
+                      {
+                        self->incr_mode = INCR_M_C0;
+                        goto incr_m_c;
+                      }
+                    else
+                      {
+                        self->incr_mode = INCR_M_JSON;
+                        goto incr_m_json;
+                      }
                   }
                 else if (!*p)
                   goto interrupt;
@@ -1556,6 +1650,25 @@
             self->incr_mode = INCR_M_STR;
             goto incr_m_str;
 
+          // inside #-style comments
+          case INCR_M_C0:
+          case INCR_M_C1:
+          incr_m_c:
+            for (;;)
+              {
+                if (*p == '\n')
+                  {
+                    self->incr_mode = self->incr_mode == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON;
+                    break;
+                  }
+                else if (!*p)
+                  goto interrupt;
+
+                ++p;
+              }
+
+            break;
+
           // inside a string
           case INCR_M_STR:
           incr_m_str:
@@ -1623,6 +1736,11 @@
                     case '}':
                       if (--self->incr_nest <= 0)
                         goto interrupt;
+                      break;
+
+                    case '#':
+                      self->incr_mode = INCR_M_C1;
+                      goto incr_m_c;
                   }
               }
         }
@@ -1633,6 +1751,7 @@
 
 interrupt:
   self->incr_pos = p - SvPVX (self->incr_text);
+  //printf ("interrupt<%.*s>\n", self->incr_pos, SvPVX(self->incr_text));//D
   //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D
 }
 
@@ -1926,4 +2045,3 @@
         XPUSHs (decode_json (jsonstr, &json, 0));
 }
 
-