--- JSON-XS/XS.xs 2008/04/05 18:15:46 1.85 +++ JSON-XS/XS.xs 2009/08/08 10:06:02 1.99 @@ -14,7 +14,7 @@ #endif // some old perls do not have this, try to make it work, no -// guarentees, though. if it breaks, you get to keep the pieces. +// guarantees, though. if it breaks, you get to keep the pieces. #ifndef UTF8_MAXBYTES # define UTF8_MAXBYTES 13 #endif @@ -81,7 +81,7 @@ INCR_M_JSON // outside anything, count nesting }; -#define INCR_DONE(json) (!(json)->incr_nest && (json)->incr_mode == INCR_M_JSON) +#define INCR_DONE(json) ((json)->incr_nest <= 0 && (json)->incr_mode == INCR_M_JSON) typedef struct { U32 flags; @@ -94,7 +94,7 @@ // for the incremental parser SV *incr_text; // the source text so far STRLEN incr_pos; // the current offset into the text - unsigned char incr_nest; // {[]}-nesting level + int incr_nest; // {[]}-nesting level unsigned char incr_mode; } JSON; @@ -123,6 +123,7 @@ shrink (SV *sv) { sv_utf8_downgrade (sv, 1); + if (SvLEN (sv) > SvCUR (sv) + 1) { #ifdef SvPV_shrink_to_cur @@ -176,6 +177,15 @@ return s; } +// convert offset pointer to character index, sv must be string +static STRLEN +ptr_to_index (SV *sv, char *offset) +{ + return SvUTF8 (sv) + ? utf8_distance (offset, SvPVX (sv)) + : offset - SvPVX (sv); +} + ///////////////////////////////////////////////////////////////////////////// // encoder @@ -195,8 +205,8 @@ { if (expect_false (enc->cur + len >= enc->end)) { - STRLEN cur = enc->cur - SvPVX (enc->sv); - SvGROW (enc->sv, cur + len + 1); + STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); + SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); enc->cur = SvPVX (enc->sv) + cur; enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; } @@ -281,14 +291,13 @@ } else { - static char hexdigit [16] = "0123456789abcdef"; need (enc, len += 5); *enc->cur++ = '\\'; *enc->cur++ = 'u'; - *enc->cur++ = hexdigit [ uch >> 12 ]; - *enc->cur++ = hexdigit [(uch >> 8) & 15]; - *enc->cur++ = hexdigit [(uch >> 4) & 15]; - *enc->cur++ = hexdigit [(uch >> 0) & 15]; + *enc->cur++ = PL_hexdigit [ uch >> 12 ]; + *enc->cur++ = PL_hexdigit [(uch >> 8) & 15]; + *enc->cur++ = PL_hexdigit [(uch >> 4) & 15]; + *enc->cur++ = PL_hexdigit [(uch >> 0) & 15]; } str += clen; @@ -462,9 +471,9 @@ // for canonical output we have to sort by keys first // actually, this is mostly due to the stupid so-called - // security workaround added somewhere in 5.8.x. + // security workaround added somewhere in 5.8.x // that randomises hash orderings - if (enc->json.flags & F_CANONICAL) + if (enc->json.flags & F_CANONICAL && !SvRMAGICAL (hv)) { int count = hv_iterinit (hv); @@ -972,7 +981,11 @@ if (sv) { - SvGROW (sv, SvCUR (sv) + len + 1); + STRLEN cur = SvCUR (sv); + + if (SvLEN (sv) <= cur + len) + SvGROW (sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); + memcpy (SvPVX (sv) + SvCUR (sv), buf, len); SvCUR_set (sv, SvCUR (sv) + len); } @@ -1073,20 +1086,20 @@ if (*start == '-') switch (len) { - case 2: return newSViv (-( start [1] - '0' * 1)); - case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); - case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); - case 5: return newSViv (-( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); - case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); + case 2: return newSViv (-(IV)( start [1] - '0' * 1)); + case 3: return newSViv (-(IV)( start [1] * 10 + start [2] - '0' * 11)); + case 4: return newSViv (-(IV)( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); + case 5: return newSViv (-(IV)( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); + case 6: return newSViv (-(IV)(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); } else switch (len) { - case 1: return newSViv ( start [0] - '0' * 1); - case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); - case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); - case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); - case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); + case 1: return newSViv ( start [0] - '0' * 1); + case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); + case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); + case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); + case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); } { @@ -1406,18 +1419,41 @@ } static SV * -decode_json (SV *string, JSON *json, STRLEN *offset_return) +decode_json (SV *string, JSON *json, char **offset_return) { dec_t dec; - STRLEN offset; SV *sv; - SvGETMAGIC (string); + /* work around bugs in 5.10 where manipulating magic values + * will perl ignore the magic in subsequent accesses + */ + /*SvGETMAGIC (string);*/ + if (SvMAGICAL (string)) + string = sv_2mortal (newSVsv (string)); + SvUPGRADE (string, SVt_PV); - if (SvCUR (string) > json->max_size && json->max_size) - croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", - (unsigned long)SvCUR (string), (unsigned long)json->max_size); + /* work around a bug in perl 5.10, which causes SvCUR to fail an + * assertion with -DDEBUGGING, although SvCUR is documented to + * return the xpv_cur field which certainly exists after upgrading. + * according to nicholas clark, calling SvPOK fixes this. + * But it doesn't fix it, so try another workaround, call SvPV_nolen + * and hope for the best. + * Damnit, SvPV_nolen still trips over yet another assertion. This + * assertion business is seriously broken, try yet another workaround + * for the broken -DDEBUGGING. + */ + { +#ifdef DEBUGGING + STRLEN offset = SvOK (string) ? sv_len (string) : 0; +#else + STRLEN offset = SvCUR (string); +#endif + + if (offset > json->max_size && json->max_size) + croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", + (unsigned long)SvCUR (string), (unsigned long)json->max_size); + } if (json->flags & F_UTF8) sv_utf8_downgrade (string, 0); @@ -1440,6 +1476,9 @@ decode_ws (&dec); sv = decode_sv (&dec); + if (offset_return) + *offset_return = dec.cur; + if (!(offset_return || !sv)) { // check for trailing garbage @@ -1453,16 +1492,6 @@ } } - if (offset_return || !sv) - { - offset = dec.json.flags & F_UTF8 - ? dec.cur - SvPVX (string) - : utf8_distance (dec.cur, SvPVX (string)); - - if (offset_return) - *offset_return = offset; - } - if (!sv) { SV *uni = sv_newmortal (); @@ -1476,9 +1505,9 @@ pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); LEAVE; - croak ("%s, at character offset %d [\"%s\"]", + croak ("%s, at character offset %d (before \"%s\")", dec.err, - (int)offset, + ptr_to_index (string, dec.cur), dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); } @@ -1592,7 +1621,7 @@ case ']': case '}': - if (!--self->incr_nest) + if (--self->incr_nest <= 0) goto interrupt; } } @@ -1628,6 +1657,8 @@ json_true = get_bool ("JSON::XS::true"); json_false = get_bool ("JSON::XS::false"); + + CvNODEBUG_on (get_cv ("JSON::XS::incr_text", 0)); /* the debugger completely breaks lvalue subs */ } PROTOTYPES: DISABLE @@ -1757,10 +1788,10 @@ void decode_prefix (JSON *self, SV *jsonstr) PPCODE: { - STRLEN offset; + char *offset; EXTEND (SP, 2); PUSHs (decode_json (jsonstr, self, &offset)); - PUSHs (sv_2mortal (newSVuv (offset))); + PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset)))); } void incr_parse (JSON *self, SV *jsonstr = 0) @@ -1772,20 +1803,29 @@ // append data, if any if (jsonstr) { - if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text)) + if (SvUTF8 (jsonstr)) { - /* utf-8-ness differs, need to upgrade */ - sv_utf8_upgrade (self->incr_text); + if (!SvUTF8 (self->incr_text)) + { + /* utf-8-ness differs, need to upgrade */ + sv_utf8_upgrade (self->incr_text); - if (self->incr_pos) - self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) - - (U8 *)SvPVX (self->incr_text); + if (self->incr_pos) + self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) + - (U8 *)SvPVX (self->incr_text); + } } + else if (SvUTF8 (self->incr_text)) + sv_utf8_upgrade (jsonstr); { STRLEN len; const char *str = SvPV (jsonstr, len); - SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1); + STRLEN cur = SvCUR (self->incr_text); + + if (SvLEN (self->incr_text) <= cur + len) + SvGROW (self->incr_text, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); + Move (str, SvEND (self->incr_text), len, char); SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len); *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there @@ -1795,7 +1835,7 @@ if (GIMME_V != G_VOID) do { - STRLEN offset; + char *offset; if (!INCR_DONE (self)) { @@ -1811,10 +1851,11 @@ XPUSHs (decode_json (self->incr_text, self, &offset)); - sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset); - self->incr_pos -= offset; + self->incr_pos -= offset - SvPVX (self->incr_text); self->incr_nest = 0; self->incr_mode = 0; + + sv_chop (self->incr_text, offset); } while (GIMME_V == G_ARRAY); } @@ -1843,6 +1884,16 @@ } } +void incr_reset (JSON *self) + CODE: +{ + SvREFCNT_dec (self->incr_text); + self->incr_text = 0; + self->incr_pos = 0; + self->incr_nest = 0; + self->incr_mode = 0; +} + void DESTROY (JSON *self) CODE: SvREFCNT_dec (self->cb_sk_object);