--- JSON-XS/XS.xs 2008/09/22 07:29:29 1.92
+++ JSON-XS/XS.xs 2009/07/17 14:33:45 1.98
@@ -177,6 +177,15 @@
   return s;
 }
 
+// convert a pointer into sv's own string buffer to a character index, sv must be string
+static STRLEN
+ptr_to_index (SV *sv, char *offset)
+{
+  return SvUTF8 (sv)
+         ? utf8_distance ((U8 *)offset, (U8 *)SvPVX (sv))
+         : offset - SvPVX (sv);
+}
+
 /////////////////////////////////////////////////////////////////////////////
 // encoder
 
@@ -196,8 +205,8 @@
 {
   if (expect_false (enc->cur + len >= enc->end))
     {
-      STRLEN cur = enc->cur - SvPVX (enc->sv);
-      SvGROW (enc->sv, cur + len + 1);
+      STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
+      SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
       enc->cur = SvPVX (enc->sv) + cur;
       enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
     }
@@ -464,7 +473,7 @@
   // actually, this is mostly due to the stupid so-called
   // security workaround added somewhere in 5.8.x
   // that randomises hash orderings
-  if (enc->json.flags & F_CANONICAL)
+  if (enc->json.flags & F_CANONICAL && !SvRMAGICAL (hv))
     {
       int count = hv_iterinit (hv);
 
@@ -972,7 +981,11 @@
 
         if (sv)
           {
-            SvGROW (sv, SvCUR (sv) + len + 1);
+            STRLEN cur = SvCUR (sv);
+
+            if (SvLEN (sv) <= cur + len)
+              SvGROW (sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
+
             memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
             SvCUR_set (sv, SvCUR (sv) + len);
           }
@@ -1073,20 +1086,20 @@
       if (*start == '-')
         switch (len)
           {
-            case 2: return newSViv (-(                                                                          start [1] - '0' *     1));
-            case 3: return newSViv (-(                                                         start [1] * 10 + start [2] - '0' *    11));
-            case 4: return newSViv (-(                                       start [1] * 100 + start [2] * 10 + start [3] - '0' *   111));
-            case 5: return newSViv (-(                    start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' *  1111));
-            case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111));
+            case 2: return newSViv (-(IV)(                                                                          start [1] - '0' *     1));
+            case 3: return newSViv (-(IV)(                                                         start [1] * 10 + start [2] - '0' *    11));
+            case 4: return newSViv (-(IV)(                                       start [1] * 100 + start [2] * 10 + start [3] - '0' *   111));
+            case 5: return newSViv (-(IV)(                    start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' *  1111));
+            case 6: return newSViv (-(IV)(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111));
           }
       else
         switch (len)
           {
-            case 1: return newSViv (                                                                            start [0] - '0' *     1);
-            case 2: return newSViv (                                                           start [0] * 10 + start [1] - '0' *    11);
-            case 3: return newSViv (                                         start [0] * 100 + start [1] * 10 + start [2] - '0' *   111);
-            case 4: return newSViv (                      start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' *  1111);
-            case 5: return newSViv (  start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111);
+            case 1: return newSViv (                                                                                start [0] - '0' *     1);
+            case 2: return newSViv (                                                               start [0] * 10 + start [1] - '0' *    11);
+            case 3: return newSViv (                                             start [0] * 100 + start [1] * 10 + start [2] - '0' *   111);
+            case 4: return newSViv (                          start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' *  1111);
+            case 5: return newSViv (      start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111);
           }
 
       {
@@ -1406,13 +1419,18 @@
 }
 
 static SV *
-decode_json (SV *string, JSON *json, STRLEN *offset_return)
+decode_json (SV *string, JSON *json, char **offset_return)
 {
   dec_t dec;
-  STRLEN offset;
   SV *sv;
 
-  SvGETMAGIC (string);
+  /* work around bugs in 5.10 where manipulating magic values
+   * will cause perl to ignore the magic in subsequent accesses
+   */
+  /*SvGETMAGIC (string);*/
+  if (SvMAGICAL (string))
+    string = sv_2mortal (newSVsv (string));
+
   SvUPGRADE (string, SVt_PV);
 
   /* work around a bug in perl 5.10, which causes SvCUR to fail an
@@ -1425,15 +1443,17 @@
    * assertion business is seriously broken, try yet another workaround
    * for the broken -DDEBUGGING.
    */
+  {
 #ifdef DEBUGGING
-  offset = SvOK (string) ? sv_len (string) : 0;
+    STRLEN offset = SvOK (string) ? sv_len (string) : 0;
 #else
-  offset = SvCUR (string);
+    STRLEN offset = SvCUR (string);
 #endif
 
-  if (offset > json->max_size && json->max_size)
-    croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
-           (unsigned long)SvCUR (string), (unsigned long)json->max_size);
+    if (offset > json->max_size && json->max_size)
+      croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
+             (unsigned long)SvCUR (string), (unsigned long)json->max_size);
+  }
 
   if (json->flags & F_UTF8)
     sv_utf8_downgrade (string, 0);
@@ -1456,6 +1476,9 @@
   decode_ws (&dec);
   sv = decode_sv (&dec);
 
+  if (offset_return)
+    *offset_return = dec.cur;
+
   if (!(offset_return || !sv))
     {
       // check for trailing garbage
@@ -1469,16 +1492,6 @@
         }
     }
 
-  if (offset_return || !sv)
-    {
-      offset = dec.json.flags & F_UTF8
-               ? dec.cur - SvPVX (string)
-               : utf8_distance (dec.cur, SvPVX (string));
-
-      if (offset_return)
-        *offset_return = offset;
-    }
-
   if (!sv)
     {
       SV *uni = sv_newmortal ();
@@ -1492,9 +1505,9 @@
       pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
       LEAVE;
 
-      croak ("%s, at character offset %d [\"%s\"]",
+      croak ("%s, at character offset %d (before \"%s\")",
              dec.err,
-             (int)offset,
+             (int)ptr_to_index (string, dec.cur),
              dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
     }
 
@@ -1773,10 +1786,10 @@
 void decode_prefix (JSON *self, SV *jsonstr)
         PPCODE:
 {
-        STRLEN offset;
+        char *offset;
         EXTEND (SP, 2);
         PUSHs (decode_json (jsonstr, self, &offset));
-        PUSHs (sv_2mortal (newSVuv (offset)));
+        PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset))));
 }
 
 void incr_parse (JSON *self, SV *jsonstr = 0)
@@ -1788,20 +1801,29 @@
         // append data, if any
         if (jsonstr)
           {
-            if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text))
+            if (SvUTF8 (jsonstr))
               {
-                /* utf-8-ness differs, need to upgrade */
-                sv_utf8_upgrade (self->incr_text);
+                if (!SvUTF8 (self->incr_text))
+                  {
+                    /* utf-8-ness differs, need to upgrade */
+                    sv_utf8_upgrade (self->incr_text);
 
-                if (self->incr_pos)
-                  self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
-                                   - (U8 *)SvPVX (self->incr_text);
+                    if (self->incr_pos)
+                      self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
+                                       - (U8 *)SvPVX (self->incr_text);
+                  }
               }
+            else if (SvUTF8 (self->incr_text))
+              sv_utf8_upgrade (jsonstr);
 
             {
               STRLEN len;
               const char *str = SvPV (jsonstr, len);
-              SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1);
+              STRLEN cur = SvCUR (self->incr_text);
+
+              if (SvLEN (self->incr_text) <= cur + len)
+                SvGROW (self->incr_text, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
+
               Move (str, SvEND (self->incr_text), len, char);
               SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len);
               *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there
@@ -1811,7 +1833,7 @@
         if (GIMME_V != G_VOID)
           do
             {
-              STRLEN offset;
+              char *offset;
 
               if (!INCR_DONE (self))
                 {
@@ -1827,10 +1849,11 @@
 
               XPUSHs (decode_json (self->incr_text, self, &offset));
 
-              sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset);
-              self->incr_pos -= offset;
+              self->incr_pos -= offset - SvPVX (self->incr_text);
               self->incr_nest = 0;
               self->incr_mode = 0;
+
+              sv_chop (self->incr_text, offset);
             }
           while (GIMME_V == G_ARRAY);
 }