--- JSON-XS/XS.xs 2008/03/25 23:00:31 1.79 +++ JSON-XS/XS.xs 2009/09/05 23:00:56 1.100 @@ -14,7 +14,7 @@ #endif // some old perls do not have this, try to make it work, no -// guarentees, though. if it breaks, you get to keep the pieces. +// guarantees, though. if it breaks, you get to keep the pieces. #ifndef UTF8_MAXBYTES # define UTF8_MAXBYTES 13 #endif @@ -33,18 +33,10 @@ #define F_ALLOW_BLESSED 0x00000400UL #define F_CONV_BLESSED 0x00000800UL #define F_RELAXED 0x00001000UL - -#define F_MAXDEPTH 0xf8000000UL -#define S_MAXDEPTH 27 -#define F_MAXSIZE 0x01f00000UL -#define S_MAXSIZE 20 +#define F_ALLOW_UNKNOWN 0x00002000UL #define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing -#define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH)) -#define DEC_SIZE(flags) (1UL << ((flags & F_MAXSIZE ) >> S_MAXSIZE )) - #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER -#define F_DEFAULT (9UL << S_MAXDEPTH) #define INIT_SIZE 32 // initial scalar size to be allocated #define INDENT_STEP 3 // spaces per indentation level @@ -69,6 +61,8 @@ ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) +#define ERR_NESTING_EXCEEDED "json text or perl structure exceeds maximum nesting level (max_depth set too low?)" + #ifdef USE_ITHREADS # define JSON_SLOW 1 # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) @@ -87,10 +81,13 @@ INCR_M_JSON // outside anything, count nesting }; -#define INCR_DONE(json) (!(json)->incr_nest && (json)->incr_mode == INCR_M_JSON) +#define INCR_DONE(json) ((json)->incr_nest <= 0 && (json)->incr_mode == INCR_M_JSON) typedef struct { U32 flags; + U32 max_depth; + STRLEN max_size; + SV *cb_object; HV *cb_sk_object; @@ -98,16 +95,35 @@ SV *incr_text; // the source text so far STRLEN incr_pos; // the current offset into the text int incr_nest; // {[]}-nesting level - int incr_mode; + unsigned char incr_mode; } JSON; +INLINE void +json_init (JSON *json) +{ + Zero (json, 1, JSON); + json->max_depth = 512; +} + ///////////////////////////////////////////////////////////////////////////// // utility functions +INLINE SV * +get_bool (const char *name) +{ + SV *sv = get_sv (name, 1); + + SvREADONLY_on (sv); + SvREADONLY_on (SvRV (sv)); + + return sv; +} + INLINE void shrink (SV *sv) { sv_utf8_downgrade (sv, 1); + if (SvLEN (sv) > SvCUR (sv) + 1) { #ifdef SvPV_shrink_to_cur @@ -161,6 +177,15 @@ return s; } +// convert offset pointer to character index, sv must be string +static STRLEN +ptr_to_index (SV *sv, char *offset) +{ + return SvUTF8 (sv) + ? utf8_distance (offset, SvPVX (sv)) + : offset - SvPVX (sv); +} + ///////////////////////////////////////////////////////////////////////////// // encoder @@ -172,7 +197,6 @@ SV *sv; // result scalar JSON json; U32 indent; // indentation level - U32 maxdepth; // max. indentation/recursion level UV limit; // escape character values >= this value when encoding } enc_t; @@ -181,8 +205,8 @@ { if (expect_false (enc->cur + len >= enc->end)) { - STRLEN cur = enc->cur - SvPVX (enc->sv); - SvGROW (enc->sv, cur + len + 1); + STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); + SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); enc->cur = SvPVX (enc->sv) + cur; enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; } @@ -267,14 +291,13 @@ } else { - static char hexdigit [16] = "0123456789abcdef"; need (enc, len += 5); *enc->cur++ = '\\'; *enc->cur++ = 'u'; - *enc->cur++ = hexdigit [ uch >> 12 ]; - *enc->cur++ = hexdigit [(uch >> 8) & 15]; - *enc->cur++ = hexdigit [(uch >> 4) & 15]; - *enc->cur++ = hexdigit [(uch >> 0) & 15]; + *enc->cur++ = PL_hexdigit [ uch >> 12 ]; + *enc->cur++ = PL_hexdigit [(uch >> 8) & 15]; + *enc->cur++ = PL_hexdigit [(uch >> 4) & 15]; + *enc->cur++ = PL_hexdigit [(uch >> 0) & 15]; } str += clen; @@ -355,8 +378,8 @@ { int i, len = av_len (av); - if (enc->indent >= enc->maxdepth) - croak ("data structure too deep (hit recursion limit)"); + if (enc->indent >= enc->json.max_depth) + croak (ERR_NESTING_EXCEEDED); encode_ch (enc, '['); @@ -441,16 +464,16 @@ { HE *he; - if (enc->indent >= enc->maxdepth) - croak ("data structure too deep (hit recursion limit)"); + if (enc->indent >= enc->json.max_depth) + croak (ERR_NESTING_EXCEEDED); encode_ch (enc, '{'); // for canonical output we have to sort by keys first // actually, this is mostly due to the stupid so-called - // security workaround added somewhere in 5.8.x. + // security workaround added somewhere in 5.8.x // that randomises hash orderings - if (enc->json.flags & F_CANONICAL) + if (enc->json.flags & F_CANONICAL && !SvRMAGICAL (hv)) { int count = hv_iterinit (hv); @@ -633,10 +656,14 @@ encode_str (enc, "true", 4, 0); else if (len == 1 && *pv == '0') encode_str (enc, "false", 5, 0); + else if (enc->json.flags & F_ALLOW_UNKNOWN) + encode_str (enc, "null", 4, 0); else croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1", SvPV_nolen (sv_2mortal (newRV_inc (sv)))); } + else if (enc->json.flags & F_ALLOW_UNKNOWN) + encode_str (enc, "null", 4, 0); else croak ("encountered %s, but JSON can only represent references to arrays or hashes", SvPV_nolen (sv_2mortal (newRV_inc (sv)))); @@ -708,7 +735,7 @@ } else if (SvROK (sv)) encode_rv (enc, SvRV (sv)); - else if (!SvOK (sv)) + else if (!SvOK (sv) || enc->json.flags & F_ALLOW_UNKNOWN) encode_str (enc, "null", 4, 0); else croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this", @@ -728,13 +755,13 @@ enc.cur = SvPVX (enc.sv); enc.end = SvEND (enc.sv); enc.indent = 0; - enc.maxdepth = DEC_DEPTH (enc.json.flags); enc.limit = enc.json.flags & F_ASCII ? 0x000080UL : enc.json.flags & F_LATIN1 ? 0x000100UL : 0x110000UL; SvPOK_only (enc.sv); encode_sv (&enc, scalar); + encode_nl (&enc); SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings @@ -805,7 +832,7 @@ ++dec->cur; \ SE -#define DEC_INC_DEPTH if (++dec->depth > dec->maxdepth) ERR ("json datastructure exceeds maximum nesting level (set a higher max_depth)") +#define DEC_INC_DEPTH if (++dec->depth > dec->json.max_depth) ERR (ERR_NESTING_EXCEEDED) #define DEC_DEC_DEPTH --dec->depth static SV *decode_sv (dec_t *dec); @@ -955,7 +982,11 @@ if (sv) { - SvGROW (sv, SvCUR (sv) + len + 1); + STRLEN cur = SvCUR (sv); + + if (SvLEN (sv) <= cur + len) + SvGROW (sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); + memcpy (SvPVX (sv) + SvCUR (sv), buf, len); SvCUR_set (sv, SvCUR (sv) + len); } @@ -1056,20 +1087,20 @@ if (*start == '-') switch (len) { - case 2: return newSViv (-( start [1] - '0' * 1)); - case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); - case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); - case 5: return newSViv (-( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); - case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); + case 2: return newSViv (-(IV)( start [1] - '0' * 1)); + case 3: return newSViv (-(IV)( start [1] * 10 + start [2] - '0' * 11)); + case 4: return newSViv (-(IV)( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); + case 5: return newSViv (-(IV)( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); + case 6: return newSViv (-(IV)(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); } else switch (len) { - case 1: return newSViv ( start [0] - '0' * 1); - case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); - case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); - case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); - case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); + case 1: return newSViv ( start [0] - '0' * 1); + case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); + case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); + case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); + case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); } { @@ -1345,9 +1376,9 @@ { dec->cur += 4; #if JSON_SLOW - json_true = get_sv ("JSON::XS::true", 1); SvREADONLY_on (json_true); + json_true = get_bool ("JSON::XS::true"); #endif - return SvREFCNT_inc (json_true); + return newSVsv (json_true); } else ERR ("'true' expected"); @@ -1359,9 +1390,9 @@ { dec->cur += 5; #if JSON_SLOW - json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false); + json_false = get_bool ("JSON::XS::false"); #endif - return SvREFCNT_inc (json_false); + return newSVsv (json_false); } else ERR ("'false' expected"); @@ -1389,18 +1420,41 @@ } static SV * -decode_json (SV *string, JSON *json, STRLEN *offset_return) +decode_json (SV *string, JSON *json, char **offset_return) { dec_t dec; - STRLEN offset; SV *sv; - SvGETMAGIC (string); + /* work around bugs in 5.10 where manipulating magic values + * will perl ignore the magic in subsequent accesses + */ + /*SvGETMAGIC (string);*/ + if (SvMAGICAL (string)) + string = sv_2mortal (newSVsv (string)); + SvUPGRADE (string, SVt_PV); - if (json->flags & F_MAXSIZE && SvCUR (string) > DEC_SIZE (json->flags)) - croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", - (unsigned long)SvCUR (string), (unsigned long)DEC_SIZE (json->flags)); + /* work around a bug in perl 5.10, which causes SvCUR to fail an + * assertion with -DDEBUGGING, although SvCUR is documented to + * return the xpv_cur field which certainly exists after upgrading. + * according to nicholas clark, calling SvPOK fixes this. + * But it doesn't fix it, so try another workaround, call SvPV_nolen + * and hope for the best. + * Damnit, SvPV_nolen still trips over yet another assertion. This + * assertion business is seriously broken, try yet another workaround + * for the broken -DDEBUGGING. + */ + { +#ifdef DEBUGGING + STRLEN offset = SvOK (string) ? sv_len (string) : 0; +#else + STRLEN offset = SvCUR (string); +#endif + + if (offset > json->max_size && json->max_size) + croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", + (unsigned long)SvCUR (string), (unsigned long)json->max_size); + } if (json->flags & F_UTF8) sv_utf8_downgrade (string, 0); @@ -1409,12 +1463,11 @@ SvGROW (string, SvCUR (string) + 1); // should basically be a NOP - dec.json = *json; - dec.cur = SvPVX (string); - dec.end = SvEND (string); - dec.err = 0; - dec.depth = 0; - dec.maxdepth = DEC_DEPTH (dec.json.flags); + dec.json = *json; + dec.cur = SvPVX (string); + dec.end = SvEND (string); + dec.err = 0; + dec.depth = 0; if (dec.json.cb_object || dec.json.cb_sk_object) dec.json.flags |= F_HOOK; @@ -1424,6 +1477,9 @@ decode_ws (&dec); sv = decode_sv (&dec); + if (offset_return) + *offset_return = dec.cur; + if (!(offset_return || !sv)) { // check for trailing garbage @@ -1437,16 +1493,6 @@ } } - if (offset_return || !sv) - { - offset = dec.json.flags & F_UTF8 - ? dec.cur - SvPVX (string) - : utf8_distance (dec.cur, SvPVX (string)); - - if (offset_return) - *offset_return = offset; - } - if (!sv) { SV *uni = sv_newmortal (); @@ -1460,9 +1506,9 @@ pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); LEAVE; - croak ("%s, at character offset %d [\"%s\"]", + croak ("%s, at character offset %d (before \"%s\")", dec.err, - (int)offset, + ptr_to_index (string, dec.cur), dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); } @@ -1570,12 +1616,13 @@ case '[': case '{': - ++self->incr_nest; + if (++self->incr_nest > self->max_depth) + croak (ERR_NESTING_EXCEEDED); break; case ']': case '}': - if (!--self->incr_nest) + if (--self->incr_nest <= 0) goto interrupt; } } @@ -1609,8 +1656,10 @@ json_stash = gv_stashpv ("JSON::XS" , 1); json_boolean_stash = gv_stashpv ("JSON::XS::Boolean", 1); - json_true = get_sv ("JSON::XS::true" , 1); SvREADONLY_on (json_true ); - json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false); + json_true = get_bool ("JSON::XS::true"); + json_false = get_bool ("JSON::XS::false"); + + CvNODEBUG_on (get_cv ("JSON::XS::incr_text", 0)); /* the debugger completely breaks lvalue subs */ } PROTOTYPES: DISABLE @@ -1625,8 +1674,7 @@ { SV *pv = NEWSV (0, sizeof (JSON)); SvPOK_only (pv); - Zero (SvPVX (pv), 1, JSON); - ((JSON *)SvPVX (pv))->flags = F_DEFAULT; + json_init ((JSON *)SvPVX (pv)); XPUSHs (sv_2mortal (sv_bless ( newRV_noinc (pv), strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1) @@ -1648,6 +1696,7 @@ allow_blessed = F_ALLOW_BLESSED convert_blessed = F_CONV_BLESSED relaxed = F_RELAXED + allow_unknown = F_ALLOW_UNKNOWN PPCODE: { if (enable) @@ -1672,49 +1721,29 @@ get_allow_blessed = F_ALLOW_BLESSED get_convert_blessed = F_CONV_BLESSED get_relaxed = F_RELAXED + get_allow_unknown = F_ALLOW_UNKNOWN PPCODE: XPUSHs (boolSV (self->flags & ix)); -void max_depth (JSON *self, UV max_depth = 0x80000000UL) +void max_depth (JSON *self, U32 max_depth = 0x80000000UL) PPCODE: -{ - UV log2 = 0; - - if (max_depth > 0x80000000UL) max_depth = 0x80000000UL; - - while ((1UL << log2) < max_depth) - ++log2; - - self->flags = self->flags & ~F_MAXDEPTH | (log2 << S_MAXDEPTH); - + self->max_depth = max_depth; XPUSHs (ST (0)); -} U32 get_max_depth (JSON *self) CODE: - RETVAL = DEC_DEPTH (self->flags); + RETVAL = self->max_depth; OUTPUT: RETVAL -void max_size (JSON *self, UV max_size = 0) +void max_size (JSON *self, U32 max_size = 0) PPCODE: -{ - UV log2 = 0; - - if (max_size > 0x80000000UL) max_size = 0x80000000UL; - if (max_size == 1) max_size = 2; - - while ((1UL << log2) < max_size) - ++log2; - - self->flags = self->flags & ~F_MAXSIZE | (log2 << S_MAXSIZE); - + self->max_size = max_size; XPUSHs (ST (0)); -} int get_max_size (JSON *self) CODE: - RETVAL = DEC_SIZE (self->flags); + RETVAL = self->max_size; OUTPUT: RETVAL @@ -1760,10 +1789,10 @@ void decode_prefix (JSON *self, SV *jsonstr) PPCODE: { - STRLEN offset; + char *offset; EXTEND (SP, 2); PUSHs (decode_json (jsonstr, self, &offset)); - PUSHs (sv_2mortal (newSVuv (offset))); + PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset)))); } void incr_parse (JSON *self, SV *jsonstr = 0) @@ -1775,20 +1804,29 @@ // append data, if any if (jsonstr) { - if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text)) + if (SvUTF8 (jsonstr)) { - /* utf-8-ness differs, need to upgrade */ - sv_utf8_upgrade (self->incr_text); + if (!SvUTF8 (self->incr_text)) + { + /* utf-8-ness differs, need to upgrade */ + sv_utf8_upgrade (self->incr_text); - if (self->incr_pos) - self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) - - (U8 *)SvPVX (self->incr_text); + if (self->incr_pos) + self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) + - (U8 *)SvPVX (self->incr_text); + } } + else if (SvUTF8 (self->incr_text)) + sv_utf8_upgrade (jsonstr); { STRLEN len; const char *str = SvPV (jsonstr, len); - SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1); + STRLEN cur = SvCUR (self->incr_text); + + if (SvLEN (self->incr_text) <= cur + len) + SvGROW (self->incr_text, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1); + Move (str, SvEND (self->incr_text), len, char); SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len); *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there @@ -1798,19 +1836,27 @@ if (GIMME_V != G_VOID) do { - STRLEN offset; - - incr_parse (self); + char *offset; if (!INCR_DONE (self)) - break; + { + incr_parse (self); + + if (self->incr_pos > self->max_size && self->max_size) + croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", + (unsigned long)self->incr_pos, (unsigned long)self->max_size); + + if (!INCR_DONE (self)) + break; + } XPUSHs (decode_json (self->incr_text, self, &offset)); - sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset); - self->incr_pos -= offset; + self->incr_pos -= offset - SvPVX (self->incr_text); self->incr_nest = 0; self->incr_mode = 0; + + sv_chop (self->incr_text, offset); } while (GIMME_V == G_ARRAY); } @@ -1820,7 +1866,7 @@ CODE: { if (self->incr_pos) - croak ("incr_text can only be called after a successful incr_parse call in scalar context"); + croak ("incr_text can not be called when the incremental parser already started parsing"); RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef; } @@ -1830,10 +1876,20 @@ void incr_skip (JSON *self) CODE: { - if (!self->incr_pos || !INCR_DONE (self)) - croak ("incr_text can only be called after an unsuccessful incr_parse call in scalar context");//D + if (self->incr_pos) + { + sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos); + self->incr_pos = 0; + self->incr_nest = 0; + self->incr_mode = 0; + } +} - sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos); +void incr_reset (JSON *self) + CODE: +{ + SvREFCNT_dec (self->incr_text); + self->incr_text = 0; self->incr_pos = 0; self->incr_nest = 0; self->incr_mode = 0; @@ -1853,7 +1909,9 @@ encode_json = F_UTF8 PPCODE: { - JSON json = { F_DEFAULT | ix }; + JSON json; + json_init (&json); + json.flags |= ix; XPUSHs (encode_json (scalar, &json)); } @@ -1863,7 +1921,9 @@ decode_json = F_UTF8 PPCODE: { - JSON json = { F_DEFAULT | ix }; + JSON json; + json_init (&json); + json.flags |= ix; XPUSHs (decode_json (jsonstr, &json, 0)); }