--- JSON-XS/XS.xs 2008/03/19 15:37:54 1.75 +++ JSON-XS/XS.xs 2008/03/25 23:00:31 1.79 @@ -80,10 +80,25 @@ static HV *json_stash, *json_boolean_stash; // JSON::XS:: static SV *json_true, *json_false; +enum { + INCR_M_WS = 0, // initial whitespace skipping, must be 0 + INCR_M_STR, // inside string + INCR_M_BS, // inside backslash + INCR_M_JSON // outside anything, count nesting +}; + +#define INCR_DONE(json) (!(json)->incr_nest && (json)->incr_mode == INCR_M_JSON) + typedef struct { U32 flags; SV *cb_object; HV *cb_sk_object; + + // for the incremental parser + SV *incr_text; // the source text so far + STRLEN incr_pos; // the current offset into the text + int incr_nest; // {[]}-nesting level + int incr_mode; // current scanner state, one of the INCR_M_* values } JSON; ///////////////////////////////////////////////////////////////////////////// @@ -425,7 +440,6 @@ encode_hv (enc_t *enc, HV *hv) { HE *he; - int count; if (enc->indent >= enc->maxdepth) croak ("data structure too deep (hit recursion limit)"); @@ -1173,7 +1187,7 @@ for (;;) { - // the >= 0x80 is true on most architectures + // the >= 0x80 is false on most architectures if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\') { // slow path, back up and use decode_str @@ -1375,10 +1389,10 @@ } static SV * -decode_json (SV *string, JSON *json, UV *offset_return) +decode_json (SV *string, JSON *json, STRLEN *offset_return) { dec_t dec; - UV offset; + STRLEN offset; SV *sv; SvGETMAGIC (string); @@ -1461,6 +1475,122 @@ } ///////////////////////////////////////////////////////////////////////////// +// incremental parser + +static void +incr_parse (JSON *self) +{ + const char *p = SvPVX (self->incr_text) + self->incr_pos; + + for (;;) + { + //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D + switch (self->incr_mode) + { + // only used for initial whitespace skipping + case INCR_M_WS: + for (;;) + { + if (*p > 0x20) + { + self->incr_mode = INCR_M_JSON; + goto incr_m_json; + } + else if 
(!*p) + goto interrupt; + + ++p; + } + + // skip a single char inside a string (for \\-processing) + case INCR_M_BS: + if (!*p) + goto interrupt; + + ++p; + self->incr_mode = INCR_M_STR; + goto incr_m_str; + + // inside a string + case INCR_M_STR: + incr_m_str: + for (;;) + { + if (*p == '"') + { + ++p; + self->incr_mode = INCR_M_JSON; + + if (!self->incr_nest) + goto interrupt; + + goto incr_m_json; + } + else if (*p == '\\') + { + ++p; // "virtually" consumes character after the backslash + + if (!*p) // if at end of string we have to switch modes + { + self->incr_mode = INCR_M_BS; + goto interrupt; + } + } + else if (!*p) + goto interrupt; + + ++p; + } + + // after initial ws, outside string + case INCR_M_JSON: + incr_m_json: + for (;;) + { + switch (*p++) + { + case 0: + --p; + goto interrupt; + + case 0x09: + case 0x0a: + case 0x0d: + case 0x20: + if (!self->incr_nest) + { + --p; // do not eat the whitespace, let the next round do it + goto interrupt; + } + break; + + case '"': + self->incr_mode = INCR_M_STR; + goto incr_m_str; + + case '[': + case '{': + ++self->incr_nest; + break; + + case ']': + case '}': + if (!--self->incr_nest) + goto interrupt; + } + } + } + + modechange: + ; + } + +interrupt: + self->incr_pos = p - SvPVX (self->incr_text); + //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D +} + +///////////////////////////////////////////////////////////////////////////// // XS interface functions MODULE = JSON::XS PACKAGE = JSON::XS @@ -1630,30 +1760,111 @@ void decode_prefix (JSON *self, SV *jsonstr) PPCODE: { - UV offset; + STRLEN offset; EXTEND (SP, 2); PUSHs (decode_json (jsonstr, self, &offset)); PUSHs (sv_2mortal (newSVuv (offset))); } +void incr_parse (JSON *self, SV *jsonstr = 0) + PPCODE: +{ + if (!self->incr_text) + self->incr_text = newSVpvn ("", 0); + + // append data, if any + if (jsonstr) + { + if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text)) + { + /* utf-8-ness differs, need to upgrade */ + 
sv_utf8_upgrade (self->incr_text); + + if (self->incr_pos) + self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) + - (U8 *)SvPVX (self->incr_text); + } + + { + STRLEN len; + const char *str = SvPV (jsonstr, len); + SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1); + Move (str, SvEND (self->incr_text), len, char); + SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len); + *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there + } + } + + if (GIMME_V != G_VOID) + do + { + STRLEN offset; + + incr_parse (self); + + if (!INCR_DONE (self)) + break; + + XPUSHs (decode_json (self->incr_text, self, &offset)); + + sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset); + self->incr_pos -= offset; + self->incr_nest = 0; + self->incr_mode = 0; + } + while (GIMME_V == G_ARRAY); +} + +SV *incr_text (JSON *self) + ATTRS: lvalue + CODE: +{ + if (self->incr_pos) + croak ("incr_text can only be called after a successful incr_parse call in scalar context"); + + RETVAL = self->incr_text ? 
SvREFCNT_inc (self->incr_text) : &PL_sv_undef; +} + OUTPUT: + RETVAL + +void incr_skip (JSON *self) + CODE: +{ + if (!self->incr_pos || !INCR_DONE (self)) + croak ("incr_skip can only be called after an unsuccessful incr_parse call in scalar context");//D + + sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos); + self->incr_pos = 0; + self->incr_nest = 0; + self->incr_mode = 0; +} + void DESTROY (JSON *self) CODE: SvREFCNT_dec (self->cb_sk_object); SvREFCNT_dec (self->cb_object); + SvREFCNT_dec (self->incr_text); PROTOTYPES: ENABLE void encode_json (SV *scalar) + ALIAS: + to_json_ = 0 + encode_json = F_UTF8 PPCODE: { - JSON json = { F_DEFAULT | F_UTF8 }; + JSON json = { F_DEFAULT | ix }; XPUSHs (encode_json (scalar, &json)); } void decode_json (SV *jsonstr) + ALIAS: + from_json_ = 0 + decode_json = F_UTF8 PPCODE: { - JSON json = { F_DEFAULT | F_UTF8 }; + JSON json = { F_DEFAULT | ix }; XPUSHs (decode_json (jsonstr, &json, 0)); } +