--- JSON-XS/XS.xs 2018/11/15 20:13:03 1.134 +++ JSON-XS/XS.xs 2019/03/06 07:21:17 1.138 @@ -98,6 +98,8 @@ enum { INCR_M_WS = 0, // initial whitespace skipping, must be 0 + INCR_M_TFN, // inside true/false/null + INCR_M_NUM, // inside number INCR_M_STR, // inside string INCR_M_BS, // inside backslash INCR_M_C0, // inside comment in initial whitespace sequence @@ -127,8 +129,9 @@ INLINE void json_init (JSON *json) { - Zero (json, 1, JSON); - json->max_depth = 512; + static const JSON init = { F_ALLOW_NONREF, 512 }; + + *json = init; } ///////////////////////////////////////////////////////////////////////////// @@ -1503,7 +1506,7 @@ sv = newRV_noinc ((SV *)hv); // check filter callbacks - if (dec->json.flags & F_HOOK) + if (expect_false (dec->json.flags & F_HOOK)) { if (dec->json.cb_sk_object && HvKEYS (hv) == 1) { @@ -1555,15 +1558,12 @@ PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; if (count == 1) - { - sv = newSVsv (POPs); - FREETMPS; LEAVE; - return sv; - } - else if (count) + sv = newSVsv (POPs); + else if (count == 0) + SvREFCNT_inc (sv); + else croak ("filter_json_object callbacks must not return more than one scalar"); - SvREFCNT_inc (sv); FREETMPS; LEAVE; } } @@ -1796,7 +1796,7 @@ // check for trailing garbage decode_ws (&dec); - if (*dec.cur) + if (dec.cur != dec.end) { dec.err = "garbage after JSON object"; SvREFCNT_dec (sv); @@ -1844,9 +1844,17 @@ for (;;) { - //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D switch (self->incr_mode) { + // reached end of a scalar, see if we are inside a nested structure or not + end_of_scalar: + self->incr_mode = INCR_M_JSON; + + if (self->incr_nest) // end of a scalar inside array, object or tag + goto incr_m_json; + else // end of scalar outside structure, json text ends here + goto interrupt; + // only used for initial whitespace skipping case INCR_M_WS: for (;;) @@ -1898,6 +1906,40 @@ break; + // inside true/false/null + case INCR_M_TFN: + incr_m_tfn: + for (;;) + switch (*p++) + { + case 'r': case 'u': case 'e': // tRUE, falsE, nUll + case 'a': case 'l': case 's': // fALSe, nuLL + // allowed + break; + + default: + --p; + goto end_of_scalar; + } + + // inside a number + case INCR_M_NUM: + incr_m_num: + for (;;) + switch (*p++) + { + case 'e': case 'E': case '.': case '+': + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + // allowed + break; + + default: + --p; + goto end_of_scalar; + } + // inside a string case INCR_M_STR: incr_m_str: @@ -1906,12 +1948,7 @@ if (*p == '"') { ++p; - self->incr_mode = INCR_M_JSON; - - if (!self->incr_nest) - goto interrupt; - - goto incr_m_json; + goto end_of_scalar; } else if (*p == '\\') { @@ -1951,6 +1988,21 @@ } break; + // the following three blocks handle scalars. this makes the parser + // more strict than required inside arrays or objects, and could + // be moved to a special case on the toplevel (except strings) + case 't': + case 'f': + case 'n': + self->incr_mode = INCR_M_TFN; + goto incr_m_tfn; + + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + self->incr_mode = INCR_M_NUM; + goto incr_m_num; + case '"': self->incr_mode = INCR_M_STR; goto incr_m_str;