--- JSON-XS/XS.xs 2007/11/28 14:01:01 1.67 +++ JSON-XS/XS.xs 2008/03/19 15:17:53 1.74 @@ -52,16 +52,20 @@ #define SE } while (0) #if __GNUC__ >= 3 -# define expect(expr,value) __builtin_expect ((expr),(value)) -# define inline inline +# define expect(expr,value) __builtin_expect ((expr), (value)) +# define INLINE static inline #else # define expect(expr,value) (expr) -# define inline static +# define INLINE static #endif #define expect_false(expr) expect ((expr) != 0, 0) #define expect_true(expr) expect ((expr) != 0, 1) +#define IN_RANGE_INC(type,val,beg,end) \ + ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ + <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) + #ifdef USE_ITHREADS # define JSON_SLOW 1 # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) @@ -82,7 +86,7 @@ ///////////////////////////////////////////////////////////////////////////// // utility functions -inline void +INLINE void shrink (SV *sv) { sv_utf8_downgrade (sv, 1); @@ -101,21 +105,42 @@ // we special-case "safe" characters from U+80 .. U+7FF, // but use the very good perl function to parse anything else. // note that we never call this function for a ascii codepoints -inline UV +INLINE UV decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) { - if (expect_false (s[0] > 0xdf || s[0] < 0xc2)) - return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); - else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf) + if (expect_true (len >= 2 + && IN_RANGE_INC (char, s[0], 0xc2, 0xdf) + && IN_RANGE_INC (char, s[1], 0x80, 0xbf))) { *clen = 2; return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); } else - { - *clen = (STRLEN)-1; - return (UV)-1; - } + return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); +} + +// likewise for encoding, also never called for ascii codepoints +// this function takes advantage of this fact, although current gccs +// seem to optimise the check for >= 0x80 away anyways +INLINE unsigned char * +encode_utf8 (unsigned char *s, UV ch) +{ + if (expect_false (ch < 0x000080)) + *s++ = ch; + else if (expect_true (ch < 0x000800)) + *s++ = 0xc0 | ( ch >> 6), + *s++ = 0x80 | ( ch & 0x3f); + else if ( ch < 0x010000) + *s++ = 0xe0 | ( ch >> 12), + *s++ = 0x80 | ((ch >> 6) & 0x3f), + *s++ = 0x80 | ( ch & 0x3f); + else if ( ch < 0x110000) + *s++ = 0xf0 | ( ch >> 18), + *s++ = 0x80 | ((ch >> 12) & 0x3f), + *s++ = 0x80 | ((ch >> 6) & 0x3f), + *s++ = 0x80 | ( ch & 0x3f); + + return s; } ///////////////////////////////////////////////////////////////////////////// @@ -130,9 +155,10 @@ JSON json; U32 indent; // indentation level U32 maxdepth; // max. indentation/recursion level + UV limit; // escape character values >= this value when encoding } enc_t; -inline void +INLINE void need (enc_t *enc, STRLEN len) { if (expect_false (enc->cur + len >= enc->end)) @@ -144,7 +170,7 @@ } } -inline void +INLINE void encode_ch (enc_t *enc, char ch) { need (enc, 1); @@ -208,13 +234,13 @@ clen = 1; } - if (uch > 0x10FFFFUL) - croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); - - if (uch < 0x80 || enc->json.flags & F_ASCII || (enc->json.flags & F_LATIN1 && uch > 0xFF)) + if (uch < 0x80/*0x20*/ || uch >= enc->limit) { - if (uch > 0xFFFFUL) + if (uch >= 0x10000UL) { + if (uch >= 0x110000UL) + croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); + need (enc, len += 11); sprintf (enc->cur, "\\u%04x\\u%04x", (int)((uch - 0x10000) / 0x400 + 0xD800), @@ -252,7 +278,7 @@ else { need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed - enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); + enc->cur = encode_utf8 (enc->cur, uch); ++str; } } @@ -263,7 +289,7 @@ } } -inline void +INLINE void encode_indent (enc_t *enc) { if (enc->json.flags & F_INDENT) @@ -276,14 +302,14 @@ } } -inline void +INLINE void encode_space (enc_t *enc) { need (enc, 1); encode_ch (enc, ' '); } -inline void +INLINE void encode_nl (enc_t *enc) { if (enc->json.flags & F_INDENT) @@ -293,7 +319,7 @@ } } -inline void +INLINE void encode_comma (enc_t *enc) { encode_ch (enc, ','); @@ -621,11 +647,14 @@ } else if (SvIOKp (sv)) { - // we assume we can always read an IV as a UV - if (SvUV (sv) & ~(UV)0x7fff) + // we assume we can always read an IV as a UV and vice versa + // we assume two's complement + // we assume no aliasing issues in the union + if (SvIsUV (sv) ? SvUVX (sv) > 59000 + : SvIVX (sv) > 59000 || SvIVX (sv) < -59000) { // large integer, use the (rather slow) snprintf way. - need (enc, sizeof (UV) * 3); + need (enc, sizeof (UV) * 5 / 2 + 1); // CHAR_BIT is at least 8 enc->cur += SvIsUV(sv) ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv)) @@ -635,7 +664,8 @@ { // optimise the "small number case" // code will likely be branchless and use only a single multiplication - I32 i = SvIV (sv); + // works for numbers up to 59074 + I32 i = SvIVX (sv); U32 u; char digit, nz = 0; @@ -682,6 +712,9 @@ enc.end = SvEND (enc.sv); enc.indent = 0; enc.maxdepth = DEC_DEPTH (enc.json.flags); + enc.limit = enc.json.flags & F_ASCII ? 0x000080UL + : enc.json.flags & F_LATIN1 ? 0x000100UL + : 0x110000UL; SvPOK_only (enc.sv); encode_sv (&enc, scalar); @@ -712,7 +745,7 @@ U32 maxdepth; // recursion depth limit } dec_t; -inline void +INLINE void decode_comment (dec_t *dec) { // only '#'-style comments allowed a.t.m. @@ -721,7 +754,7 @@ ++dec->cur; } -inline void +INLINE void decode_ws (dec_t *dec) { for (;;) @@ -857,7 +890,7 @@ { utf8 = 1; - cur = (char *)uvuni_to_utf8_flags (cur, hi, 0); + cur = encode_utf8 (cur, hi); } else *cur++ = hi; @@ -869,7 +902,7 @@ ERR ("illegal backslash escape sequence in string"); } } - else if (expect_true (ch >= 0x20 && ch <= 0x7f)) + else if (expect_true (ch >= 0x20 && ch < 0x80)) *cur++ = ch; else if (ch >= 0x80) { @@ -1002,22 +1035,24 @@ { int len = dec->cur - start; - // special case the rather common 1..4-digit-int case, assumes 32 bit ints or so + // special case the rather common 1..5-digit-int case if (*start == '-') switch (len) { - case 2: return newSViv (-( start [1] - '0' * 1)); - case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); - case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); - case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); + case 2: return newSViv (-( start [1] - '0' * 1)); + case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); + case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); + case 5: return newSViv (-( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); + case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); } else switch (len) { - case 1: return newSViv ( start [0] - '0' * 1); - case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); - case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); - case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); + case 1: return newSViv ( start [0] - '0' * 1); + case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); + case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); + case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); + case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); } { @@ -1276,12 +1311,12 @@ decode_sv (dec_t *dec) { // the beauty of JSON: you need exactly one character lookahead - // to parse anything. + // to parse everything. switch (*dec->cur) { case '"': ++dec->cur; return decode_str (dec); - case '[': ++dec->cur; return decode_av (dec); - case '{': ++dec->cur; return decode_hv (dec); + case '[': ++dec->cur; return decode_av (dec); + case '{': ++dec->cur; return decode_hv (dec); case '-': case '0': case '1': case '2': case '3': case '4': @@ -1459,7 +1494,10 @@ SvPOK_only (pv); Zero (SvPVX (pv), 1, JSON); ((JSON *)SvPVX (pv))->flags = F_DEFAULT; - XPUSHs (sv_2mortal (sv_bless (newRV_noinc (pv), JSON_STASH))); + XPUSHs (sv_2mortal (sv_bless ( + newRV_noinc (pv), + strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1) + ))); } void ascii (JSON *self, int enable = 1) @@ -1496,7 +1534,6 @@ get_canonical = F_CANONICAL get_space_before = F_SPACE_BEFORE get_space_after = F_SPACE_AFTER - get_pretty = F_PRETTY get_allow_nonref = F_ALLOW_NONREF get_shrink = F_SHRINK get_allow_blessed = F_ALLOW_BLESSED @@ -1603,14 +1640,14 @@ PROTOTYPES: ENABLE -void to_json (SV *scalar) +void encode_json (SV *scalar) PPCODE: { JSON json = { F_DEFAULT | F_UTF8 }; XPUSHs (encode_json (scalar, &json)); } -void from_json (SV *jsonstr) +void decode_json (SV *jsonstr) PPCODE: { JSON json = { F_DEFAULT | F_UTF8 };