# Patch for JSON-XS/XS.xs, revision 1.71 -> 1.72.
#
# What it changes:
#   * adds IN_RANGE_INC (type, val, beg, end): an inclusive range test that
#     compares (val - beg) against (end - beg) in unsigned arithmetic.
#   * decode_utf8: decodes the two-byte form inline (len >= 2, lead byte
#     0xc2..0xdf, second byte 0x80..0xbf) and sets *clen = 2; every other
#     input is now handed to utf8n_to_uvuni with UTF8_CHECK_ONLY instead of
#     returning (UV)-1 directly.
#   * adds encode_utf8: writes two bytes for ch <= 0x7FF, otherwise calls
#     uvuni_to_utf8_flags; returns the advanced output pointer. There is no
#     separate branch for ch < 0x80, so callers must not pass ascii
#     codepoints (as the function's own comment states).
#   * both former uvuni_to_utf8_flags call sites now call encode_utf8.
#
# Review notes:
#   * last hunk: encode_utf8 takes and returns unsigned char *, and the line
#     it replaces needed a (char *) cast on the result, so the casts are kept
#     here to avoid a pointer-signedness mismatch.
#   * line breaks and indentation of this patch were reconstructed; apply it
#     with whitespace ignored (patch -l).
#
--- JSON-XS/XS.xs 2008/03/19 03:17:38 1.71
+++ JSON-XS/XS.xs 2008/03/19 04:08:22 1.72
@@ -62,6 +62,10 @@
 #define expect_false(expr) expect ((expr) != 0, 0)
 #define expect_true(expr) expect ((expr) != 0, 1)
 
+#define IN_RANGE_INC(type,val,beg,end) \
+  ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
+  <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
+
 #ifdef USE_ITHREADS
 # define JSON_SLOW 1
 # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
@@ -104,18 +108,32 @@
 INLINE UV
 decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)
 {
-  if (expect_false (s[0] > 0xdf || s[0] < 0xc2))
-    return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
-  else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)
+  if (expect_true (len >= 2
+                   && IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
+                   && IN_RANGE_INC (char, s[1], 0x80, 0xbf)))
     {
       *clen = 2;
       return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
     }
   else
+    return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
+}
+
+// likewise for encoding, also never called for ascii codepoints
+// this function takes advantage of this fact, although current gccs
+// seem to optimise the check for >= 0x80 away anyways
+INLINE unsigned char *
+encode_utf8 (unsigned char *s, UV ch)
+{
+  if (ch <= 0x7FF)
     {
-      *clen = (STRLEN)-1;
-      return (UV)-1;
+      *s++ = (ch >> 6) | 0xc0;
+      *s++ = (ch & 0x3f) | 0x80;
     }
+  else
+    s = uvuni_to_utf8_flags (s, ch, 0);
+
+  return s;
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -209,7 +227,7 @@
                       clen = 1;
                     }
 
-                  if (uch < 0x20 || uch >= enc->limit)
+                  if (uch < 0x80/*0x20*/ || uch >= enc->limit)
                     {
                       if (uch > 0xFFFFUL)
                         {
@@ -253,7 +271,7 @@
                   else
                     {
                       need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
-                      enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
+                      enc->cur = encode_utf8 (enc->cur, uch);
                       ++str;
                     }
                 }
@@ -861,7 +879,7 @@
                     {
                       utf8 = 1;
 
-                      cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
+                      cur = (char *)encode_utf8 ((unsigned char *)cur, hi);
                     }
                   else
                     *cur++ = hi;