--- JSON-XS/XS.xs 2007/03/24 22:10:08 1.12
+++ JSON-XS/XS.xs 2007/03/24 22:55:16 1.13
@@ -55,6 +55,34 @@
     }
 }
 
+// decode a utf-8 character and return its code point.
+// on success *clen receives the number of bytes consumed.
+// on error *clen is set to (STRLEN)-1; callers must test *clen
+// rather than the return value, because the perl fallback returns
+// 0, not (UV)-1, for malformed input (see perlapi, UTF8_CHECK_ONLY).
+// we special-case "safe" characters from U+80 .. U+7FF,
+// but use the very good perl function to parse anything else.
+// note that we never call this function for ascii codepoints;
+// s[0] is read unconditionally, so len must be at least 1.
+static UV
+decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)
+{
+  if (s[0] > 0xdf || s[0] < 0xc2)
+    return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
+  else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)
+    {
+      *clen = 2;
+      return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
+    }
+  else
+    {
+      // truncated or invalid two-byte sequence: report it through *clen
+      // too, since the call sites test clen == (STRLEN)-1 to detect errors.
+      *clen = (STRLEN)-1;
+      return (UV)-1;
+    }
+}
+
 /////////////////////////////////////////////////////////////////////////////
 // encoder
 
@@ -135,7 +163,8 @@
 
           if (is_utf8)
             {
-              uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
+              //uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
+              uch = decode_utf8 (str, end - str, &clen);
               if (clen == (STRLEN)-1)
                 croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
             }
@@ -621,7 +650,7 @@
           --dec->cur;
 
           STRLEN clen;
-          UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
+          UV uch = decode_utf8 (dec->cur, dec->end - dec->cur, &clen);
           if (clen == (STRLEN)-1)
             ERR ("malformed UTF-8 character in JSON string");
 