--- JSON-XS/XS.xs 2007/03/22 23:24:18 1.5 +++ JSON-XS/XS.xs 2007/03/23 15:57:18 1.7 @@ -14,6 +14,7 @@ #define F_SPACE_AFTER 0x00000020 #define F_JSON_RPC 0x00000040 #define F_ALLOW_NONREF 0x00000080 +#define F_SHRINK 0x00000100 #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER #define F_DEFAULT 0 @@ -55,6 +56,15 @@ return &SvUVX (SvRV (sv)); } +static void +shrink (SV *sv) +{ + sv_utf8_downgrade (sv, 1); +#ifdef SvPV_shrink_to_cur + SvPV_shrink_to_cur (sv); +#endif +} + ///////////////////////////////////////////////////////////////////////////// static void @@ -87,94 +97,92 @@ { unsigned char ch = *(unsigned char *)str; - if (ch == '"') - { - need (enc, len += 1); - *enc->cur++ = '\\'; - *enc->cur++ = '"'; - ++str; - } - else if (ch == '\\') - { - need (enc, len += 1); - *enc->cur++ = '\\'; - *enc->cur++ = '\\'; - ++str; - } - else if (ch >= 0x20 && ch < 0x80) // most common case - { - *enc->cur++ = ch; - ++str; - } - else if (ch == '\015') + if (ch >= 0x20 && ch < 0x80) // most common case { - need (enc, len += 1); - *enc->cur++ = '\\'; - *enc->cur++ = 'r'; - ++str; - } - else if (ch == '\012') - { - need (enc, len += 1); - *enc->cur++ = '\\'; - *enc->cur++ = 'n'; - ++str; - } - else - { - STRLEN clen; - UV uch; - - if (is_utf8) + if (ch == '"') // but with slow exceptions { - uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); - if (clen == (STRLEN)-1) - croak ("malformed UTF-8 character in string, cannot convert to JSON"); + need (enc, len += 1); + *enc->cur++ = '\\'; + *enc->cur++ = '"'; } - else + else if (ch == '\\') { - uch = ch; - clen = 1; + need (enc, len += 1); + *enc->cur++ = '\\'; + *enc->cur++ = '\\'; } + else + *enc->cur++ = ch; - if (uch < 0x80 || enc->flags & F_ASCII) + ++str; + } + else + { + switch (ch) { - if (uch > 0xFFFFUL) - { - need (enc, len += 11); - sprintf (enc->cur, "\\u%04x\\u%04x", - (uch - 0x10000) / 0x400 + 0xD800, - (uch - 0x10000) % 0x400 + 0xDC00); - enc->cur += 12; - } - else - { - static char hexdigit [16] = "0123456789abcdef"; - need (enc, len += 5); - *enc->cur++ = '\\'; - *enc->cur++ = 'u'; - *enc->cur++ = hexdigit [ uch >> 12 ]; - *enc->cur++ = hexdigit [(uch >> 8) & 15]; - *enc->cur++ = hexdigit [(uch >> 4) & 15]; - *enc->cur++ = hexdigit [(uch >> 0) & 15]; - } + case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break; + case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break; + case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break; + case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break; + case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break; - str += clen; - } - else if (is_utf8) - { - need (enc, len += clen); - do + default: { - *enc->cur++ = *str++; + STRLEN clen; + UV uch; + + if (is_utf8) + { + uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); + if (clen == (STRLEN)-1) + croak ("malformed UTF-8 character in string, cannot convert to JSON"); + } + else + { + uch = ch; + clen = 1; + } + + if (uch < 0x80 || enc->flags & F_ASCII) + { + if (uch > 0xFFFFUL) + { + need (enc, len += 11); + sprintf (enc->cur, "\\u%04x\\u%04x", + (uch - 0x10000) / 0x400 + 0xD800, + (uch - 0x10000) % 0x400 + 0xDC00); + enc->cur += 12; + } + else + { + static char hexdigit [16] = "0123456789abcdef"; + need (enc, len += 5); + *enc->cur++ = '\\'; + *enc->cur++ = 'u'; + *enc->cur++ = hexdigit [ uch >> 12 ]; + *enc->cur++ = hexdigit [(uch >> 8) & 15]; + *enc->cur++ = hexdigit [(uch >> 4) & 15]; + *enc->cur++ = hexdigit [(uch >> 0) & 15]; + } + + str += clen; + } + else if (is_utf8) + { + need (enc, len += clen); + do + { + *enc->cur++ = *str++; + } + while (--clen); + } + else + { + need (enc, len += 10); // never more than 11 bytes needed + enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); + ++str; + } } - while (--clen); - } - else - { - need (enc, 10); // never more than 11 bytes needed - enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); - ++str; } } @@ -427,6 +435,10 @@ SvUTF8_on (enc.sv); SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); + + if (enc.flags & F_SHRINK) + shrink (enc.sv); + return enc.sv; } @@ -574,7 +586,7 @@ STRLEN clen; UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY); if (clen == (STRLEN)-1) - ERR ("malformed UTF-8 character in string, cannot convert to JSON"); + ERR ("malformed UTF-8 character in JSON string"); APPEND_GROW (clen); do @@ -601,6 +613,9 @@ if (utf8) SvUTF8_on (sv); + if (dec->flags & F_SHRINK) + shrink (sv); + return sv; fail: @@ -831,7 +846,7 @@ break; default: - ERR ("malformed json string"); + ERR ("malformed json string, neither array, object, number, string or atom"); break; } @@ -861,7 +876,9 @@ if (!sv) { - IV offset = utf8_distance (dec.cur, SvPVX (string)); + IV offset = dec.flags & F_UTF8 + ? dec.cur - SvPVX (string) + : utf8_distance (dec.cur, SvPVX (string)); SV *uni = sv_newmortal (); // horrible hack to silence warning inside pv_uni_display COP cop; @@ -912,7 +929,7 @@ OUTPUT: RETVAL -SV *ascii (SV *self, int enable) +SV *ascii (SV *self, int enable = 1) ALIAS: ascii = F_ASCII utf8 = F_UTF8 @@ -923,6 +940,7 @@ json_rpc = F_JSON_RPC pretty = F_PRETTY allow_nonref = F_ALLOW_NONREF + shrink = F_SHRINK CODE: { UV *uv = SvJSON (self);