--- JSON-XS/XS.xs 2007/03/22 23:24:18 1.5 +++ JSON-XS/XS.xs 2007/03/24 01:15:22 1.10 @@ -12,8 +12,8 @@ #define F_CANONICAL 0x00000008 #define F_SPACE_BEFORE 0x00000010 #define F_SPACE_AFTER 0x00000020 -#define F_JSON_RPC 0x00000040 #define F_ALLOW_NONREF 0x00000080 +#define F_SHRINK 0x00000100 #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER #define F_DEFAULT 0 @@ -55,6 +55,15 @@ return &SvUVX (SvRV (sv)); } +static void +shrink (SV *sv) +{ + sv_utf8_downgrade (sv, 1); +#ifdef SvPV_shrink_to_cur + SvPV_shrink_to_cur (sv); +#endif +} + ///////////////////////////////////////////////////////////////////////////// static void @@ -87,94 +96,95 @@ { unsigned char ch = *(unsigned char *)str; - if (ch == '"') - { - need (enc, len += 1); - *enc->cur++ = '\\'; - *enc->cur++ = '"'; - ++str; - } - else if (ch == '\\') - { - need (enc, len += 1); - *enc->cur++ = '\\'; - *enc->cur++ = '\\'; - ++str; - } - else if (ch >= 0x20 && ch < 0x80) // most common case - { - *enc->cur++ = ch; - ++str; - } - else if (ch == '\015') - { - need (enc, len += 1); - *enc->cur++ = '\\'; - *enc->cur++ = 'r'; - ++str; - } - else if (ch == '\012') + if (ch >= 0x20 && ch < 0x80) // most common case { - need (enc, len += 1); - *enc->cur++ = '\\'; - *enc->cur++ = 'n'; - ++str; - } - else - { - STRLEN clen; - UV uch; - - if (is_utf8) + if (ch == '"') // but with slow exceptions { - uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); - if (clen == (STRLEN)-1) - croak ("malformed UTF-8 character in string, cannot convert to JSON"); + need (enc, len += 1); + *enc->cur++ = '\\'; + *enc->cur++ = '"'; } - else + else if (ch == '\\') { - uch = ch; - clen = 1; + need (enc, len += 1); + *enc->cur++ = '\\'; + *enc->cur++ = '\\'; } + else + *enc->cur++ = ch; - if (uch < 0x80 || enc->flags & F_ASCII) + ++str; + } + else + { + switch (ch) { - if (uch > 0xFFFFUL) - { - need (enc, len += 11); - sprintf (enc->cur, "\\u%04x\\u%04x", - (uch - 0x10000) / 0x400 + 0xD800, - (uch - 0x10000) % 0x400 + 0xDC00); - enc->cur += 12; - } - else - { - static char hexdigit [16] = "0123456789abcdef"; - need (enc, len += 5); - *enc->cur++ = '\\'; - *enc->cur++ = 'u'; - *enc->cur++ = hexdigit [ uch >> 12 ]; - *enc->cur++ = hexdigit [(uch >> 8) & 15]; - *enc->cur++ = hexdigit [(uch >> 4) & 15]; - *enc->cur++ = hexdigit [(uch >> 0) & 15]; - } + case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break; + case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break; + case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break; + case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break; + case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break; - str += clen; - } - else if (is_utf8) - { - need (enc, len += clen); - do + default: { - *enc->cur++ = *str++; + STRLEN clen; + UV uch; + + if (is_utf8) + { + uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); + if (clen == (STRLEN)-1) + croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str); + } + else + { + uch = ch; + clen = 1; + } + + if (uch > 0x10FFFFUL) + croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); + + if (uch < 0x80 || enc->flags & F_ASCII) + { + if (uch > 0xFFFFUL) + { + need (enc, len += 11); + sprintf (enc->cur, "\\u%04x\\u%04x", + (int)((uch - 0x10000) / 0x400 + 0xD800), + (int)((uch - 0x10000) % 0x400 + 0xDC00)); + enc->cur += 12; + } + else + { + static char hexdigit [16] = "0123456789abcdef"; + need (enc, len += 5); + *enc->cur++ = '\\'; + *enc->cur++ = 'u'; + *enc->cur++ = hexdigit [ uch >> 12 ]; + *enc->cur++ = hexdigit [(uch >> 8) & 15]; + *enc->cur++ = hexdigit [(uch >> 4) & 15]; + *enc->cur++ = hexdigit [(uch >> 0) & 15]; + } + + str += clen; + } + else if (is_utf8) + { + need (enc, len += clen); + do + { + *enc->cur++ = *str++; + } + while (--clen); + } + else + { + need (enc, len += 10); // never more than 11 bytes needed + enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); + ++str; + } } - while (--clen); - } - else - { - need (enc, 10); // never more than 11 bytes needed - enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); - ++str; } } @@ -311,16 +321,20 @@ qsort (hes, count, sizeof (HE *), he_cmp_fast); else { - // hack to disable "use bytes" - COP *oldcop = PL_curcop, cop; + // hack to forcefully disable "use bytes" + COP cop = *PL_curcop; cop.op_private = 0; - PL_curcop = &cop; + ENTER; SAVETMPS; + + SAVEVPTR (PL_curcop); + PL_curcop = &cop; + qsort (hes, count, sizeof (HE *), he_cmp_slow); - FREETMPS; - PL_curcop = oldcop; + FREETMPS; + LEAVE; } for (i = 0; i < count; ++i) @@ -386,31 +400,33 @@ } else if (SvROK (sv)) { + SV *rv = SvRV (sv); + if (!--enc->max_recurse) croak ("data structure too deep (hit recursion limit)"); - sv = SvRV (sv); - - switch (SvTYPE (sv)) + switch (SvTYPE (rv)) { - case SVt_PVAV: encode_av (enc, (AV *)sv); break; - case SVt_PVHV: encode_hv (enc, (HV *)sv); break; + case SVt_PVAV: encode_av (enc, (AV *)rv); break; + case SVt_PVHV: encode_hv (enc, (HV *)rv); break; default: - croak ("JSON can only represent references to arrays or hashes"); + croak ("encountered %s, but JSON can only represent references to arrays or hashes", + SvPV_nolen (sv)); } } else if (!SvOK (sv)) encode_str (enc, "null", 4, 0); else - croak ("encountered perl type that JSON cannot handle"); + croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this", + SvPV_nolen (sv), SvFLAGS (sv)); } static SV * encode_json (SV *scalar, UV flags) { if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar)) - croak ("hash- or arraref required (not a simple scalar, use allow_nonref to allow this)"); + croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)"); enc_t enc; enc.flags = flags; @@ -427,6 +443,10 @@ SvUTF8_on (enc.sv); SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); + + if (enc.flags & F_SHRINK) + shrink (enc.sv); + return enc.sv; } @@ -574,7 +594,7 @@ STRLEN clen; UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY); if (clen == (STRLEN)-1) - ERR ("malformed UTF-8 character in string, cannot convert to JSON"); + ERR ("malformed UTF-8 character in JSON string"); APPEND_GROW (clen); do @@ -601,6 +621,9 @@ if (utf8) SvUTF8_on (sv); + if (dec->flags & F_SHRINK) + shrink (sv); + return sv; fail: @@ -831,7 +854,7 @@ break; default: - ERR ("malformed json string"); + ERR ("malformed json string, neither array, object, number, string or atom"); break; } @@ -861,16 +884,20 @@ if (!sv) { - IV offset = utf8_distance (dec.cur, SvPVX (string)); + IV offset = dec.flags & F_UTF8 + ? dec.cur - SvPVX (string) + : utf8_distance (dec.cur, SvPVX (string)); SV *uni = sv_newmortal (); + // horrible hack to silence warning inside pv_uni_display - COP cop; - memset (&cop, 0, sizeof (cop)); + COP cop = *PL_curcop; cop.cop_warnings = pWARN_NONE; + ENTER; SAVEVPTR (PL_curcop); PL_curcop = &cop; - pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); + LEAVE; + croak ("%s, at character offset %d (%s)", dec.err, (int)offset, @@ -880,7 +907,7 @@ sv = sv_2mortal (sv); if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv)) - croak ("JSON object or array expected (but number, string, true, false or null found, use allow_nonref to allow this)"); + croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)"); return sv; } @@ -912,7 +939,7 @@ OUTPUT: RETVAL -SV *ascii (SV *self, int enable) +SV *ascii (SV *self, int enable = 1) ALIAS: ascii = F_ASCII utf8 = F_UTF8 @@ -920,9 +947,9 @@ canonical = F_CANONICAL space_before = F_SPACE_BEFORE space_after = F_SPACE_AFTER - json_rpc = F_JSON_RPC pretty = F_PRETTY allow_nonref = F_ALLOW_NONREF + shrink = F_SHRINK CODE: { UV *uv = SvJSON (self);