--- JSON-XS/XS.xs 2007/03/23 15:10:55 1.6 +++ JSON-XS/XS.xs 2007/03/24 01:15:22 1.10 @@ -12,7 +12,6 @@ #define F_CANONICAL 0x00000008 #define F_SPACE_BEFORE 0x00000010 #define F_SPACE_AFTER 0x00000020 -#define F_JSON_RPC 0x00000040 #define F_ALLOW_NONREF 0x00000080 #define F_SHRINK 0x00000100 @@ -56,6 +55,15 @@ return &SvUVX (SvRV (sv)); } +static void +shrink (SV *sv) +{ + sv_utf8_downgrade (sv, 1); +#ifdef SvPV_shrink_to_cur + SvPV_shrink_to_cur (sv); +#endif +} + ///////////////////////////////////////////////////////////////////////////// static void @@ -126,7 +134,7 @@ { uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); if (clen == (STRLEN)-1) - croak ("malformed UTF-8 character in string, cannot convert to JSON"); + croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str); } else { @@ -134,14 +142,17 @@ clen = 1; } + if (uch > 0x10FFFFUL) + croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); + if (uch < 0x80 || enc->flags & F_ASCII) { if (uch > 0xFFFFUL) { need (enc, len += 11); sprintf (enc->cur, "\\u%04x\\u%04x", - (uch - 0x10000) / 0x400 + 0xD800, - (uch - 0x10000) % 0x400 + 0xDC00); + (int)((uch - 0x10000) / 0x400 + 0xD800), + (int)((uch - 0x10000) % 0x400 + 0xDC00)); enc->cur += 12; } else @@ -169,7 +180,7 @@ } else { - need (enc, 10); // never more than 11 bytes needed + need (enc, len += 10); // never more than 11 bytes needed enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); ++str; } @@ -310,16 +321,20 @@ qsort (hes, count, sizeof (HE *), he_cmp_fast); else { - // hack to disable "use bytes" - COP *oldcop = PL_curcop, cop; + // hack to forcefully disable "use bytes" + COP cop = *PL_curcop; cop.op_private = 0; - PL_curcop = &cop; + ENTER; SAVETMPS; + + SAVEVPTR (PL_curcop); + PL_curcop = &cop; + qsort (hes, count, sizeof (HE *), he_cmp_slow); - FREETMPS; - PL_curcop = oldcop; + FREETMPS; + LEAVE; } for (i = 0; i < count; ++i) @@ -385,31 +400,33 @@ } else if (SvROK (sv)) { + SV *rv = SvRV (sv); + if (!--enc->max_recurse) croak ("data structure too deep (hit recursion limit)"); - sv = SvRV (sv); - - switch (SvTYPE (sv)) + switch (SvTYPE (rv)) { - case SVt_PVAV: encode_av (enc, (AV *)sv); break; - case SVt_PVHV: encode_hv (enc, (HV *)sv); break; + case SVt_PVAV: encode_av (enc, (AV *)rv); break; + case SVt_PVHV: encode_hv (enc, (HV *)rv); break; default: - croak ("JSON can only represent references to arrays or hashes"); + croak ("encountered %s, but JSON can only represent references to arrays or hashes", + SvPV_nolen (sv)); } } else if (!SvOK (sv)) encode_str (enc, "null", 4, 0); else - croak ("encountered perl type that JSON cannot handle"); + croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this", + SvPV_nolen (sv), SvFLAGS (sv)); } static SV * encode_json (SV *scalar, UV flags) { if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar)) - croak ("hash- or arraref required (not a simple scalar, use allow_nonref to allow this)"); + croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)"); enc_t enc; enc.flags = flags; @@ -427,10 +444,9 @@ SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); -#ifdef SvPV_shrink_to_cur if (enc.flags & F_SHRINK) - SvPV_shrink_to_cur (enc.sv); -#endif + shrink (enc.sv); + return enc.sv; } @@ -578,7 +594,7 @@ STRLEN clen; UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY); if (clen == (STRLEN)-1) - ERR ("malformed UTF-8 character in string, cannot convert to JSON"); + ERR ("malformed UTF-8 character in JSON string"); APPEND_GROW (clen); do @@ -605,10 +621,8 @@ if (utf8) SvUTF8_on (sv); -#ifdef SvPV_shrink_to_cur if (dec->flags & F_SHRINK) - SvPV_shrink_to_cur (sv); -#endif + shrink (sv); return sv; @@ -840,7 +854,7 @@ break; default: - ERR ("malformed json string"); + ERR ("malformed json string, neither array, object, number, string or atom"); break; } @@ -870,16 +884,20 @@ if (!sv) { - IV offset = utf8_distance (dec.cur, SvPVX (string)); + IV offset = dec.flags & F_UTF8 + ? dec.cur - SvPVX (string) + : utf8_distance (dec.cur, SvPVX (string)); SV *uni = sv_newmortal (); + // horrible hack to silence warning inside pv_uni_display - COP cop; - memset (&cop, 0, sizeof (cop)); + COP cop = *PL_curcop; cop.cop_warnings = pWARN_NONE; + ENTER; SAVEVPTR (PL_curcop); PL_curcop = &cop; - pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); + LEAVE; + croak ("%s, at character offset %d (%s)", dec.err, (int)offset, @@ -889,7 +907,7 @@ sv = sv_2mortal (sv); if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv)) - croak ("JSON object or array expected (but number, string, true, false or null found, use allow_nonref to allow this)"); + croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)"); return sv; } @@ -929,7 +947,6 @@ canonical = F_CANONICAL space_before = F_SPACE_BEFORE space_after = F_SPACE_AFTER - json_rpc = F_JSON_RPC pretty = F_PRETTY allow_nonref = F_ALLOW_NONREF shrink = F_SHRINK