--- JSON-XS/XS.xs 2007/03/22 17:28:50 1.2 +++ JSON-XS/XS.xs 2007/03/23 15:10:55 1.6 @@ -13,6 +13,8 @@ #define F_SPACE_BEFORE 0x00000010 #define F_SPACE_AFTER 0x00000020 #define F_JSON_RPC 0x00000040 +#define F_ALLOW_NONREF 0x00000080 +#define F_SHRINK 0x00000100 #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER #define F_DEFAULT 0 @@ -41,7 +43,7 @@ { char *cur; char *end; - char *err; + const char *err; UV flags; } dec_t; @@ -64,7 +66,7 @@ STRLEN cur = enc->cur - SvPVX (enc->sv); SvGROW (enc->sv, cur + len + 1); enc->cur = SvPVX (enc->sv) + cur; - enc->end = SvEND (enc->sv); + enc->end = SvPVX (enc->sv) + SvLEN (enc->sv); } } @@ -80,59 +82,99 @@ { char *end = str + len; + need (enc, len); + while (str < end) { unsigned char ch = *(unsigned char *)str; + if (ch >= 0x20 && ch < 0x80) // most common case { - *enc->cur++ = ch; - str++; - } - else - { - STRLEN clen; - UV uch; - - if (is_utf8) + if (ch == '"') // but with slow exceptions { - uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); - if (clen < 0) - croak ("malformed UTF-8 character in string, cannot convert to JSON"); + need (enc, len += 1); + *enc->cur++ = '\\'; + *enc->cur++ = '"'; } - else + else if (ch == '\\') { - uch = ch; - clen = 1; + need (enc, len += 1); + *enc->cur++ = '\\'; + *enc->cur++ = '\\'; } + else + *enc->cur++ = ch; - need (enc, len += 6); - - if (uch < 0xa0 || enc->flags & F_ASCII) + ++str; + } + else + { + switch (ch) { - if (uch > 0xFFFFUL) - { - len += 6; - need (enc, len += 6); - sprintf (enc->cur, "\\u%04x\\u%04x", - (uch - 0x10000) / 0x400 + 0xD800, - (uch - 0x10000) % 0x400 + 0xDC00); - enc->cur += 12; - } - else + case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break; + case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break; + case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break; + case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break; + case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break; + + default: { - sprintf (enc->cur, "\\u%04x", uch); - enc->cur += 6; + STRLEN clen; + UV uch; + + if (is_utf8) + { + uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); + if (clen == (STRLEN)-1) + croak ("malformed UTF-8 character in string, cannot convert to JSON"); + } + else + { + uch = ch; + clen = 1; + } + + if (uch < 0x80 || enc->flags & F_ASCII) + { + if (uch > 0xFFFFUL) + { + need (enc, len += 11); + sprintf (enc->cur, "\\u%04x\\u%04x", + (uch - 0x10000) / 0x400 + 0xD800, + (uch - 0x10000) % 0x400 + 0xDC00); + enc->cur += 12; + } + else + { + static char hexdigit [16] = "0123456789abcdef"; + need (enc, len += 5); + *enc->cur++ = '\\'; + *enc->cur++ = 'u'; + *enc->cur++ = hexdigit [ uch >> 12 ]; + *enc->cur++ = hexdigit [(uch >> 8) & 15]; + *enc->cur++ = hexdigit [(uch >> 4) & 15]; + *enc->cur++ = hexdigit [(uch >> 0) & 15]; + } + + str += clen; + } + else if (is_utf8) + { + need (enc, len += clen); + do + { + *enc->cur++ = *str++; + } + while (--clen); + } + else + { + need (enc, 10); // never more than 11 bytes needed + enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); + ++str; + } } } - else if (is_utf8) - { - memcpy (enc->cur, str, clen); - enc->cur += clen; - } - else - enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); - - str += clen; } --len; @@ -193,7 +235,10 @@ { SV *sv = HeSVKEY (he); STRLEN len; - char *str = SvPV (sv, len); + char *str; + + SvGETMAGIC (sv); + str = SvPV (sv, len); encode_str (enc, str, len, SvUTF8 (sv)); } @@ -314,6 +359,8 @@ static void encode_sv (enc_t *enc, SV *sv) { + SvGETMAGIC (sv); + if (SvPOKp (sv)) { STRLEN len; @@ -361,6 +408,9 @@ static SV * encode_json (SV *scalar, UV flags) { + if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar)) + croak ("hash- or arraref required (not a simple scalar, use allow_nonref to allow this)"); + enc_t enc; enc.flags = flags; enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); @@ -376,6 +426,11 @@ SvUTF8_on (enc.sv); SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); + +#ifdef SvPV_shrink_to_cur + if (enc.flags & F_SHRINK) + SvPV_shrink_to_cur (enc.sv); +#endif return enc.sv; } @@ -400,11 +455,6 @@ static SV *decode_sv (dec_t *dec); -#define APPEND_CH(ch) SB \ - SvGROW (sv, cur + 1 + 1); \ - SvPVX (sv)[cur++] = (ch); \ - SE - static signed char decode_hexdigit[256]; static UV @@ -432,12 +482,28 @@ return (UV)-1; } +#define APPEND_GROW(n) SB \ + if (cur + (n) >= end) \ + { \ + STRLEN ofs = cur - SvPVX (sv); \ + SvGROW (sv, ofs + (n) + 1); \ + cur = SvPVX (sv) + ofs; \ + end = SvEND (sv); \ + } \ + SE + +#define APPEND_CH(ch) SB \ + APPEND_GROW (1); \ + *cur++ = (ch); \ + SE + static SV * decode_str (dec_t *dec) { SV *sv = NEWSV (0,2); - STRLEN cur = 0; int utf8 = 0; + char *cur = SvPVX (sv); + char *end = SvEND (sv); for (;;) { @@ -472,7 +538,7 @@ if (hi >= 0xd800 && hi < 0xdc00) { if (dec->cur [0] != '\\' || dec->cur [1] != 'u') - ERR ("illegal surrogate character"); + ERR ("missing low surrogate character in surrogate pair"); dec->cur += 2; @@ -485,20 +551,24 @@ hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000; } - else if (lo >= 0xdc00 && lo < 0xe000) - ERR ("illegal surrogate character"); + else if (hi >= 0xdc00 && hi < 0xe000) + ERR ("missing high surrogate character in surrogate pair"); if (hi >= 0x80) { utf8 = 1; - SvGROW (sv, cur + 4 + 1); // at most 4 bytes for 21 bits - cur = (char *)uvuni_to_utf8_flags (SvPVX (sv) + cur, hi, 0) - SvPVX (sv); + APPEND_GROW (4); // at most 4 bytes for 21 bits + cur = (char *)uvuni_to_utf8_flags (cur, hi, 0); } else APPEND_CH (hi); } break; + + default: + --dec->cur; + ERR ("illegal backslash escape sequence in string"); } } else if (ch >= 0x20 && ch <= 0x7f) @@ -507,27 +577,39 @@ { STRLEN clen; UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY); - if (clen < 0) + if (clen == (STRLEN)-1) ERR ("malformed UTF-8 character in string, cannot convert to JSON"); - SvGROW (sv, cur + clen + 1); // at most 4 bytes for 21 bits - memcpy (SvPVX (sv) + cur, dec->cur, clen); - dec->cur += clen; + APPEND_GROW (clen); + do + { + *cur++ = *dec->cur++; + } + while (--clen); + + utf8 = 1; } + else if (dec->cur == dec->end) + ERR ("unexpected end of string while parsing json string"); else ERR ("invalid character encountered"); } ++dec->cur; - SvPOK_only (sv); + SvCUR_set (sv, cur - SvPVX (sv)); - SvCUR_set (sv, cur); + SvPOK_only (sv); *SvEND (sv) = 0; if (utf8) SvUTF8_on (sv); +#ifdef SvPV_shrink_to_cur + if (dec->flags & F_SHRINK) + SvPV_shrink_to_cur (sv); +#endif + return sv; fail: @@ -551,34 +633,50 @@ if (*dec->cur >= '0' && *dec->cur <= '9') ERR ("malformed number (leading zero must not be followed by another digit)"); } - - // int - while (*dec->cur >= '0' && *dec->cur <= '9') - ++dec->cur; + else if (*dec->cur < '0' || *dec->cur > '9') + ERR ("malformed number (no digits after initial minus)"); + else + do + { + ++dec->cur; + } + while (*dec->cur >= '0' && *dec->cur <= '9'); // [frac] if (*dec->cur == '.') { - is_nv = 1; + ++dec->cur; + + if (*dec->cur < '0' || *dec->cur > '9') + ERR ("malformed number (no digits after decimal point)"); do { ++dec->cur; } while (*dec->cur >= '0' && *dec->cur <= '9'); + + is_nv = 1; } // [exp] if (*dec->cur == 'e' || *dec->cur == 'E') { - is_nv = 1; - ++dec->cur; + if (*dec->cur == '-' || *dec->cur == '+') ++dec->cur; - while (*dec->cur >= '0' && *dec->cur <= '9') - ++dec->cur; + if (*dec->cur < '0' || *dec->cur > '9') + ERR ("malformed number (no digits after exp sign)"); + + do + { + ++dec->cur; + } + while (*dec->cur >= '0' && *dec->cur <= '9'); + + is_nv = 1; } if (!is_nv) @@ -606,29 +704,33 @@ { AV *av = newAV (); - for (;;) - { - SV *value; + WS; + if (*dec->cur == ']') + ++dec->cur; + else + for (;;) + { + SV *value; - value = decode_sv (dec); - if (!value) - goto fail; + value = decode_sv (dec); + if (!value) + goto fail; - av_push (av, value); + av_push (av, value); - WS; + WS; - if (*dec->cur == ']') - { - ++dec->cur; - break; - } - - if (*dec->cur != ',') - ERR (", or ] expected while parsing array"); + if (*dec->cur == ']') + { + ++dec->cur; + break; + } + + if (*dec->cur != ',') + ERR (", or ] expected while parsing array"); - ++dec->cur; - } + ++dec->cur; + } return newRV_noinc ((SV *)av); @@ -642,41 +744,45 @@ { HV *hv = newHV (); - for (;;) - { - SV *key, *value; + WS; + if (*dec->cur == '}') + ++dec->cur; + else + for (;;) + { + SV *key, *value; - WS; EXPECT_CH ('"'); + WS; EXPECT_CH ('"'); - key = decode_str (dec); - if (!key) - goto fail; + key = decode_str (dec); + if (!key) + goto fail; - WS; EXPECT_CH (':'); + WS; EXPECT_CH (':'); - value = decode_sv (dec); - if (!value) - { - SvREFCNT_dec (key); - goto fail; - } + value = decode_sv (dec); + if (!value) + { + SvREFCNT_dec (key); + goto fail; + } - //TODO: optimise - hv_store_ent (hv, key, value, 0); + //TODO: optimise + hv_store_ent (hv, key, value, 0); - WS; + WS; - if (*dec->cur == '}') - { - ++dec->cur; - break; - } + if (*dec->cur == '}') + { + ++dec->cur; + break; + } - if (*dec->cur != ',') - ERR (", or } expected while parsing object/hash"); + if (*dec->cur != ',') + ERR (", or } expected while parsing object/hash"); - ++dec->cur; - } + ++dec->cur; + } return newRV_noinc ((SV *)hv); @@ -726,7 +832,7 @@ if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4)) { dec->cur += 4; - return newSViv (1); + return newSVsv (&PL_sv_undef); } else ERR ("'null' expected"); @@ -747,7 +853,9 @@ { SV *sv; - if (!(flags & F_UTF8)) + if (flags & F_UTF8) + sv_utf8_downgrade (string, 0); + else sv_utf8_upgrade (string); SvGROW (string, SvCUR (string) + 1); // should basically be a NOP @@ -758,24 +866,32 @@ dec.end = SvEND (string); dec.err = 0; - *dec.end = 1; // invalid anywhere sv = decode_sv (&dec); - *dec.end = 0; if (!sv) { IV offset = utf8_distance (dec.cur, SvPVX (string)); SV *uni = sv_newmortal (); + // horrible hack to silence warning inside pv_uni_display + COP cop; + memset (&cop, 0, sizeof (cop)); + cop.cop_warnings = pWARN_NONE; + SAVEVPTR (PL_curcop); + PL_curcop = &cop; pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); - croak ("%s, at character %d (%s)", + croak ("%s, at character offset %d (%s)", dec.err, (int)offset, dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); } - sv_dump (sv);//D - return sv_2mortal (sv); + sv = sv_2mortal (sv); + + if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv)) + croak ("JSON object or array expected (but number, string, true, false or null found, use allow_nonref to allow this)"); + + return sv; } MODULE = JSON::XS PACKAGE = JSON::XS @@ -788,7 +904,7 @@ for (i = 10; i--; ) decode_hexdigit ['0' + i] = i; - for (i = 6; --i; ) + for (i = 7; i--; ) { decode_hexdigit ['a' + i] = 10 + i; decode_hexdigit ['A' + i] = 10 + i; @@ -797,13 +913,15 @@ json_stash = gv_stashpv ("JSON::XS", 1); } +PROTOTYPES: DISABLE + SV *new (char *dummy) CODE: RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash); OUTPUT: RETVAL -SV *ascii (SV *self, int enable) +SV *ascii (SV *self, int enable = 1) ALIAS: ascii = F_ASCII utf8 = F_UTF8 @@ -813,6 +931,8 @@ space_after = F_SPACE_AFTER json_rpc = F_JSON_RPC pretty = F_PRETTY + allow_nonref = F_ALLOW_NONREF + shrink = F_SHRINK CODE: { UV *uv = SvJSON (self); @@ -834,6 +954,8 @@ PPCODE: XPUSHs (decode_json (jsonstr, *SvJSON (self))); +PROTOTYPES: ENABLE + void to_json (SV *scalar) PPCODE: XPUSHs (encode_json (scalar, F_UTF8));