--- JSON-XS/XS.xs 2007/06/11 03:18:07 1.38 +++ JSON-XS/XS.xs 2007/06/25 22:11:39 1.46 @@ -11,22 +11,30 @@ # define snprintf _snprintf // C compilers have this in stdio.h #endif -#define F_ASCII 0x00000001UL -#define F_LATIN1 0x00000002UL -#define F_UTF8 0x00000004UL -#define F_INDENT 0x00000008UL -#define F_CANONICAL 0x00000010UL -#define F_SPACE_BEFORE 0x00000020UL -#define F_SPACE_AFTER 0x00000040UL -#define F_ALLOW_NONREF 0x00000100UL -#define F_SHRINK 0x00000200UL -#define F_MAXDEPTH 0xf8000000UL -#define S_MAXDEPTH 27 +// some old perls do not have this, try to make it work, no +// guarantees, though. if it breaks, you get to keep the pieces. +#ifndef UTF8_MAXBYTES +# define UTF8_MAXBYTES 13 +#endif -#define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH)) +#define F_ASCII 0x00000001UL +#define F_LATIN1 0x00000002UL +#define F_UTF8 0x00000004UL +#define F_INDENT 0x00000008UL +#define F_CANONICAL 0x00000010UL +#define F_SPACE_BEFORE 0x00000020UL +#define F_SPACE_AFTER 0x00000040UL +#define F_ALLOW_NONREF 0x00000100UL +#define F_SHRINK 0x00000200UL +#define F_ALLOW_BLESSED 0x00000400UL +#define F_CONV_BLESSED 0x00000800UL // NYI +#define F_MAXDEPTH 0xf8000000UL +#define S_MAXDEPTH 27 +#define F_MAXSIZE 0x01f00000UL +#define S_MAXSIZE 20 -// F_SELFCONVERT? <=> to_json/toJson -// F_BLESSED? 
<=> { $__class__$ => } +#define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH)) +#define DEC_SIZE(flags) (1UL << ((flags & F_MAXSIZE ) >> S_MAXSIZE )) #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER #define F_DEFAULT (9UL << S_MAXDEPTH) @@ -50,7 +58,8 @@ #define expect_false(expr) expect ((expr) != 0, 0) #define expect_true(expr) expect ((expr) != 0, 1) -static HV *json_stash; // JSON::XS:: +static HV *json_stash, *json_boolean_stash; // JSON::XS:: +static SV *json_true, *json_false; ///////////////////////////////////////////////////////////////////////////// // utility functions @@ -180,7 +189,6 @@ if (is_utf8) { - //uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); uch = decode_utf8 (str, end - str, &clen); if (clen == (STRLEN)-1) croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str); @@ -465,7 +473,61 @@ SvGETMAGIC (sv); svt = SvTYPE (sv); - if (svt == SVt_PVHV) + if (expect_false (SvOBJECT (sv))) + { + if (SvSTASH (sv) == json_boolean_stash) + { + if (SvIV (sv) == 0) + encode_str (enc, "false", 5, 0); + else + encode_str (enc, "true", 4, 0); + } + else + { +#if 0 + if (0 && sv_derived_from (rv, "JSON::Literal")) + { + // not yet + } +#endif + if (enc->flags & F_CONV_BLESSED) + { + // we re-bless the reference to get overload and other niceties right + GV *to_json = gv_fetchmethod_autoload (SvSTASH (sv), "TO_JSON", 1); + + if (to_json) + { + dSP; + ENTER; + SAVETMPS; + PUSHMARK (SP); + XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv))); + + // calling with G_SCALAR ensures that we always get a 1 return value + // check anyways. 
+ PUTBACK; + if (call_sv ((SV *)GvCV (to_json), G_SCALAR) != 1) croak ("TO_JSON method did not return exactly one value"); // never wrap the call in assert (): NDEBUG builds would compile the TO_JSON call away + SPAGAIN; + + encode_sv (enc, POPs); + + FREETMPS; + LEAVE; + } + else if (enc->flags & F_ALLOW_BLESSED) + encode_str (enc, "null", 4, 0); + else + croak ("encountered object '%s', but neither allow_blessed enabled nor TO_JSON method available on it", + SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + } + else if (enc->flags & F_ALLOW_BLESSED) + encode_str (enc, "null", 4, 0); + else + croak ("encountered object '%s', but neither allow_blessed nor convert_blessed settings are enabled", + SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + } + } + else if (svt == SVt_PVHV) encode_hv (enc, (HV *)sv); else if (svt == SVt_PVAV) encode_av (enc, (AV *)sv); @@ -499,6 +561,7 @@ } else if (SvNOKp (sv)) { + // trust that perl will do the right thing w.r.t. JSON syntax. need (enc, NV_DIG + 32); Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); enc->cur += strlen (enc->cur); @@ -508,6 +571,7 @@ // we assume we can always read an IV as a UV if (SvUV (sv) & ~(UV)0x7fff) { + // large integer, use the (rather slow) snprintf way. need (enc, sizeof (UV) * 3); enc->cur += SvIsUV(sv) @@ -520,6 +584,7 @@ // code will likely be branchless and use only a single multiplication I32 i = SvIV (sv); U32 u; + char digit, nz = 0; need (enc, 6); @@ -529,13 +594,16 @@ // convert to 4.28 fixed-point representation u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits - char digit, nz = 0; - + // now output digit by digit, each time masking out the integer part + // and multiplying by 5 while moving the decimal point one to the right, + // resulting in a net multiplication by 10. + // we always write the digit to memory but conditionally increment + // the pointer, to ease the usage of conditional move instructions. 
digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5; digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5; digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5; digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5; - digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; + digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' } } else if (SvROK (sv)) @@ -627,10 +695,10 @@ signed char d1, d2, d3, d4; unsigned char *cur = (unsigned char *)dec->cur; - d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("four hexadecimal digits expected"); - d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("four hexadecimal digits expected"); - d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("four hexadecimal digits expected"); - d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("four hexadecimal digits expected"); + d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("exactly four hexadecimal digits expected"); + d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("exactly four hexadecimal digits expected"); + d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("exactly four hexadecimal digits expected"); + d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("exactly four hexadecimal digits expected"); dec->cur += 4; @@ -863,7 +931,7 @@ if (*start == '-') switch (dec->cur - start) { - case 2: return newSViv (-( start [1] - '0' )); + case 2: return newSViv (-( start [1] - '0' * 1)); case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); @@ -871,7 +939,7 @@ else switch 
(dec->cur - start) { - case 1: return newSViv ( start [0] - '0' ); + case 1: return newSViv ( start [0] - '0' * 1); case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); @@ -888,9 +956,12 @@ } else return newSVuv (uv); + + // here would likely be the place for bigint support } } + // if we ever support bigint or bigfloat, this is the place for bigfloat return newSVnv (Atof (start)); fail: @@ -954,25 +1025,62 @@ else for (;;) { - SV *key, *value; - decode_ws (dec); EXPECT_CH ('"'); - key = decode_str (dec); - if (!key) - goto fail; + // heuristic: assume that + // a) decode_str + hv_store_ent are abysmally slow + // b) most hash keys are short, simple ascii text + // so try to "fast-match" such strings to avoid + // the overhead of hv_store_ent. + { + SV *value; + char *p = dec->cur; + char *e = p + 24; // only try up to 24 bytes - decode_ws (dec); EXPECT_CH (':'); + for (;;) + { + if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\') + { + // slow path, back up and use decode_str + SV *key = decode_str (dec); + if (!key) + goto fail; - value = decode_sv (dec); - if (!value) - { - SvREFCNT_dec (key); - goto fail; - } + decode_ws (dec); EXPECT_CH (':'); + + value = decode_sv (dec); + if (!value) + { + SvREFCNT_dec (key); + goto fail; + } + + hv_store_ent (hv, key, value, 0); + SvREFCNT_dec (key); + + break; + } + else if (*p == '"') + { + // fast path, got a simple key + char *key = dec->cur; + int len = p - key; + dec->cur = p + 1; - hv_store_ent (hv, key, value, 0); - SvREFCNT_dec (key); + decode_ws (dec); EXPECT_CH (':'); + + value = decode_sv (dec); + if (!value) + goto fail; + + hv_store (hv, key, len, value, 0); + + break; + } + + ++p; + } + } decode_ws (dec); @@ -1001,6 +1109,9 @@ decode_sv (dec_t *dec) { decode_ws (dec); + + // the beauty of JSON: you need exactly 
one character lookahead + // to parse anything. switch (*dec->cur) { case '"': ++dec->cur; return decode_str (dec); @@ -1016,7 +1127,7 @@ if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) { dec->cur += 4; - return newSViv (1); + return SvREFCNT_inc (json_true); } else ERR ("'true' expected"); @@ -1027,7 +1138,7 @@ if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5)) { dec->cur += 5; - return newSViv (0); + return SvREFCNT_inc (json_false); } else ERR ("'false' expected"); @@ -1064,6 +1175,10 @@ SvGETMAGIC (string); SvUPGRADE (string, SVt_PV); + if (flags & F_MAXSIZE && SvCUR (string) > DEC_SIZE (flags)) + croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", + (unsigned long)SvCUR (string), (unsigned long)DEC_SIZE (flags)); + if (flags & F_UTF8) sv_utf8_downgrade (string, 0); else @@ -1140,8 +1255,6 @@ { int i; - memset (decode_hexdigit, 0xff, 256); - for (i = 0; i < 256; ++i) decode_hexdigit [i] = i >= '0' && i <= '9' ? i - '0' @@ -1149,7 +1262,11 @@ : i >= 'A' && i <= 'F' ? 
i - 'A' + 10 : -1; - json_stash = gv_stashpv ("JSON::XS", 1); + json_stash = gv_stashpv ("JSON::XS" , 1); + json_boolean_stash = gv_stashpv ("JSON::XS::Boolean", 1); + + json_true = get_sv ("JSON::XS::true" , 1); SvREADONLY_on (json_true ); + json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false); } PROTOTYPES: DISABLE @@ -1162,16 +1279,18 @@ SV *ascii (SV *self, int enable = 1) ALIAS: - ascii = F_ASCII - latin1 = F_LATIN1 - utf8 = F_UTF8 - indent = F_INDENT - canonical = F_CANONICAL - space_before = F_SPACE_BEFORE - space_after = F_SPACE_AFTER - pretty = F_PRETTY - allow_nonref = F_ALLOW_NONREF - shrink = F_SHRINK + ascii = F_ASCII + latin1 = F_LATIN1 + utf8 = F_UTF8 + indent = F_INDENT + canonical = F_CANONICAL + space_before = F_SPACE_BEFORE + space_after = F_SPACE_AFTER + pretty = F_PRETTY + allow_nonref = F_ALLOW_NONREF + shrink = F_SHRINK + allow_blessed = F_ALLOW_BLESSED + convert_blessed = F_CONV_BLESSED CODE: { UV *uv = SvJSON (self); @@ -1200,6 +1319,25 @@ RETVAL = newSVsv (self); } + OUTPUT: + RETVAL + +SV *max_size (SV *self, UV max_size = 0) + CODE: +{ + UV *uv = SvJSON (self); + UV log2 = 0; + + if (max_size > 0x80000000UL) max_size = 0x80000000UL; + if (max_size == 1) max_size = 2; + + while ((1UL << log2) < max_size) + ++log2; + + *uv = *uv & ~F_MAXSIZE | (log2 << S_MAXSIZE); + + RETVAL = newSVsv (self); +} OUTPUT: RETVAL