… | |
… | |
4 | |
4 | |
5 | #include <assert.h> |
5 | #include <assert.h> |
6 | #include <string.h> |
6 | #include <string.h> |
7 | #include <stdlib.h> |
7 | #include <stdlib.h> |
8 | #include <stdio.h> |
8 | #include <stdio.h> |
|
|
9 | #include <limits.h> |
9 | #include <float.h> |
10 | #include <float.h> |
10 | |
11 | |
11 | #if defined(__BORLANDC__) || defined(_MSC_VER) |
12 | #if defined(__BORLANDC__) || defined(_MSC_VER) |
12 | # define snprintf _snprintf // C compilers have this in stdio.h |
13 | # define snprintf _snprintf // C compilers have this in stdio.h |
13 | #endif |
14 | #endif |
… | |
… | |
15 | // some old perls do not have this, try to make it work, no |
16 | // some old perls do not have this, try to make it work, no |
16 | // guarantees, though. if it breaks, you get to keep the pieces. |
17 | // guarantees, though. if it breaks, you get to keep the pieces. |
17 | #ifndef UTF8_MAXBYTES |
18 | #ifndef UTF8_MAXBYTES |
18 | # define UTF8_MAXBYTES 13 |
19 | # define UTF8_MAXBYTES 13 |
19 | #endif |
20 | #endif |
|
|
21 | |
|
|
22 | #define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2) |
20 | |
23 | |
21 | #define F_ASCII 0x00000001UL |
24 | #define F_ASCII 0x00000001UL |
22 | #define F_LATIN1 0x00000002UL |
25 | #define F_LATIN1 0x00000002UL |
23 | #define F_UTF8 0x00000004UL |
26 | #define F_UTF8 0x00000004UL |
24 | #define F_INDENT 0x00000008UL |
27 | #define F_INDENT 0x00000008UL |
… | |
… | |
60 | #endif |
63 | #endif |
61 | |
64 | |
62 | #define expect_false(expr) expect ((expr) != 0, 0) |
65 | #define expect_false(expr) expect ((expr) != 0, 0) |
63 | #define expect_true(expr) expect ((expr) != 0, 1) |
66 | #define expect_true(expr) expect ((expr) != 0, 1) |
64 | |
67 | |
|
|
68 | #define IN_RANGE_INC(type,val,beg,end) \ |
|
|
69 | ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ |
|
|
70 | <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) |
|
|
71 | |
65 | #ifdef USE_ITHREADS |
72 | #ifdef USE_ITHREADS |
66 | # define JSON_SLOW 1 |
73 | # define JSON_SLOW 1 |
67 | # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) |
74 | # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) |
68 | #else |
75 | #else |
69 | # define JSON_SLOW 0 |
76 | # define JSON_SLOW 0 |
… | |
… | |
102 | // but use the very good perl function to parse anything else. |
109 | // but use the very good perl function to parse anything else. |
103 | // note that we never call this function for ASCII codepoints |
110 | // note that we never call this function for ASCII codepoints |
104 | INLINE UV |
111 | INLINE UV |
105 | decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) |
112 | decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) |
106 | { |
113 | { |
107 | if (expect_false (s[0] > 0xdf || s[0] < 0xc2)) |
114 | if (expect_true (len >= 2 |
108 | return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); |
115 | && IN_RANGE_INC (char, s[0], 0xc2, 0xdf) |
109 | else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf) |
116 | && IN_RANGE_INC (char, s[1], 0x80, 0xbf))) |
110 | { |
117 | { |
111 | *clen = 2; |
118 | *clen = 2; |
112 | return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); |
119 | return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); |
113 | } |
120 | } |
114 | else |
121 | else |
115 | { |
122 | return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); |
116 | *clen = (STRLEN)-1; |
123 | } |
117 | return (UV)-1; |
124 | |
118 | } |
125 | // likewise for encoding, also never called for ascii codepoints |
|
|
126 | // this function takes advantage of this fact, although current gccs |
|
|
127 | // seem to optimise the check for >= 0x80 away anyways |
|
|
128 | INLINE unsigned char * |
|
|
129 | encode_utf8 (unsigned char *s, UV ch) |
|
|
130 | { |
|
|
131 | if (expect_false (ch < 0x000080)) |
|
|
132 | *s++ = ch; |
|
|
133 | else if (expect_true (ch < 0x000800)) |
|
|
134 | *s++ = 0xc0 | ( ch >> 6), |
|
|
135 | *s++ = 0x80 | ( ch & 0x3f); |
|
|
136 | else if ( ch < 0x010000) |
|
|
137 | *s++ = 0xe0 | ( ch >> 12), |
|
|
138 | *s++ = 0x80 | ((ch >> 6) & 0x3f), |
|
|
139 | *s++ = 0x80 | ( ch & 0x3f); |
|
|
140 | else if ( ch < 0x110000) |
|
|
141 | *s++ = 0xf0 | ( ch >> 18), |
|
|
142 | *s++ = 0x80 | ((ch >> 12) & 0x3f), |
|
|
143 | *s++ = 0x80 | ((ch >> 6) & 0x3f), |
|
|
144 | *s++ = 0x80 | ( ch & 0x3f); |
|
|
145 | |
|
|
146 | return s; |
119 | } |
147 | } |
120 | |
148 | |
121 | ///////////////////////////////////////////////////////////////////////////// |
149 | ///////////////////////////////////////////////////////////////////////////// |
122 | // encoder |
150 | // encoder |
123 | |
151 | |
… | |
… | |
207 | { |
235 | { |
208 | uch = ch; |
236 | uch = ch; |
209 | clen = 1; |
237 | clen = 1; |
210 | } |
238 | } |
211 | |
239 | |
212 | if (uch < 0x20 || uch >= enc->limit) |
240 | if (uch < 0x80/*0x20*/ || uch >= enc->limit) |
213 | { |
241 | { |
214 | if (uch > 0xFFFFUL) |
242 | if (uch >= 0x10000UL) |
215 | { |
243 | { |
216 | if (uch > 0x10FFFFUL) |
244 | if (uch >= 0x110000UL) |
217 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
245 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
218 | |
246 | |
219 | need (enc, len += 11); |
247 | need (enc, len += 11); |
220 | sprintf (enc->cur, "\\u%04x\\u%04x", |
248 | sprintf (enc->cur, "\\u%04x\\u%04x", |
221 | (int)((uch - 0x10000) / 0x400 + 0xD800), |
249 | (int)((uch - 0x10000) / 0x400 + 0xD800), |
… | |
… | |
251 | while (--clen); |
279 | while (--clen); |
252 | } |
280 | } |
253 | else |
281 | else |
254 | { |
282 | { |
255 | need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed |
283 | need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed |
256 | enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); |
284 | enc->cur = encode_utf8 (enc->cur, uch); |
257 | ++str; |
285 | ++str; |
258 | } |
286 | } |
259 | } |
287 | } |
260 | } |
288 | } |
261 | } |
289 | } |
… | |
… | |
620 | Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); |
648 | Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); |
621 | enc->cur += strlen (enc->cur); |
649 | enc->cur += strlen (enc->cur); |
622 | } |
650 | } |
623 | else if (SvIOKp (sv)) |
651 | else if (SvIOKp (sv)) |
624 | { |
652 | { |
625 | // we assume we can always read an IV as a UV |
653 | // we assume we can always read an IV as a UV and vice versa |
626 | if (SvUV (sv) & ~(UV)0x7fff) |
654 | // we assume two's complement |
627 | { |
655 | // we assume no aliasing issues in the union |
628 | // large integer, use the (rather slow) snprintf way. |
656 | if (SvIsUV (sv) ? SvUVX (sv) <= 59000 |
629 | need (enc, sizeof (UV) * 3); |
657 | : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000) |
630 | enc->cur += |
|
|
631 | SvIsUV(sv) |
|
|
632 | ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv)) |
|
|
633 | : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv)); |
|
|
634 | } |
|
|
635 | else |
|
|
636 | { |
658 | { |
637 | // optimise the "small number case" |
659 | // optimise the "small number case" |
638 | // code will likely be branchless and use only a single multiplication |
660 | // code will likely be branchless and use only a single multiplication |
|
|
661 | // works for numbers up to 59074 |
639 | I32 i = SvIV (sv); |
662 | I32 i = SvIVX (sv); |
640 | U32 u; |
663 | U32 u; |
641 | char digit, nz = 0; |
664 | char digit, nz = 0; |
642 | |
665 | |
643 | need (enc, 6); |
666 | need (enc, 6); |
644 | |
667 | |
… | |
… | |
650 | |
673 | |
651 | // now output digit by digit, each time masking out the integer part |
674 | // now output digit by digit, each time masking out the integer part |
652 | // and multiplying by 5 while moving the decimal point one to the right, |
675 | // and multiplying by 5 while moving the decimal point one to the right, |
653 | // resulting in a net multiplication by 10. |
676 | // resulting in a net multiplication by 10. |
654 | // we always write the digit to memory but conditionally increment |
677 | // we always write the digit to memory but conditionally increment |
655 | // the pointer, to ease the usage of conditional move instructions. |
678 | // the pointer, to enable the use of conditional move instructions. |
656 | digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5; |
679 | digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5; |
657 | digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5; |
680 | digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5; |
658 | digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5; |
681 | digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5; |
659 | digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5; |
682 | digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5; |
660 | digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' |
683 | digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' |
|
|
684 | } |
|
|
685 | else |
|
|
686 | { |
|
|
687 | // large integer, use the (rather slow) snprintf way. |
|
|
688 | need (enc, IVUV_MAXCHARS); |
|
|
689 | enc->cur += |
|
|
690 | SvIsUV(sv) |
|
|
691 | ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv)) |
|
|
692 | : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv)); |
661 | } |
693 | } |
662 | } |
694 | } |
663 | else if (SvROK (sv)) |
695 | else if (SvROK (sv)) |
664 | encode_rv (enc, SvRV (sv)); |
696 | encode_rv (enc, SvRV (sv)); |
665 | else if (!SvOK (sv)) |
697 | else if (!SvOK (sv)) |
… | |
… | |
683 | enc.end = SvEND (enc.sv); |
715 | enc.end = SvEND (enc.sv); |
684 | enc.indent = 0; |
716 | enc.indent = 0; |
685 | enc.maxdepth = DEC_DEPTH (enc.json.flags); |
717 | enc.maxdepth = DEC_DEPTH (enc.json.flags); |
686 | enc.limit = enc.json.flags & F_ASCII ? 0x000080UL |
718 | enc.limit = enc.json.flags & F_ASCII ? 0x000080UL |
687 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
719 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
688 | : 0x10FFFFUL; |
720 | : 0x110000UL; |
689 | |
721 | |
690 | SvPOK_only (enc.sv); |
722 | SvPOK_only (enc.sv); |
691 | encode_sv (&enc, scalar); |
723 | encode_sv (&enc, scalar); |
692 | |
724 | |
693 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
725 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
… | |
… | |
859 | |
891 | |
860 | if (hi >= 0x80) |
892 | if (hi >= 0x80) |
861 | { |
893 | { |
862 | utf8 = 1; |
894 | utf8 = 1; |
863 | |
895 | |
864 | cur = (char *)uvuni_to_utf8_flags (cur, hi, 0); |
896 | cur = encode_utf8 (cur, hi); |
865 | } |
897 | } |
866 | else |
898 | else |
867 | *cur++ = hi; |
899 | *cur++ = hi; |
868 | } |
900 | } |
869 | break; |
901 | break; |
… | |
… | |
871 | default: |
903 | default: |
872 | --dec_cur; |
904 | --dec_cur; |
873 | ERR ("illegal backslash escape sequence in string"); |
905 | ERR ("illegal backslash escape sequence in string"); |
874 | } |
906 | } |
875 | } |
907 | } |
876 | else if (expect_true (ch >= 0x20 && ch <= 0x7f)) |
908 | else if (expect_true (ch >= 0x20 && ch < 0x80)) |
877 | *cur++ = ch; |
909 | *cur++ = ch; |
878 | else if (ch >= 0x80) |
910 | else if (ch >= 0x80) |
879 | { |
911 | { |
880 | STRLEN clen; |
912 | STRLEN clen; |
881 | UV uch; |
913 | UV uch; |
… | |
… | |
1280 | |
1312 | |
1281 | static SV * |
1313 | static SV * |
1282 | decode_sv (dec_t *dec) |
1314 | decode_sv (dec_t *dec) |
1283 | { |
1315 | { |
1284 | // the beauty of JSON: you need exactly one character lookahead |
1316 | // the beauty of JSON: you need exactly one character lookahead |
1285 | // to parse anything. |
1317 | // to parse everything. |
1286 | switch (*dec->cur) |
1318 | switch (*dec->cur) |
1287 | { |
1319 | { |
1288 | case '"': ++dec->cur; return decode_str (dec); |
1320 | case '"': ++dec->cur; return decode_str (dec); |
1289 | case '[': ++dec->cur; return decode_av (dec); |
1321 | case '[': ++dec->cur; return decode_av (dec); |
1290 | case '{': ++dec->cur; return decode_hv (dec); |
1322 | case '{': ++dec->cur; return decode_hv (dec); |
1291 | |
1323 | |
1292 | case '-': |
1324 | case '-': |
1293 | case '0': case '1': case '2': case '3': case '4': |
1325 | case '0': case '1': case '2': case '3': case '4': |
1294 | case '5': case '6': case '7': case '8': case '9': |
1326 | case '5': case '6': case '7': case '8': case '9': |
1295 | return decode_num (dec); |
1327 | return decode_num (dec); |
… | |
… | |
1610 | SvREFCNT_dec (self->cb_object); |
1642 | SvREFCNT_dec (self->cb_object); |
1611 | |
1643 | |
1612 | PROTOTYPES: ENABLE |
1644 | PROTOTYPES: ENABLE |
1613 | |
1645 | |
1614 | void encode_json (SV *scalar) |
1646 | void encode_json (SV *scalar) |
|
|
1647 | ALIAS: |
|
|
1648 | to_json_ = 0 |
|
|
1649 | encode_json = F_UTF8 |
1615 | PPCODE: |
1650 | PPCODE: |
1616 | { |
1651 | { |
1617 | JSON json = { F_DEFAULT | F_UTF8 }; |
1652 | JSON json = { F_DEFAULT | ix }; |
1618 | XPUSHs (encode_json (scalar, &json)); |
1653 | XPUSHs (encode_json (scalar, &json)); |
1619 | } |
1654 | } |
1620 | |
1655 | |
1621 | void decode_json (SV *jsonstr) |
1656 | void decode_json (SV *jsonstr) |
|
|
1657 | ALIAS: |
|
|
1658 | from_json_ = 0 |
|
|
1659 | decode_json = F_UTF8 |
1622 | PPCODE: |
1660 | PPCODE: |
1623 | { |
1661 | { |
1624 | JSON json = { F_DEFAULT | F_UTF8 }; |
1662 | JSON json = { F_DEFAULT | ix }; |
1625 | XPUSHs (decode_json (jsonstr, &json, 0)); |
1663 | XPUSHs (decode_json (jsonstr, &json, 0)); |
1626 | } |
1664 | } |
1627 | |
1665 | |