… | |
… | |
60 | #endif |
60 | #endif |
61 | |
61 | |
62 | #define expect_false(expr) expect ((expr) != 0, 0) |
62 | #define expect_false(expr) expect ((expr) != 0, 0) |
63 | #define expect_true(expr) expect ((expr) != 0, 1) |
63 | #define expect_true(expr) expect ((expr) != 0, 1) |
64 | |
64 | |
|
|
65 | #define IN_RANGE_INC(type,val,beg,end) \ |
|
|
66 | ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ |
|
|
67 | <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) |
|
|
68 | |
65 | #ifdef USE_ITHREADS |
69 | #ifdef USE_ITHREADS |
66 | # define JSON_SLOW 1 |
70 | # define JSON_SLOW 1 |
67 | # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) |
71 | # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) |
68 | #else |
72 | #else |
69 | # define JSON_SLOW 0 |
73 | # define JSON_SLOW 0 |
… | |
… | |
102 | // but use the very good perl function to parse anything else. |
106 | // but use the very good perl function to parse anything else. |
103 | // note that we never call this function for ASCII codepoints |
107 | // note that we never call this function for ASCII codepoints |
104 | INLINE UV |
108 | INLINE UV |
105 | decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) |
109 | decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) |
106 | { |
110 | { |
107 | if (expect_false (s[0] > 0xdf || s[0] < 0xc2)) |
111 | if (expect_true (len >= 2 |
108 | return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); |
112 | && IN_RANGE_INC (char, s[0], 0xc2, 0xdf) |
109 | else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf) |
113 | && IN_RANGE_INC (char, s[1], 0x80, 0xbf))) |
110 | { |
114 | { |
111 | *clen = 2; |
115 | *clen = 2; |
112 | return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); |
116 | return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); |
113 | } |
117 | } |
114 | else |
118 | else |
115 | { |
119 | return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); |
116 | *clen = (STRLEN)-1; |
120 | } |
117 | return (UV)-1; |
121 | |
118 | } |
122 | // likewise for encoding, also never called for ASCII codepoints |
|
|
123 | // this function takes advantage of this fact, although current gccs |
|
|
124 | // seem to optimise the check for >= 0x80 away anyways |
|
|
125 | INLINE unsigned char * |
|
|
126 | encode_utf8 (unsigned char *s, UV ch) |
|
|
127 | { |
|
|
128 | if (expect_false (ch < 0x000080)) |
|
|
129 | *s++ = ch; |
|
|
130 | else if (expect_true (ch < 0x000800)) |
|
|
131 | *s++ = 0xc0 | ( ch >> 6), |
|
|
132 | *s++ = 0x80 | ( ch & 0x3f); |
|
|
133 | else if ( ch < 0x010000) |
|
|
134 | *s++ = 0xe0 | ( ch >> 12), |
|
|
135 | *s++ = 0x80 | ((ch >> 6) & 0x3f), |
|
|
136 | *s++ = 0x80 | ( ch & 0x3f); |
|
|
137 | else if ( ch < 0x110000) |
|
|
138 | *s++ = 0xf0 | ( ch >> 18), |
|
|
139 | *s++ = 0x80 | ((ch >> 12) & 0x3f), |
|
|
140 | *s++ = 0x80 | ((ch >> 6) & 0x3f), |
|
|
141 | *s++ = 0x80 | ( ch & 0x3f); |
|
|
142 | |
|
|
143 | return s; |
119 | } |
144 | } |
120 | |
145 | |
121 | ///////////////////////////////////////////////////////////////////////////// |
146 | ///////////////////////////////////////////////////////////////////////////// |
122 | // encoder |
147 | // encoder |
123 | |
148 | |
… | |
… | |
207 | { |
232 | { |
208 | uch = ch; |
233 | uch = ch; |
209 | clen = 1; |
234 | clen = 1; |
210 | } |
235 | } |
211 | |
236 | |
212 | if (uch < 0x20 || uch >= enc->limit) |
237 | if (uch < 0x80/*0x20*/ || uch >= enc->limit) |
213 | { |
238 | { |
214 | if (uch > 0xFFFFUL) |
239 | if (uch >= 0x10000UL) |
215 | { |
240 | { |
216 | if (uch > 0x10FFFFUL) |
241 | if (uch >= 0x110000UL) |
217 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
242 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
218 | |
243 | |
219 | need (enc, len += 11); |
244 | need (enc, len += 11); |
220 | sprintf (enc->cur, "\\u%04x\\u%04x", |
245 | sprintf (enc->cur, "\\u%04x\\u%04x", |
221 | (int)((uch - 0x10000) / 0x400 + 0xD800), |
246 | (int)((uch - 0x10000) / 0x400 + 0xD800), |
… | |
… | |
251 | while (--clen); |
276 | while (--clen); |
252 | } |
277 | } |
253 | else |
278 | else |
254 | { |
279 | { |
255 | need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed |
280 | need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed |
256 | enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); |
281 | enc->cur = encode_utf8 (enc->cur, uch); |
257 | ++str; |
282 | ++str; |
258 | } |
283 | } |
259 | } |
284 | } |
260 | } |
285 | } |
261 | } |
286 | } |
… | |
… | |
620 | Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); |
645 | Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); |
621 | enc->cur += strlen (enc->cur); |
646 | enc->cur += strlen (enc->cur); |
622 | } |
647 | } |
623 | else if (SvIOKp (sv)) |
648 | else if (SvIOKp (sv)) |
624 | { |
649 | { |
625 | // we assume we can always read an IV as a UV |
650 | // we assume we can always read an IV as a UV and vice versa |
626 | if (SvUV (sv) & ~(UV)0x7fff) |
651 | // we assume two's complement |
|
|
652 | // we assume no aliasing issues in the union |
|
|
653 | if (SvIsUV (sv) ? SvUVX (sv) > 59000 |
|
|
654 | : SvIVX (sv) > 59000 || SvIVX (sv) < -59000) |
627 | { |
655 | { |
628 | // large integer, use the (rather slow) snprintf way. |
656 | // large integer, use the (rather slow) snprintf way. |
629 | need (enc, sizeof (UV) * 3); |
657 | need (enc, sizeof (UV) * 5 / 2 + 1); // CHAR_BIT is at least 8 |
630 | enc->cur += |
658 | enc->cur += |
631 | SvIsUV(sv) |
659 | SvIsUV(sv) |
632 | ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv)) |
660 | ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv)) |
633 | : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv)); |
661 | : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv)); |
634 | } |
662 | } |
635 | else |
663 | else |
636 | { |
664 | { |
637 | // optimise the "small number case" |
665 | // optimise the "small number case" |
638 | // code will likely be branchless and use only a single multiplication |
666 | // code will likely be branchless and use only a single multiplication |
|
|
667 | // works for numbers up to 59074 |
639 | I32 i = SvIV (sv); |
668 | I32 i = SvIVX (sv); |
640 | U32 u; |
669 | U32 u; |
641 | char digit, nz = 0; |
670 | char digit, nz = 0; |
642 | |
671 | |
643 | need (enc, 6); |
672 | need (enc, 6); |
644 | |
673 | |
… | |
… | |
683 | enc.end = SvEND (enc.sv); |
712 | enc.end = SvEND (enc.sv); |
684 | enc.indent = 0; |
713 | enc.indent = 0; |
685 | enc.maxdepth = DEC_DEPTH (enc.json.flags); |
714 | enc.maxdepth = DEC_DEPTH (enc.json.flags); |
686 | enc.limit = enc.json.flags & F_ASCII ? 0x000080UL |
715 | enc.limit = enc.json.flags & F_ASCII ? 0x000080UL |
687 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
716 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
688 | : 0x10FFFFUL; |
717 | : 0x110000UL; |
689 | |
718 | |
690 | SvPOK_only (enc.sv); |
719 | SvPOK_only (enc.sv); |
691 | encode_sv (&enc, scalar); |
720 | encode_sv (&enc, scalar); |
692 | |
721 | |
693 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
722 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
… | |
… | |
859 | |
888 | |
860 | if (hi >= 0x80) |
889 | if (hi >= 0x80) |
861 | { |
890 | { |
862 | utf8 = 1; |
891 | utf8 = 1; |
863 | |
892 | |
864 | cur = (char *)uvuni_to_utf8_flags (cur, hi, 0); |
893 | cur = encode_utf8 (cur, hi); |
865 | } |
894 | } |
866 | else |
895 | else |
867 | *cur++ = hi; |
896 | *cur++ = hi; |
868 | } |
897 | } |
869 | break; |
898 | break; |
… | |
… | |
871 | default: |
900 | default: |
872 | --dec_cur; |
901 | --dec_cur; |
873 | ERR ("illegal backslash escape sequence in string"); |
902 | ERR ("illegal backslash escape sequence in string"); |
874 | } |
903 | } |
875 | } |
904 | } |
876 | else if (expect_true (ch >= 0x20 && ch <= 0x7f)) |
905 | else if (expect_true (ch >= 0x20 && ch < 0x80)) |
877 | *cur++ = ch; |
906 | *cur++ = ch; |
878 | else if (ch >= 0x80) |
907 | else if (ch >= 0x80) |
879 | { |
908 | { |
880 | STRLEN clen; |
909 | STRLEN clen; |
881 | UV uch; |
910 | UV uch; |
… | |
… | |
1280 | |
1309 | |
1281 | static SV * |
1310 | static SV * |
1282 | decode_sv (dec_t *dec) |
1311 | decode_sv (dec_t *dec) |
1283 | { |
1312 | { |
1284 | // the beauty of JSON: you need exactly one character lookahead |
1313 | // the beauty of JSON: you need exactly one character lookahead |
1285 | // to parse anything. |
1314 | // to parse everything. |
1286 | switch (*dec->cur) |
1315 | switch (*dec->cur) |
1287 | { |
1316 | { |
1288 | case '"': ++dec->cur; return decode_str (dec); |
1317 | case '"': ++dec->cur; return decode_str (dec); |
1289 | case '[': ++dec->cur; return decode_av (dec); |
1318 | case '[': ++dec->cur; return decode_av (dec); |
1290 | case '{': ++dec->cur; return decode_hv (dec); |
1319 | case '{': ++dec->cur; return decode_hv (dec); |
1291 | |
1320 | |
1292 | case '-': |
1321 | case '-': |
1293 | case '0': case '1': case '2': case '3': case '4': |
1322 | case '0': case '1': case '2': case '3': case '4': |
1294 | case '5': case '6': case '7': case '8': case '9': |
1323 | case '5': case '6': case '7': case '8': case '9': |
1295 | return decode_num (dec); |
1324 | return decode_num (dec); |