… | |
… | |
4 | |
4 | |
5 | #include <assert.h> |
5 | #include <assert.h> |
6 | #include <string.h> |
6 | #include <string.h> |
7 | #include <stdlib.h> |
7 | #include <stdlib.h> |
8 | #include <stdio.h> |
8 | #include <stdio.h> |
|
|
9 | #include <limits.h> |
9 | #include <float.h> |
10 | #include <float.h> |
10 | |
11 | |
11 | #if defined(__BORLANDC__) || defined(_MSC_VER) |
12 | #if defined(__BORLANDC__) || defined(_MSC_VER) |
12 | # define snprintf _snprintf // C compilers have this in stdio.h |
13 | # define snprintf _snprintf // C compilers have this in stdio.h |
13 | #endif |
14 | #endif |
… | |
… | |
15 | // some old perls do not have this, try to make it work, no |
16 | // some old perls do not have this, try to make it work, no |
16 | // guarantees, though. if it breaks, you get to keep the pieces. |
17 | // guarantees, though. if it breaks, you get to keep the pieces. |
17 | #ifndef UTF8_MAXBYTES |
18 | #ifndef UTF8_MAXBYTES |
18 | # define UTF8_MAXBYTES 13 |
19 | # define UTF8_MAXBYTES 13 |
19 | #endif |
20 | #endif |
|
|
21 | |
|
|
22 | #define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2) |
20 | |
23 | |
21 | #define F_ASCII 0x00000001UL |
24 | #define F_ASCII 0x00000001UL |
22 | #define F_LATIN1 0x00000002UL |
25 | #define F_LATIN1 0x00000002UL |
23 | #define F_UTF8 0x00000004UL |
26 | #define F_UTF8 0x00000004UL |
24 | #define F_INDENT 0x00000008UL |
27 | #define F_INDENT 0x00000008UL |
… | |
… | |
123 | // this function takes advantage of this fact, although current gccs |
126 | // this function takes advantage of this fact, although current gccs |
124 | // seem to optimise the check for >= 0x80 away anyways |
127 | // seem to optimise the check for >= 0x80 away anyways |
125 | INLINE unsigned char * |
128 | INLINE unsigned char * |
126 | encode_utf8 (unsigned char *s, UV ch) |
129 | encode_utf8 (unsigned char *s, UV ch) |
127 | { |
130 | { |
128 | if (ch <= 0x7FF) |
131 | if (expect_false (ch < 0x000080)) |
129 | { |
132 | *s++ = ch; |
130 | *s++ = (ch >> 6) | 0xc0; |
133 | else if (expect_true (ch < 0x000800)) |
131 | *s++ = (ch & 0x3f) | 0x80; |
134 | *s++ = 0xc0 | ( ch >> 6), |
132 | } |
135 | *s++ = 0x80 | ( ch & 0x3f); |
133 | else |
136 | else if ( ch < 0x010000) |
134 | s = uvuni_to_utf8_flags (s, ch, 0); |
137 | *s++ = 0xe0 | ( ch >> 12), |
|
|
138 | *s++ = 0x80 | ((ch >> 6) & 0x3f), |
|
|
139 | *s++ = 0x80 | ( ch & 0x3f); |
|
|
140 | else if ( ch < 0x110000) |
|
|
141 | *s++ = 0xf0 | ( ch >> 18), |
|
|
142 | *s++ = 0x80 | ((ch >> 12) & 0x3f), |
|
|
143 | *s++ = 0x80 | ((ch >> 6) & 0x3f), |
|
|
144 | *s++ = 0x80 | ( ch & 0x3f); |
135 | |
145 | |
136 | return s; |
146 | return s; |
137 | } |
147 | } |
138 | |
148 | |
139 | ///////////////////////////////////////////////////////////////////////////// |
149 | ///////////////////////////////////////////////////////////////////////////// |
… | |
… | |
227 | clen = 1; |
237 | clen = 1; |
228 | } |
238 | } |
229 | |
239 | |
230 | if (uch < 0x80/*0x20*/ || uch >= enc->limit) |
240 | if (uch < 0x80/*0x20*/ || uch >= enc->limit) |
231 | { |
241 | { |
232 | if (uch > 0xFFFFUL) |
242 | if (uch >= 0x10000UL) |
233 | { |
243 | { |
234 | if (uch > 0x10FFFFUL) |
244 | if (uch >= 0x110000UL) |
235 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
245 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
236 | |
246 | |
237 | need (enc, len += 11); |
247 | need (enc, len += 11); |
238 | sprintf (enc->cur, "\\u%04x\\u%04x", |
248 | sprintf (enc->cur, "\\u%04x\\u%04x", |
239 | (int)((uch - 0x10000) / 0x400 + 0xD800), |
249 | (int)((uch - 0x10000) / 0x400 + 0xD800), |
… | |
… | |
638 | Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); |
648 | Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); |
639 | enc->cur += strlen (enc->cur); |
649 | enc->cur += strlen (enc->cur); |
640 | } |
650 | } |
641 | else if (SvIOKp (sv)) |
651 | else if (SvIOKp (sv)) |
642 | { |
652 | { |
643 | // we assume we can always read an IV as a UV |
653 | // we assume we can always read an IV as a UV and vice versa |
644 | if (SvUV (sv) & ~(UV)0x7fff) |
654 | // we assume two's complement |
645 | { |
655 | // we assume no aliasing issues in the union |
646 | // large integer, use the (rather slow) snprintf way. |
656 | if (SvIsUV (sv) ? SvUVX (sv) <= 59000 |
647 | need (enc, sizeof (UV) * 3); |
657 | : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000) |
648 | enc->cur += |
|
|
649 | SvIsUV(sv) |
|
|
650 | ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv)) |
|
|
651 | : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv)); |
|
|
652 | } |
|
|
653 | else |
|
|
654 | { |
658 | { |
655 | // optimise the "small number case" |
659 | // optimise the "small number case" |
656 | // code will likely be branchless and use only a single multiplication |
660 | // code will likely be branchless and use only a single multiplication |
|
|
661 | // works for numbers up to 59074 |
657 | I32 i = SvIV (sv); |
662 | I32 i = SvIVX (sv); |
658 | U32 u; |
663 | U32 u; |
659 | char digit, nz = 0; |
664 | char digit, nz = 0; |
660 | |
665 | |
661 | need (enc, 6); |
666 | need (enc, 6); |
662 | |
667 | |
… | |
… | |
668 | |
673 | |
669 | // now output digit by digit, each time masking out the integer part |
674 | // now output digit by digit, each time masking out the integer part |
670 | // and multiplying by 5 while moving the decimal point one to the right, |
675 | // and multiplying by 5 while moving the decimal point one to the right, |
671 | // resulting in a net multiplication by 10. |
676 | // resulting in a net multiplication by 10. |
672 | // we always write the digit to memory but conditionally increment |
677 | // we always write the digit to memory but conditionally increment |
673 | // the pointer, to ease the usage of conditional move instructions. |
678 | // the pointer, to enable the use of conditional move instructions. |
674 | digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5; |
679 | digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5; |
675 | digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5; |
680 | digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5; |
676 | digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5; |
681 | digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5; |
677 | digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5; |
682 | digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5; |
678 | digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' |
683 | digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' |
|
|
684 | } |
|
|
685 | else |
|
|
686 | { |
|
|
687 | // large integer, use the (rather slow) snprintf way. |
|
|
688 | need (enc, IVUV_MAXCHARS); |
|
|
689 | enc->cur += |
|
|
690 | SvIsUV(sv) |
|
|
691 | ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv)) |
|
|
692 | : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv)); |
679 | } |
693 | } |
680 | } |
694 | } |
681 | else if (SvROK (sv)) |
695 | else if (SvROK (sv)) |
682 | encode_rv (enc, SvRV (sv)); |
696 | encode_rv (enc, SvRV (sv)); |
683 | else if (!SvOK (sv)) |
697 | else if (!SvOK (sv)) |
… | |
… | |
701 | enc.end = SvEND (enc.sv); |
715 | enc.end = SvEND (enc.sv); |
702 | enc.indent = 0; |
716 | enc.indent = 0; |
703 | enc.maxdepth = DEC_DEPTH (enc.json.flags); |
717 | enc.maxdepth = DEC_DEPTH (enc.json.flags); |
704 | enc.limit = enc.json.flags & F_ASCII ? 0x000080UL |
718 | enc.limit = enc.json.flags & F_ASCII ? 0x000080UL |
705 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
719 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
706 | : 0x10FFFFUL; |
720 | : 0x110000UL; |
707 | |
721 | |
708 | SvPOK_only (enc.sv); |
722 | SvPOK_only (enc.sv); |
709 | encode_sv (&enc, scalar); |
723 | encode_sv (&enc, scalar); |
710 | |
724 | |
711 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
725 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
… | |
… | |
889 | default: |
903 | default: |
890 | --dec_cur; |
904 | --dec_cur; |
891 | ERR ("illegal backslash escape sequence in string"); |
905 | ERR ("illegal backslash escape sequence in string"); |
892 | } |
906 | } |
893 | } |
907 | } |
894 | else if (expect_true (ch >= 0x20 && ch <= 0x7f)) |
908 | else if (expect_true (ch >= 0x20 && ch < 0x80)) |
895 | *cur++ = ch; |
909 | *cur++ = ch; |
896 | else if (ch >= 0x80) |
910 | else if (ch >= 0x80) |
897 | { |
911 | { |
898 | STRLEN clen; |
912 | STRLEN clen; |
899 | UV uch; |
913 | UV uch; |
… | |
… | |
1298 | |
1312 | |
1299 | static SV * |
1313 | static SV * |
1300 | decode_sv (dec_t *dec) |
1314 | decode_sv (dec_t *dec) |
1301 | { |
1315 | { |
1302 | // the beauty of JSON: you need exactly one character lookahead |
1316 | // the beauty of JSON: you need exactly one character lookahead |
1303 | // to parse anything. |
1317 | // to parse everything. |
1304 | switch (*dec->cur) |
1318 | switch (*dec->cur) |
1305 | { |
1319 | { |
1306 | case '"': ++dec->cur; return decode_str (dec); |
1320 | case '"': ++dec->cur; return decode_str (dec); |
1307 | case '[': ++dec->cur; return decode_av (dec); |
1321 | case '[': ++dec->cur; return decode_av (dec); |
1308 | case '{': ++dec->cur; return decode_hv (dec); |
1322 | case '{': ++dec->cur; return decode_hv (dec); |
1309 | |
1323 | |
1310 | case '-': |
1324 | case '-': |
1311 | case '0': case '1': case '2': case '3': case '4': |
1325 | case '0': case '1': case '2': case '3': case '4': |
1312 | case '5': case '6': case '7': case '8': case '9': |
1326 | case '5': case '6': case '7': case '8': case '9': |
1313 | return decode_num (dec); |
1327 | return decode_num (dec); |