… | |
… | |
123 | // this function takes advantage of this fact, although current gccs |
123 | // this function takes advantage of this fact, although current gccs |
124 | // seem to optimise the check for >= 0x80 away anyways |
124 | // seem to optimise the check for >= 0x80 away anyways |
125 | INLINE unsigned char * |
125 | INLINE unsigned char * |
126 | encode_utf8 (unsigned char *s, UV ch) |
126 | encode_utf8 (unsigned char *s, UV ch) |
127 | { |
127 | { |
128 | if (ch <= 0x7FF) |
128 | if (expect_false (ch < 0x000080)) |
129 | { |
129 | *s++ = ch; |
130 | *s++ = (ch >> 6) | 0xc0; |
130 | else if (expect_true (ch < 0x000800)) |
131 | *s++ = (ch & 0x3f) | 0x80; |
131 | *s++ = 0xc0 | ( ch >> 6), |
132 | } |
132 | *s++ = 0x80 | ( ch & 0x3f); |
133 | else |
133 | else if ( ch < 0x010000) |
134 | s = uvuni_to_utf8_flags (s, ch, 0); |
134 | *s++ = 0xe0 | ( ch >> 12), |
|
|
135 | *s++ = 0x80 | ((ch >> 6) & 0x3f), |
|
|
136 | *s++ = 0x80 | ( ch & 0x3f); |
|
|
137 | else if ( ch < 0x110000) |
|
|
138 | *s++ = 0xf0 | ( ch >> 18), |
|
|
139 | *s++ = 0x80 | ((ch >> 12) & 0x3f), |
|
|
140 | *s++ = 0x80 | ((ch >> 6) & 0x3f), |
|
|
141 | *s++ = 0x80 | ( ch & 0x3f); |
135 | |
142 | |
136 | return s; |
143 | return s; |
137 | } |
144 | } |
138 | |
145 | |
139 | ///////////////////////////////////////////////////////////////////////////// |
146 | ///////////////////////////////////////////////////////////////////////////// |
… | |
… | |
227 | clen = 1; |
234 | clen = 1; |
228 | } |
235 | } |
229 | |
236 | |
230 | if (uch < 0x80/*0x20*/ || uch >= enc->limit) |
237 | if (uch < 0x80/*0x20*/ || uch >= enc->limit) |
231 | { |
238 | { |
232 | if (uch > 0xFFFFUL) |
239 | if (uch >= 0x10000UL) |
233 | { |
240 | { |
234 | if (uch > 0x10FFFFUL) |
241 | if (uch >= 0x110000UL) |
235 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
242 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
236 | |
243 | |
237 | need (enc, len += 11); |
244 | need (enc, len += 11); |
238 | sprintf (enc->cur, "\\u%04x\\u%04x", |
245 | sprintf (enc->cur, "\\u%04x\\u%04x", |
239 | (int)((uch - 0x10000) / 0x400 + 0xD800), |
246 | (int)((uch - 0x10000) / 0x400 + 0xD800), |
… | |
… | |
638 | Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); |
645 | Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); |
639 | enc->cur += strlen (enc->cur); |
646 | enc->cur += strlen (enc->cur); |
640 | } |
647 | } |
641 | else if (SvIOKp (sv)) |
648 | else if (SvIOKp (sv)) |
642 | { |
649 | { |
643 | // we assume we can always read an IV as a UV |
650 | // we assume we can always read an IV as a UV and vice versa |
644 | if (SvUV (sv) & ~(UV)0x7fff) |
651 | // we assume two's complement |
|
|
652 | // we assume no aliasing issues in the union |
|
|
653 | if (SvIsUV (sv) ? SvUVX (sv) > 59000 |
|
|
654 | : SvIVX (sv) > 59000 || SvIVX (sv) < -59000) |
645 | { |
655 | { |
646 | // large integer, use the (rather slow) snprintf way. |
656 | // large integer, use the (rather slow) snprintf way. |
647 | need (enc, sizeof (UV) * 3); |
657 | need (enc, sizeof (UV) * 5 / 2 + 1); // CHAR_BIT is at least 8 |
648 | enc->cur += |
658 | enc->cur += |
649 | SvIsUV(sv) |
659 | SvIsUV(sv) |
650 | ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv)) |
660 | ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv)) |
651 | : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv)); |
661 | : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv)); |
652 | } |
662 | } |
653 | else |
663 | else |
654 | { |
664 | { |
655 | // optimise the "small number case" |
665 | // optimise the "small number case" |
656 | // code will likely be branchless and use only a single multiplication |
666 | // code will likely be branchless and use only a single multiplication |
|
|
667 | // works for numbers up to 59074 |
657 | I32 i = SvIV (sv); |
668 | I32 i = SvIVX (sv); |
658 | U32 u; |
669 | U32 u; |
659 | char digit, nz = 0; |
670 | char digit, nz = 0; |
660 | |
671 | |
661 | need (enc, 6); |
672 | need (enc, 6); |
662 | |
673 | |
… | |
… | |
701 | enc.end = SvEND (enc.sv); |
712 | enc.end = SvEND (enc.sv); |
702 | enc.indent = 0; |
713 | enc.indent = 0; |
703 | enc.maxdepth = DEC_DEPTH (enc.json.flags); |
714 | enc.maxdepth = DEC_DEPTH (enc.json.flags); |
704 | enc.limit = enc.json.flags & F_ASCII ? 0x000080UL |
715 | enc.limit = enc.json.flags & F_ASCII ? 0x000080UL |
705 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
716 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
706 | : 0x10FFFFUL; |
717 | : 0x110000UL; |
707 | |
718 | |
708 | SvPOK_only (enc.sv); |
719 | SvPOK_only (enc.sv); |
709 | encode_sv (&enc, scalar); |
720 | encode_sv (&enc, scalar); |
710 | |
721 | |
711 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
722 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
… | |
… | |
889 | default: |
900 | default: |
890 | --dec_cur; |
901 | --dec_cur; |
891 | ERR ("illegal backslash escape sequence in string"); |
902 | ERR ("illegal backslash escape sequence in string"); |
892 | } |
903 | } |
893 | } |
904 | } |
894 | else if (expect_true (ch >= 0x20 && ch <= 0x7f)) |
905 | else if (expect_true (ch >= 0x20 && ch < 0x80)) |
895 | *cur++ = ch; |
906 | *cur++ = ch; |
896 | else if (ch >= 0x80) |
907 | else if (ch >= 0x80) |
897 | { |
908 | { |
898 | STRLEN clen; |
909 | STRLEN clen; |
899 | UV uch; |
910 | UV uch; |