… | |
… | |
60 | #endif |
60 | #endif |
61 | |
61 | |
62 | #define expect_false(expr) expect ((expr) != 0, 0) |
62 | #define expect_false(expr) expect ((expr) != 0, 0) |
63 | #define expect_true(expr) expect ((expr) != 0, 1) |
63 | #define expect_true(expr) expect ((expr) != 0, 1) |
64 | |
64 | |
|
|
65 | #define IN_RANGE_INC(type,val,beg,end) \ |
|
|
66 | ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ |
|
|
67 | <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) |
|
|
68 | |
65 | #ifdef USE_ITHREADS |
69 | #ifdef USE_ITHREADS |
66 | # define JSON_SLOW 1 |
70 | # define JSON_SLOW 1 |
67 | # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) |
71 | # define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) |
68 | #else |
72 | #else |
69 | # define JSON_SLOW 0 |
73 | # define JSON_SLOW 0 |
… | |
… | |
102 | // but use the very good perl function to parse anything else. |
106 | // but use the very good perl function to parse anything else. |
103 | // note that we never call this function for ASCII codepoints |
107 | // note that we never call this function for ASCII codepoints |
104 | INLINE UV |
108 | INLINE UV |
105 | decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) |
109 | decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) |
106 | { |
110 | { |
107 | if (expect_false (s[0] > 0xdf || s[0] < 0xc2)) |
111 | if (expect_true (len >= 2 |
108 | return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); |
112 | && IN_RANGE_INC (char, s[0], 0xc2, 0xdf) |
109 | else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf) |
113 | && IN_RANGE_INC (char, s[1], 0x80, 0xbf))) |
110 | { |
114 | { |
111 | *clen = 2; |
115 | *clen = 2; |
112 | return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); |
116 | return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); |
113 | } |
117 | } |
114 | else |
118 | else |
115 | { |
119 | return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); |
116 | *clen = (STRLEN)-1; |
120 | } |
117 | return (UV)-1; |
121 | |
|
|
122 | // likewise for encoding, also never called for ASCII codepoints |
|
|
123 | // this function takes advantage of this fact, although current gccs |
|
|
124 | // seem to optimise the check for >= 0x80 away anyways |
|
|
125 | INLINE unsigned char * |
|
|
126 | encode_utf8 (unsigned char *s, UV ch) |
|
|
127 | { |
|
|
128 | if (ch <= 0x7FF) |
118 | } |
129 | { |
|
|
130 | *s++ = (ch >> 6) | 0xc0; |
|
|
131 | *s++ = (ch & 0x3f) | 0x80; |
|
|
132 | } |
|
|
133 | else |
|
|
134 | s = uvuni_to_utf8_flags (s, ch, 0); |
|
|
135 | |
|
|
136 | return s; |
119 | } |
137 | } |
120 | |
138 | |
121 | ///////////////////////////////////////////////////////////////////////////// |
139 | ///////////////////////////////////////////////////////////////////////////// |
122 | // encoder |
140 | // encoder |
123 | |
141 | |
… | |
… | |
207 | { |
225 | { |
208 | uch = ch; |
226 | uch = ch; |
209 | clen = 1; |
227 | clen = 1; |
210 | } |
228 | } |
211 | |
229 | |
212 | if (uch < 0x20 || uch >= enc->limit) |
230 | if (uch < 0x80/*0x20*/ || uch >= enc->limit) |
213 | { |
231 | { |
214 | if (uch > 0xFFFFUL) |
232 | if (uch > 0xFFFFUL) |
215 | { |
233 | { |
216 | if (uch > 0x10FFFFUL) |
234 | if (uch > 0x10FFFFUL) |
217 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
235 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); |
… | |
… | |
251 | while (--clen); |
269 | while (--clen); |
252 | } |
270 | } |
253 | else |
271 | else |
254 | { |
272 | { |
255 | need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed |
273 | need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed |
256 | enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); |
274 | enc->cur = encode_utf8 (enc->cur, uch); |
257 | ++str; |
275 | ++str; |
258 | } |
276 | } |
259 | } |
277 | } |
260 | } |
278 | } |
261 | } |
279 | } |
… | |
… | |
859 | |
877 | |
860 | if (hi >= 0x80) |
878 | if (hi >= 0x80) |
861 | { |
879 | { |
862 | utf8 = 1; |
880 | utf8 = 1; |
863 | |
881 | |
864 | cur = (char *)uvuni_to_utf8_flags (cur, hi, 0); |
882 | cur = encode_utf8 (cur, hi); |
865 | } |
883 | } |
866 | else |
884 | else |
867 | *cur++ = hi; |
885 | *cur++ = hi; |
868 | } |
886 | } |
869 | break; |
887 | break; |