ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.66 by root, Sun Nov 25 19:02:42 2007 UTC vs.
Revision 1.72 by root, Wed Mar 19 04:08:22 2008 UTC

50 50
51#define SB do { 51#define SB do {
52#define SE } while (0) 52#define SE } while (0)
53 53
54#if __GNUC__ >= 3 54#if __GNUC__ >= 3
55# define expect(expr,value) __builtin_expect ((expr),(value)) 55# define expect(expr,value) __builtin_expect ((expr), (value))
56# define inline inline 56# define INLINE static inline
57#else 57#else
58# define expect(expr,value) (expr) 58# define expect(expr,value) (expr)
59# define inline static 59# define INLINE static
60#endif 60#endif
61 61
62#define expect_false(expr) expect ((expr) != 0, 0) 62#define expect_false(expr) expect ((expr) != 0, 0)
63#define expect_true(expr) expect ((expr) != 0, 1) 63#define expect_true(expr) expect ((expr) != 0, 1)
64
65#define IN_RANGE_INC(type,val,beg,end) \
66 ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
67 <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
64 68
65#ifdef USE_ITHREADS 69#ifdef USE_ITHREADS
66# define JSON_SLOW 1 70# define JSON_SLOW 1
67# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) 71# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
68#else 72#else
80} JSON; 84} JSON;
81 85
82///////////////////////////////////////////////////////////////////////////// 86/////////////////////////////////////////////////////////////////////////////
83// utility functions 87// utility functions
84 88
85inline void 89INLINE void
86shrink (SV *sv) 90shrink (SV *sv)
87{ 91{
88 sv_utf8_downgrade (sv, 1); 92 sv_utf8_downgrade (sv, 1);
89 if (SvLEN (sv) > SvCUR (sv) + 1) 93 if (SvLEN (sv) > SvCUR (sv) + 1)
90 { 94 {
99// decode a utf-8 character and return it, or (UV)-1 in 103// decode a utf-8 character and return it, or (UV)-1 in
100// case of an error. 104// case of an error.
101// we special-case "safe" characters from U+80 .. U+7FF, 105// we special-case "safe" characters from U+80 .. U+7FF,
102// but use the very good perl function to parse anything else. 106// but use the very good perl function to parse anything else.
103// note that we never call this function for ascii codepoints 107// note that we never call this function for ascii codepoints
104inline UV 108INLINE UV
105decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) 109decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)
106{ 110{
107 if (expect_false (s[0] > 0xdf || s[0] < 0xc2)) 111 if (expect_true (len >= 2
108 return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); 112 && IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
109 else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf) 113 && IN_RANGE_INC (char, s[1], 0x80, 0xbf)))
110 { 114 {
111 *clen = 2; 115 *clen = 2;
112 return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); 116 return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
113 } 117 }
114 else 118 else
115 { 119 return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
116 *clen = (STRLEN)-1; 120}
117 return (UV)-1; 121
122// likewise for encoding, also never called for ascii codepoints
123// this function takes advantage of this fact, although current gcc versions
124// seem to optimise the check for >= 0x80 away anyway
125INLINE unsigned char *
126encode_utf8 (unsigned char *s, UV ch)
127{
128 if (ch <= 0x7FF)
118 } 129 {
130 *s++ = (ch >> 6) | 0xc0;
131 *s++ = (ch & 0x3f) | 0x80;
132 }
133 else
134 s = uvuni_to_utf8_flags (s, ch, 0);
135
136 return s;
119} 137}
120 138
121///////////////////////////////////////////////////////////////////////////// 139/////////////////////////////////////////////////////////////////////////////
122// encoder 140// encoder
123 141
128 char *end; // SvEND (sv) 146 char *end; // SvEND (sv)
129 SV *sv; // result scalar 147 SV *sv; // result scalar
130 JSON json; 148 JSON json;
131 U32 indent; // indentation level 149 U32 indent; // indentation level
132 U32 maxdepth; // max. indentation/recursion level 150 U32 maxdepth; // max. indentation/recursion level
151 UV limit; // escape character values >= this value when encoding
133} enc_t; 152} enc_t;
134 153
135inline void 154INLINE void
136need (enc_t *enc, STRLEN len) 155need (enc_t *enc, STRLEN len)
137{ 156{
138 if (expect_false (enc->cur + len >= enc->end)) 157 if (expect_false (enc->cur + len >= enc->end))
139 { 158 {
140 STRLEN cur = enc->cur - SvPVX (enc->sv); 159 STRLEN cur = enc->cur - SvPVX (enc->sv);
142 enc->cur = SvPVX (enc->sv) + cur; 161 enc->cur = SvPVX (enc->sv) + cur;
143 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; 162 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
144 } 163 }
145} 164}
146 165
147inline void 166INLINE void
148encode_ch (enc_t *enc, char ch) 167encode_ch (enc_t *enc, char ch)
149{ 168{
150 need (enc, 1); 169 need (enc, 1);
151 *enc->cur++ = ch; 170 *enc->cur++ = ch;
152} 171}
206 { 225 {
207 uch = ch; 226 uch = ch;
208 clen = 1; 227 clen = 1;
209 } 228 }
210 229
211 if (uch > 0x10FFFFUL) 230 if (uch < 0x80/*0x20*/ || uch >= enc->limit)
212 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
213
214 if (uch < 0x80 || enc->json.flags & F_ASCII || (enc->json.flags & F_LATIN1 && uch > 0xFF))
215 { 231 {
216 if (uch > 0xFFFFUL) 232 if (uch > 0xFFFFUL)
217 { 233 {
234 if (uch > 0x10FFFFUL)
235 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
236
218 need (enc, len += 11); 237 need (enc, len += 11);
219 sprintf (enc->cur, "\\u%04x\\u%04x", 238 sprintf (enc->cur, "\\u%04x\\u%04x",
220 (int)((uch - 0x10000) / 0x400 + 0xD800), 239 (int)((uch - 0x10000) / 0x400 + 0xD800),
221 (int)((uch - 0x10000) % 0x400 + 0xDC00)); 240 (int)((uch - 0x10000) % 0x400 + 0xDC00));
222 enc->cur += 12; 241 enc->cur += 12;
250 while (--clen); 269 while (--clen);
251 } 270 }
252 else 271 else
253 { 272 {
254 need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed 273 need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
255 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); 274 enc->cur = encode_utf8 (enc->cur, uch);
256 ++str; 275 ++str;
257 } 276 }
258 } 277 }
259 } 278 }
260 } 279 }
261 280
262 --len; 281 --len;
263 } 282 }
264} 283}
265 284
266inline void 285INLINE void
267encode_indent (enc_t *enc) 286encode_indent (enc_t *enc)
268{ 287{
269 if (enc->json.flags & F_INDENT) 288 if (enc->json.flags & F_INDENT)
270 { 289 {
271 int spaces = enc->indent * INDENT_STEP; 290 int spaces = enc->indent * INDENT_STEP;
274 memset (enc->cur, ' ', spaces); 293 memset (enc->cur, ' ', spaces);
275 enc->cur += spaces; 294 enc->cur += spaces;
276 } 295 }
277} 296}
278 297
279inline void 298INLINE void
280encode_space (enc_t *enc) 299encode_space (enc_t *enc)
281{ 300{
282 need (enc, 1); 301 need (enc, 1);
283 encode_ch (enc, ' '); 302 encode_ch (enc, ' ');
284} 303}
285 304
286inline void 305INLINE void
287encode_nl (enc_t *enc) 306encode_nl (enc_t *enc)
288{ 307{
289 if (enc->json.flags & F_INDENT) 308 if (enc->json.flags & F_INDENT)
290 { 309 {
291 need (enc, 1); 310 need (enc, 1);
292 encode_ch (enc, '\n'); 311 encode_ch (enc, '\n');
293 } 312 }
294} 313}
295 314
296inline void 315INLINE void
297encode_comma (enc_t *enc) 316encode_comma (enc_t *enc)
298{ 317{
299 encode_ch (enc, ','); 318 encode_ch (enc, ',');
300 319
301 if (enc->json.flags & F_INDENT) 320 if (enc->json.flags & F_INDENT)
680 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 699 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
681 enc.cur = SvPVX (enc.sv); 700 enc.cur = SvPVX (enc.sv);
682 enc.end = SvEND (enc.sv); 701 enc.end = SvEND (enc.sv);
683 enc.indent = 0; 702 enc.indent = 0;
684 enc.maxdepth = DEC_DEPTH (enc.json.flags); 703 enc.maxdepth = DEC_DEPTH (enc.json.flags);
704 enc.limit = enc.json.flags & F_ASCII ? 0x000080UL
705 : enc.json.flags & F_LATIN1 ? 0x000100UL
706 : 0x10FFFFUL;
685 707
686 SvPOK_only (enc.sv); 708 SvPOK_only (enc.sv);
687 encode_sv (&enc, scalar); 709 encode_sv (&enc, scalar);
688 710
689 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 711 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
710 JSON json; 732 JSON json;
711 U32 depth; // recursion depth 733 U32 depth; // recursion depth
712 U32 maxdepth; // recursion depth limit 734 U32 maxdepth; // recursion depth limit
713} dec_t; 735} dec_t;
714 736
715inline void 737INLINE void
716decode_comment (dec_t *dec) 738decode_comment (dec_t *dec)
717{ 739{
718 // only '#'-style comments allowed a.t.m. 740 // only '#'-style comments allowed a.t.m.
719 741
720 while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d) 742 while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d)
721 ++dec->cur; 743 ++dec->cur;
722} 744}
723 745
724inline void 746INLINE void
725decode_ws (dec_t *dec) 747decode_ws (dec_t *dec)
726{ 748{
727 for (;;) 749 for (;;)
728 { 750 {
729 char ch = *dec->cur; 751 char ch = *dec->cur;
855 877
856 if (hi >= 0x80) 878 if (hi >= 0x80)
857 { 879 {
858 utf8 = 1; 880 utf8 = 1;
859 881
860 cur = (char *)uvuni_to_utf8_flags (cur, hi, 0); 882 cur = encode_utf8 (cur, hi);
861 } 883 }
862 else 884 else
863 *cur++ = hi; 885 *cur++ = hi;
864 } 886 }
865 break; 887 break;
1000 1022
1001 if (!is_nv) 1023 if (!is_nv)
1002 { 1024 {
1003 int len = dec->cur - start; 1025 int len = dec->cur - start;
1004 1026
1005 // special case the rather common 1..4-digit-int case, assumes 32 bit ints or so 1027 // special case the rather common 1..5-digit-int case
1006 if (*start == '-') 1028 if (*start == '-')
1007 switch (len) 1029 switch (len)
1008 { 1030 {
1009 case 2: return newSViv (-( start [1] - '0' * 1)); 1031 case 2: return newSViv (-( start [1] - '0' * 1));
1010 case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); 1032 case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));
1011 case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); 1033 case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
1012 case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); 1034 case 5: return newSViv (-( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
1035 case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111));
1013 } 1036 }
1014 else 1037 else
1015 switch (len) 1038 switch (len)
1016 { 1039 {
1017 case 1: return newSViv ( start [0] - '0' * 1); 1040 case 1: return newSViv ( start [0] - '0' * 1);
1018 case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); 1041 case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
1019 case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); 1042 case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
1020 case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); 1043 case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
1044 case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111);
1021 } 1045 }
1022 1046
1023 { 1047 {
1024 UV uv; 1048 UV uv;
1025 int numtype = grok_number (start, len, &uv); 1049 int numtype = grok_number (start, len, &uv);
1457{ 1481{
1458 SV *pv = NEWSV (0, sizeof (JSON)); 1482 SV *pv = NEWSV (0, sizeof (JSON));
1459 SvPOK_only (pv); 1483 SvPOK_only (pv);
1460 Zero (SvPVX (pv), 1, JSON); 1484 Zero (SvPVX (pv), 1, JSON);
1461 ((JSON *)SvPVX (pv))->flags = F_DEFAULT; 1485 ((JSON *)SvPVX (pv))->flags = F_DEFAULT;
1462 XPUSHs (sv_2mortal (sv_bless (newRV_noinc (pv), JSON_STASH))); 1486 XPUSHs (sv_2mortal (sv_bless (
1487 newRV_noinc (pv),
1488 strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1)
1489 )));
1463} 1490}
1464 1491
1465void ascii (JSON *self, int enable = 1) 1492void ascii (JSON *self, int enable = 1)
1466 ALIAS: 1493 ALIAS:
1467 ascii = F_ASCII 1494 ascii = F_ASCII
1494 get_utf8 = F_UTF8 1521 get_utf8 = F_UTF8
1495 get_indent = F_INDENT 1522 get_indent = F_INDENT
1496 get_canonical = F_CANONICAL 1523 get_canonical = F_CANONICAL
1497 get_space_before = F_SPACE_BEFORE 1524 get_space_before = F_SPACE_BEFORE
1498 get_space_after = F_SPACE_AFTER 1525 get_space_after = F_SPACE_AFTER
1499 get_pretty = F_PRETTY
1500 get_allow_nonref = F_ALLOW_NONREF 1526 get_allow_nonref = F_ALLOW_NONREF
1501 get_shrink = F_SHRINK 1527 get_shrink = F_SHRINK
1502 get_allow_blessed = F_ALLOW_BLESSED 1528 get_allow_blessed = F_ALLOW_BLESSED
1503 get_convert_blessed = F_CONV_BLESSED 1529 get_convert_blessed = F_CONV_BLESSED
1504 get_relaxed = F_RELAXED 1530 get_relaxed = F_RELAXED
1518 self->flags = self->flags & ~F_MAXDEPTH | (log2 << S_MAXDEPTH); 1544 self->flags = self->flags & ~F_MAXDEPTH | (log2 << S_MAXDEPTH);
1519 1545
1520 XPUSHs (ST (0)); 1546 XPUSHs (ST (0));
1521} 1547}
1522 1548
1523int get_max_depth (JSON *self) 1549U32 get_max_depth (JSON *self)
1524 CODE: 1550 CODE:
1525 RETVAL = DEC_DEPTH (self->flags); 1551 RETVAL = DEC_DEPTH (self->flags);
1526 OUTPUT: 1552 OUTPUT:
1527 RETVAL 1553 RETVAL
1528 1554
1601 SvREFCNT_dec (self->cb_sk_object); 1627 SvREFCNT_dec (self->cb_sk_object);
1602 SvREFCNT_dec (self->cb_object); 1628 SvREFCNT_dec (self->cb_object);
1603 1629
1604PROTOTYPES: ENABLE 1630PROTOTYPES: ENABLE
1605 1631
1606void to_json (SV *scalar) 1632void encode_json (SV *scalar)
1607 PPCODE: 1633 PPCODE:
1608{ 1634{
1609 JSON json = { F_DEFAULT | F_UTF8 }; 1635 JSON json = { F_DEFAULT | F_UTF8 };
1610 XPUSHs (encode_json (scalar, &json)); 1636 XPUSHs (encode_json (scalar, &json));
1611} 1637}
1612 1638
1613void from_json (SV *jsonstr) 1639void decode_json (SV *jsonstr)
1614 PPCODE: 1640 PPCODE:
1615{ 1641{
1616 JSON json = { F_DEFAULT | F_UTF8 }; 1642 JSON json = { F_DEFAULT | F_UTF8 };
1617 XPUSHs (decode_json (jsonstr, &json, 0)); 1643 XPUSHs (decode_json (jsonstr, &json, 0));
1618} 1644}

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines