ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.71 by root, Wed Mar 19 03:17:38 2008 UTC vs.
Revision 1.76 by root, Thu Mar 20 00:56:37 2008 UTC

4 4
5#include <assert.h> 5#include <assert.h>
6#include <string.h> 6#include <string.h>
7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h>
9#include <float.h> 10#include <float.h>
10 11
11#if defined(__BORLANDC__) || defined(_MSC_VER) 12#if defined(__BORLANDC__) || defined(_MSC_VER)
12# define snprintf _snprintf // C compilers have this in stdio.h 13# define snprintf _snprintf // C compilers have this in stdio.h
13#endif 14#endif
15// some old perls do not have this, try to make it work, no 16// some old perls do not have this, try to make it work, no
16// guarantees, though. if it breaks, you get to keep the pieces. 17// guarantees, though. if it breaks, you get to keep the pieces.
17#ifndef UTF8_MAXBYTES 18#ifndef UTF8_MAXBYTES
18# define UTF8_MAXBYTES 13 19# define UTF8_MAXBYTES 13
19#endif 20#endif
21
22#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2)
20 23
21#define F_ASCII 0x00000001UL 24#define F_ASCII 0x00000001UL
22#define F_LATIN1 0x00000002UL 25#define F_LATIN1 0x00000002UL
23#define F_UTF8 0x00000004UL 26#define F_UTF8 0x00000004UL
24#define F_INDENT 0x00000008UL 27#define F_INDENT 0x00000008UL
60#endif 63#endif
61 64
62#define expect_false(expr) expect ((expr) != 0, 0) 65#define expect_false(expr) expect ((expr) != 0, 0)
63#define expect_true(expr) expect ((expr) != 0, 1) 66#define expect_true(expr) expect ((expr) != 0, 1)
64 67
68#define IN_RANGE_INC(type,val,beg,end) \
69 ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
70 <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
71
65#ifdef USE_ITHREADS 72#ifdef USE_ITHREADS
66# define JSON_SLOW 1 73# define JSON_SLOW 1
67# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) 74# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
68#else 75#else
69# define JSON_SLOW 0 76# define JSON_SLOW 0
102// but use the very good perl function to parse anything else. 109// but use the very good perl function to parse anything else.
103// note that we never call this function for ASCII codepoints 110// note that we never call this function for ASCII codepoints
104INLINE UV 111INLINE UV
105decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) 112decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)
106{ 113{
107 if (expect_false (s[0] > 0xdf || s[0] < 0xc2)) 114 if (expect_true (len >= 2
108 return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); 115 && IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
109 else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf) 116 && IN_RANGE_INC (char, s[1], 0x80, 0xbf)))
110 { 117 {
111 *clen = 2; 118 *clen = 2;
112 return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); 119 return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
113 } 120 }
114 else 121 else
115 { 122 return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
116 *clen = (STRLEN)-1; 123}
117 return (UV)-1; 124
118 } 125// likewise for encoding, also never called for ascii codepoints
126// this function takes advantage of this fact, although current gccs
127// seem to optimise the check for >= 0x80 away anyways
128INLINE unsigned char *
129encode_utf8 (unsigned char *s, UV ch)
130{
131 if (expect_false (ch < 0x000080))
132 *s++ = ch;
133 else if (expect_true (ch < 0x000800))
134 *s++ = 0xc0 | ( ch >> 6),
135 *s++ = 0x80 | ( ch & 0x3f);
136 else if ( ch < 0x010000)
137 *s++ = 0xe0 | ( ch >> 12),
138 *s++ = 0x80 | ((ch >> 6) & 0x3f),
139 *s++ = 0x80 | ( ch & 0x3f);
140 else if ( ch < 0x110000)
141 *s++ = 0xf0 | ( ch >> 18),
142 *s++ = 0x80 | ((ch >> 12) & 0x3f),
143 *s++ = 0x80 | ((ch >> 6) & 0x3f),
144 *s++ = 0x80 | ( ch & 0x3f);
145
146 return s;
119} 147}
120 148
121///////////////////////////////////////////////////////////////////////////// 149/////////////////////////////////////////////////////////////////////////////
122// encoder 150// encoder
123 151
207 { 235 {
208 uch = ch; 236 uch = ch;
209 clen = 1; 237 clen = 1;
210 } 238 }
211 239
212 if (uch < 0x20 || uch >= enc->limit) 240 if (uch < 0x80/*0x20*/ || uch >= enc->limit)
213 { 241 {
214 if (uch > 0xFFFFUL) 242 if (uch >= 0x10000UL)
215 { 243 {
216 if (uch > 0x10FFFFUL) 244 if (uch >= 0x110000UL)
217 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); 245 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
218 246
219 need (enc, len += 11); 247 need (enc, len += 11);
220 sprintf (enc->cur, "\\u%04x\\u%04x", 248 sprintf (enc->cur, "\\u%04x\\u%04x",
221 (int)((uch - 0x10000) / 0x400 + 0xD800), 249 (int)((uch - 0x10000) / 0x400 + 0xD800),
251 while (--clen); 279 while (--clen);
252 } 280 }
253 else 281 else
254 { 282 {
255 need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed 283 need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
256 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); 284 enc->cur = encode_utf8 (enc->cur, uch);
257 ++str; 285 ++str;
258 } 286 }
259 } 287 }
260 } 288 }
261 } 289 }
620 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); 648 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
621 enc->cur += strlen (enc->cur); 649 enc->cur += strlen (enc->cur);
622 } 650 }
623 else if (SvIOKp (sv)) 651 else if (SvIOKp (sv))
624 { 652 {
625 // we assume we can always read an IV as a UV 653 // we assume we can always read an IV as a UV and vice versa
626 if (SvUV (sv) & ~(UV)0x7fff) 654 // we assume two's complement
627 { 655 // we assume no aliasing issues in the union
628 // large integer, use the (rather slow) snprintf way. 656 if (SvIsUV (sv) ? SvUVX (sv) <= 59000
629 need (enc, sizeof (UV) * 3); 657 : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000)
630 enc->cur +=
631 SvIsUV(sv)
632 ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
633 : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
634 }
635 else
636 { 658 {
637 // optimise the "small number case" 659 // optimise the "small number case"
638 // code will likely be branchless and use only a single multiplication 660 // code will likely be branchless and use only a single multiplication
661 // works for numbers up to 59074
639 I32 i = SvIV (sv); 662 I32 i = SvIVX (sv);
640 U32 u; 663 U32 u;
641 char digit, nz = 0; 664 char digit, nz = 0;
642 665
643 need (enc, 6); 666 need (enc, 6);
644 667
650 673
651 // now output digit by digit, each time masking out the integer part 674 // now output digit by digit, each time masking out the integer part
652 // and multiplying by 5 while moving the decimal point one to the right, 675 // and multiplying by 5 while moving the decimal point one to the right,
653 // resulting in a net multiplication by 10. 676 // resulting in a net multiplication by 10.
654 // we always write the digit to memory but conditionally increment 677 // we always write the digit to memory but conditionally increment
655 // the pointer, to ease the usage of conditional move instructions. 678 // the pointer, to enable the use of conditional move instructions.
656 digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5; 679 digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5;
657 digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5; 680 digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5;
658 digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5; 681 digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5;
659 digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5; 682 digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5;
660 digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' 683 digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
684 }
685 else
686 {
687 // large integer, use the (rather slow) snprintf way.
688 need (enc, IVUV_MAXCHARS);
689 enc->cur +=
690 SvIsUV(sv)
691 ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv))
692 : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv));
661 } 693 }
662 } 694 }
663 else if (SvROK (sv)) 695 else if (SvROK (sv))
664 encode_rv (enc, SvRV (sv)); 696 encode_rv (enc, SvRV (sv));
665 else if (!SvOK (sv)) 697 else if (!SvOK (sv))
683 enc.end = SvEND (enc.sv); 715 enc.end = SvEND (enc.sv);
684 enc.indent = 0; 716 enc.indent = 0;
685 enc.maxdepth = DEC_DEPTH (enc.json.flags); 717 enc.maxdepth = DEC_DEPTH (enc.json.flags);
686 enc.limit = enc.json.flags & F_ASCII ? 0x000080UL 718 enc.limit = enc.json.flags & F_ASCII ? 0x000080UL
687 : enc.json.flags & F_LATIN1 ? 0x000100UL 719 : enc.json.flags & F_LATIN1 ? 0x000100UL
688 : 0x10FFFFUL; 720 : 0x110000UL;
689 721
690 SvPOK_only (enc.sv); 722 SvPOK_only (enc.sv);
691 encode_sv (&enc, scalar); 723 encode_sv (&enc, scalar);
692 724
693 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 725 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
859 891
860 if (hi >= 0x80) 892 if (hi >= 0x80)
861 { 893 {
862 utf8 = 1; 894 utf8 = 1;
863 895
864 cur = (char *)uvuni_to_utf8_flags (cur, hi, 0); 896 cur = encode_utf8 (cur, hi);
865 } 897 }
866 else 898 else
867 *cur++ = hi; 899 *cur++ = hi;
868 } 900 }
869 break; 901 break;
871 default: 903 default:
872 --dec_cur; 904 --dec_cur;
873 ERR ("illegal backslash escape sequence in string"); 905 ERR ("illegal backslash escape sequence in string");
874 } 906 }
875 } 907 }
876 else if (expect_true (ch >= 0x20 && ch <= 0x7f)) 908 else if (expect_true (ch >= 0x20 && ch < 0x80))
877 *cur++ = ch; 909 *cur++ = ch;
878 else if (ch >= 0x80) 910 else if (ch >= 0x80)
879 { 911 {
880 STRLEN clen; 912 STRLEN clen;
881 UV uch; 913 UV uch;
1280 1312
1281static SV * 1313static SV *
1282decode_sv (dec_t *dec) 1314decode_sv (dec_t *dec)
1283{ 1315{
1284 // the beauty of JSON: you need exactly one character lookahead 1316 // the beauty of JSON: you need exactly one character lookahead
1285 // to parse anything. 1317 // to parse everything.
1286 switch (*dec->cur) 1318 switch (*dec->cur)
1287 { 1319 {
1288 case '"': ++dec->cur; return decode_str (dec); 1320 case '"': ++dec->cur; return decode_str (dec);
1289 case '[': ++dec->cur; return decode_av (dec); 1321 case '[': ++dec->cur; return decode_av (dec);
1290 case '{': ++dec->cur; return decode_hv (dec); 1322 case '{': ++dec->cur; return decode_hv (dec);
1291 1323
1292 case '-': 1324 case '-':
1293 case '0': case '1': case '2': case '3': case '4': 1325 case '0': case '1': case '2': case '3': case '4':
1294 case '5': case '6': case '7': case '8': case '9': 1326 case '5': case '6': case '7': case '8': case '9':
1295 return decode_num (dec); 1327 return decode_num (dec);
1610 SvREFCNT_dec (self->cb_object); 1642 SvREFCNT_dec (self->cb_object);
1611 1643
1612PROTOTYPES: ENABLE 1644PROTOTYPES: ENABLE
1613 1645
1614void encode_json (SV *scalar) 1646void encode_json (SV *scalar)
1647 ALIAS:
1648 to_json_ = 0
1649 encode_json = F_UTF8
1615 PPCODE: 1650 PPCODE:
1616{ 1651{
1617 JSON json = { F_DEFAULT | F_UTF8 }; 1652 JSON json = { F_DEFAULT | ix };
1618 XPUSHs (encode_json (scalar, &json)); 1653 XPUSHs (encode_json (scalar, &json));
1619} 1654}
1620 1655
1621void decode_json (SV *jsonstr) 1656void decode_json (SV *jsonstr)
1657 ALIAS:
1658 from_json_ = 0
1659 decode_json = F_UTF8
1622 PPCODE: 1660 PPCODE:
1623{ 1661{
1624 JSON json = { F_DEFAULT | F_UTF8 }; 1662 JSON json = { F_DEFAULT | ix };
1625 XPUSHs (decode_json (jsonstr, &json, 0)); 1663 XPUSHs (decode_json (jsonstr, &json, 0));
1626} 1664}
1627 1665

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines