ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.72 by root, Wed Mar 19 04:08:22 2008 UTC vs.
Revision 1.77 by root, Tue Mar 25 06:37:38 2008 UTC

4 4
5#include <assert.h> 5#include <assert.h>
6#include <string.h> 6#include <string.h>
7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h>
9#include <float.h> 10#include <float.h>
10 11
11#if defined(__BORLANDC__) || defined(_MSC_VER) 12#if defined(__BORLANDC__) || defined(_MSC_VER)
12# define snprintf _snprintf // C compilers have this in stdio.h 13# define snprintf _snprintf // C compilers have this in stdio.h
13#endif 14#endif
15// some old perls do not have this, try to make it work, no 16// some old perls do not have this, try to make it work, no
16// guarantees, though. if it breaks, you get to keep the pieces. 17// guarantees, though. if it breaks, you get to keep the pieces.
17#ifndef UTF8_MAXBYTES 18#ifndef UTF8_MAXBYTES
18# define UTF8_MAXBYTES 13 19# define UTF8_MAXBYTES 13
19#endif 20#endif
21
22#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2)
20 23
21#define F_ASCII 0x00000001UL 24#define F_ASCII 0x00000001UL
22#define F_LATIN1 0x00000002UL 25#define F_LATIN1 0x00000002UL
23#define F_UTF8 0x00000004UL 26#define F_UTF8 0x00000004UL
24#define F_INDENT 0x00000008UL 27#define F_INDENT 0x00000008UL
75#endif 78#endif
76 79
77static HV *json_stash, *json_boolean_stash; // JSON::XS:: 80static HV *json_stash, *json_boolean_stash; // JSON::XS::
78static SV *json_true, *json_false; 81static SV *json_true, *json_false;
79 82
83enum {
84 INCR_M_WS = 0, // initial whitespace skipping, must be 0
85 INCR_M_STR, // inside string
86 INCR_M_BS, // inside backslash
87 INCR_M_JSON // outside anything, count nesting
88};
89
90#define INCR_DONE(json) (!(json)->incr_nest && (json)->incr_mode == INCR_M_JSON)
91
80typedef struct { 92typedef struct {
81 U32 flags; 93 U32 flags;
82 SV *cb_object; 94 SV *cb_object;
83 HV *cb_sk_object; 95 HV *cb_sk_object;
96
97 // for the incremental parser
98 SV *incr_text; // the source text so far
99 STRLEN incr_pos; // the current offset into the text
100 int incr_nest; // {[]}-nesting level
101 int incr_mode;
84} JSON; 102} JSON;
85 103
86///////////////////////////////////////////////////////////////////////////// 104/////////////////////////////////////////////////////////////////////////////
87// utility functions 105// utility functions
88 106
123// this function takes advantage of this fact, although current gccs 141// this function takes advantage of this fact, although current gccs
124// seem to optimise the check for >= 0x80 away anyways 142// seem to optimise the check for >= 0x80 away anyways
125INLINE unsigned char * 143INLINE unsigned char *
126encode_utf8 (unsigned char *s, UV ch) 144encode_utf8 (unsigned char *s, UV ch)
127{ 145{
128 if (ch <= 0x7FF) 146 if (expect_false (ch < 0x000080))
129 { 147 *s++ = ch;
130 *s++ = (ch >> 6) | 0xc0; 148 else if (expect_true (ch < 0x000800))
131 *s++ = (ch & 0x3f) | 0x80; 149 *s++ = 0xc0 | ( ch >> 6),
132 } 150 *s++ = 0x80 | ( ch & 0x3f);
133 else 151 else if ( ch < 0x010000)
134 s = uvuni_to_utf8_flags (s, ch, 0); 152 *s++ = 0xe0 | ( ch >> 12),
153 *s++ = 0x80 | ((ch >> 6) & 0x3f),
154 *s++ = 0x80 | ( ch & 0x3f);
155 else if ( ch < 0x110000)
156 *s++ = 0xf0 | ( ch >> 18),
157 *s++ = 0x80 | ((ch >> 12) & 0x3f),
158 *s++ = 0x80 | ((ch >> 6) & 0x3f),
159 *s++ = 0x80 | ( ch & 0x3f);
135 160
136 return s; 161 return s;
137} 162}
138 163
139///////////////////////////////////////////////////////////////////////////// 164/////////////////////////////////////////////////////////////////////////////
227 clen = 1; 252 clen = 1;
228 } 253 }
229 254
230 if (uch < 0x80/*0x20*/ || uch >= enc->limit) 255 if (uch < 0x80/*0x20*/ || uch >= enc->limit)
231 { 256 {
232 if (uch > 0xFFFFUL) 257 if (uch >= 0x10000UL)
233 { 258 {
234 if (uch > 0x10FFFFUL) 259 if (uch >= 0x110000UL)
235 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); 260 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
236 261
237 need (enc, len += 11); 262 need (enc, len += 11);
238 sprintf (enc->cur, "\\u%04x\\u%04x", 263 sprintf (enc->cur, "\\u%04x\\u%04x",
239 (int)((uch - 0x10000) / 0x400 + 0xD800), 264 (int)((uch - 0x10000) / 0x400 + 0xD800),
638 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); 663 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
639 enc->cur += strlen (enc->cur); 664 enc->cur += strlen (enc->cur);
640 } 665 }
641 else if (SvIOKp (sv)) 666 else if (SvIOKp (sv))
642 { 667 {
643 // we assume we can always read an IV as a UV 668 // we assume we can always read an IV as a UV and vice versa
644 if (SvUV (sv) & ~(UV)0x7fff) 669 // we assume two's complement
645 { 670 // we assume no aliasing issues in the union
646 // large integer, use the (rather slow) snprintf way. 671 if (SvIsUV (sv) ? SvUVX (sv) <= 59000
647 need (enc, sizeof (UV) * 3); 672 : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000)
648 enc->cur +=
649 SvIsUV(sv)
650 ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
651 : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
652 }
653 else
654 { 673 {
655 // optimise the "small number case" 674 // optimise the "small number case"
656 // code will likely be branchless and use only a single multiplication 675 // code will likely be branchless and use only a single multiplication
676 // works for numbers up to 59074
657 I32 i = SvIV (sv); 677 I32 i = SvIVX (sv);
658 U32 u; 678 U32 u;
659 char digit, nz = 0; 679 char digit, nz = 0;
660 680
661 need (enc, 6); 681 need (enc, 6);
662 682
668 688
669 // now output digit by digit, each time masking out the integer part 689 // now output digit by digit, each time masking out the integer part
670 // and multiplying by 5 while moving the decimal point one to the right, 690 // and multiplying by 5 while moving the decimal point one to the right,
671 // resulting in a net multiplication by 10. 691 // resulting in a net multiplication by 10.
672 // we always write the digit to memory but conditionally increment 692 // we always write the digit to memory but conditionally increment
673 // the pointer, to ease the usage of conditional move instructions. 693 // the pointer, to enable the use of conditional move instructions.
674 digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5; 694 digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5;
675 digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5; 695 digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5;
676 digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5; 696 digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5;
677 digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5; 697 digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5;
678 digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' 698 digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
699 }
700 else
701 {
702 // large integer, use the (rather slow) snprintf way.
703 need (enc, IVUV_MAXCHARS);
704 enc->cur +=
705 SvIsUV(sv)
706 ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv))
707 : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv));
679 } 708 }
680 } 709 }
681 else if (SvROK (sv)) 710 else if (SvROK (sv))
682 encode_rv (enc, SvRV (sv)); 711 encode_rv (enc, SvRV (sv));
683 else if (!SvOK (sv)) 712 else if (!SvOK (sv))
701 enc.end = SvEND (enc.sv); 730 enc.end = SvEND (enc.sv);
702 enc.indent = 0; 731 enc.indent = 0;
703 enc.maxdepth = DEC_DEPTH (enc.json.flags); 732 enc.maxdepth = DEC_DEPTH (enc.json.flags);
704 enc.limit = enc.json.flags & F_ASCII ? 0x000080UL 733 enc.limit = enc.json.flags & F_ASCII ? 0x000080UL
705 : enc.json.flags & F_LATIN1 ? 0x000100UL 734 : enc.json.flags & F_LATIN1 ? 0x000100UL
706 : 0x10FFFFUL; 735 : 0x110000UL;
707 736
708 SvPOK_only (enc.sv); 737 SvPOK_only (enc.sv);
709 encode_sv (&enc, scalar); 738 encode_sv (&enc, scalar);
710 739
711 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 740 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
889 default: 918 default:
890 --dec_cur; 919 --dec_cur;
891 ERR ("illegal backslash escape sequence in string"); 920 ERR ("illegal backslash escape sequence in string");
892 } 921 }
893 } 922 }
894 else if (expect_true (ch >= 0x20 && ch <= 0x7f)) 923 else if (expect_true (ch >= 0x20 && ch < 0x80))
895 *cur++ = ch; 924 *cur++ = ch;
896 else if (ch >= 0x80) 925 else if (ch >= 0x80)
897 { 926 {
898 STRLEN clen; 927 STRLEN clen;
899 UV uch; 928 UV uch;
1157 char *p = dec->cur; 1186 char *p = dec->cur;
1158 char *e = p + 24; // only try up to 24 bytes 1187 char *e = p + 24; // only try up to 24 bytes
1159 1188
1160 for (;;) 1189 for (;;)
1161 { 1190 {
1162 // the >= 0x80 is true on most architectures 1191 // the >= 0x80 is false on most architectures
1163 if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\') 1192 if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\')
1164 { 1193 {
1165 // slow path, back up and use decode_str 1194 // slow path, back up and use decode_str
1166 SV *key = decode_str (dec); 1195 SV *key = decode_str (dec);
1167 if (!key) 1196 if (!key)
1298 1327
1299static SV * 1328static SV *
1300decode_sv (dec_t *dec) 1329decode_sv (dec_t *dec)
1301{ 1330{
1302 // the beauty of JSON: you need exactly one character lookahead 1331 // the beauty of JSON: you need exactly one character lookahead
1303 // to parse anything. 1332 // to parse everything.
1304 switch (*dec->cur) 1333 switch (*dec->cur)
1305 { 1334 {
1306 case '"': ++dec->cur; return decode_str (dec); 1335 case '"': ++dec->cur; return decode_str (dec);
1307 case '[': ++dec->cur; return decode_av (dec); 1336 case '[': ++dec->cur; return decode_av (dec);
1308 case '{': ++dec->cur; return decode_hv (dec); 1337 case '{': ++dec->cur; return decode_hv (dec);
1309 1338
1310 case '-': 1339 case '-':
1311 case '0': case '1': case '2': case '3': case '4': 1340 case '0': case '1': case '2': case '3': case '4':
1312 case '5': case '6': case '7': case '8': case '9': 1341 case '5': case '6': case '7': case '8': case '9':
1313 return decode_num (dec); 1342 return decode_num (dec);
1359fail: 1388fail:
1360 return 0; 1389 return 0;
1361} 1390}
1362 1391
1363static SV * 1392static SV *
1364decode_json (SV *string, JSON *json, UV *offset_return) 1393decode_json (SV *string, JSON *json, STRLEN *offset_return)
1365{ 1394{
1366 dec_t dec; 1395 dec_t dec;
1367 UV offset; 1396 STRLEN offset;
1368 SV *sv; 1397 SV *sv;
1369 1398
1370 SvGETMAGIC (string); 1399 SvGETMAGIC (string);
1371 SvUPGRADE (string, SVt_PV); 1400 SvUPGRADE (string, SVt_PV);
1372 1401
1442 1471
1443 if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv)) 1472 if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv))
1444 croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)"); 1473 croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
1445 1474
1446 return sv; 1475 return sv;
1476}
1477
1478/////////////////////////////////////////////////////////////////////////////
1479// incremental parser
1480
1481static void
1482incr_parse (JSON *self)
1483{
1484 const char *p = SvPVX (self->incr_text) + self->incr_pos;
1485
1486 for (;;)
1487 {
1488 //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D
1489 switch (self->incr_mode)
1490 {
1491 // only used for initial whitespace skipping
1492 case INCR_M_WS:
1493 for (;;)
1494 {
1495 if (*p > 0x20)
1496 {
1497 self->incr_mode = INCR_M_JSON;
1498 goto incr_m_json;
1499 }
1500 else if (!*p)
1501 goto interrupt;
1502
1503 ++p;
1504 }
1505
1506 // skip a single char inside a string (for \\-processing)
1507 case INCR_M_BS:
1508 if (!*p)
1509 goto interrupt;
1510
1511 ++p;
1512 self->incr_mode = INCR_M_STR;
1513 goto incr_m_str;
1514
1515 // inside a string
1516 case INCR_M_STR:
1517 incr_m_str:
1518 for (;;)
1519 {
1520 if (*p == '"')
1521 {
1522 ++p;
1523 self->incr_mode = INCR_M_JSON;
1524
1525 if (!self->incr_nest)
1526 goto interrupt;
1527
1528 goto incr_m_json;
1529 }
1530 else if (*p == '\\')
1531 {
1532 ++p; // "virtually" consumes character after \
1533
1534 if (!*p) // if at end of string we have to switch modes
1535 {
1536 self->incr_mode = INCR_M_BS;
1537 goto interrupt;
1538 }
1539 }
1540 else if (!*p)
1541 goto interrupt;
1542
1543 ++p;
1544 }
1545
1546 // after initial ws, outside string
1547 case INCR_M_JSON:
1548 incr_m_json:
1549 for (;;)
1550 {
1551 switch (*p++)
1552 {
1553 case 0:
1554 --p;
1555 goto interrupt;
1556
1557 case 0x09:
1558 case 0x0a:
1559 case 0x0d:
1560 case 0x20:
1561 if (!self->incr_nest)
1562 {
1563 --p; // do not eat the whitespace, let the next round do it
1564 goto interrupt;
1565 }
1566 break;
1567
1568 case '"':
1569 self->incr_mode = INCR_M_STR;
1570 goto incr_m_str;
1571
1572 case '[':
1573 case '{':
1574 ++self->incr_nest;
1575 break;
1576
1577 case ']':
1578 case '}':
1579 if (!--self->incr_nest)
1580 goto interrupt;
1581 }
1582 }
1583 }
1584
1585 modechange:
1586 ;
1587 }
1588
1589interrupt:
1590 self->incr_pos = p - SvPVX (self->incr_text);
1591 //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D
1447} 1592}
1448 1593
1449///////////////////////////////////////////////////////////////////////////// 1594/////////////////////////////////////////////////////////////////////////////
1450// XS interface functions 1595// XS interface functions
1451 1596
1614 XPUSHs (decode_json (jsonstr, self, 0)); 1759 XPUSHs (decode_json (jsonstr, self, 0));
1615 1760
1616void decode_prefix (JSON *self, SV *jsonstr) 1761void decode_prefix (JSON *self, SV *jsonstr)
1617 PPCODE: 1762 PPCODE:
1618{ 1763{
1619 UV offset; 1764 STRLEN offset;
1620 EXTEND (SP, 2); 1765 EXTEND (SP, 2);
1621 PUSHs (decode_json (jsonstr, self, &offset)); 1766 PUSHs (decode_json (jsonstr, self, &offset));
1622 PUSHs (sv_2mortal (newSVuv (offset))); 1767 PUSHs (sv_2mortal (newSVuv (offset)));
1623} 1768}
1769
1770void incr_parse (JSON *self, SV *jsonstr = 0)
1771 PPCODE:
1772{
1773 if (!self->incr_text)
1774 self->incr_text = newSVpvn ("", 0);
1775
1776 // append data, if any
1777 if (jsonstr)
1778 {
1779 if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text))
1780 {
1781 /* utf-8-ness differs, need to upgrade */
1782 sv_utf8_upgrade (self->incr_text);
1783
1784 if (self->incr_pos)
1785 self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
1786 - (U8 *)SvPVX (self->incr_text);
1787 }
1788
1789 {
1790 STRLEN len;
1791 const char *str = SvPV (jsonstr, len);
1792 SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1);
1793 Move (str, SvEND (self->incr_text), len, char);
1794 SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len);
1795 *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there
1796 }
1797 }
1798
1799 if (GIMME_V != G_VOID)
1800 do
1801 {
1802 STRLEN offset;
1803
1804 incr_parse (self);
1805
1806 if (!INCR_DONE (self))
1807 break;
1808
1809 XPUSHs (decode_json (self->incr_text, self, &offset));
1810
1811 sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset);
1812 self->incr_pos -= offset;
1813 self->incr_nest = 0;
1814 self->incr_mode = 0;
1815 }
1816 while (GIMME_V == G_ARRAY);
1817}
1818
1819SV *incr_text (JSON *self)
1820 ATTRS: lvalue
1821 CODE:
1822{
1823 if (self->incr_pos)
1824 croak ("incr_text can only be called after a successful incr_parse call in scalar context %d", self->incr_pos);//D
1825
1826 RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef;
1827}
1828 OUTPUT:
1829 RETVAL
1624 1830
1625void DESTROY (JSON *self) 1831void DESTROY (JSON *self)
1626 CODE: 1832 CODE:
1627 SvREFCNT_dec (self->cb_sk_object); 1833 SvREFCNT_dec (self->cb_sk_object);
1628 SvREFCNT_dec (self->cb_object); 1834 SvREFCNT_dec (self->cb_object);
1835 SvREFCNT_dec (self->incr_text);
1629 1836
1630PROTOTYPES: ENABLE 1837PROTOTYPES: ENABLE
1631 1838
1632void encode_json (SV *scalar) 1839void encode_json (SV *scalar)
1840 ALIAS:
1841 to_json_ = 0
1842 encode_json = F_UTF8
1633 PPCODE: 1843 PPCODE:
1634{ 1844{
1635 JSON json = { F_DEFAULT | F_UTF8 }; 1845 JSON json = { F_DEFAULT | ix };
1636 XPUSHs (encode_json (scalar, &json)); 1846 XPUSHs (encode_json (scalar, &json));
1637} 1847}
1638 1848
1639void decode_json (SV *jsonstr) 1849void decode_json (SV *jsonstr)
1850 ALIAS:
1851 from_json_ = 0
1852 decode_json = F_UTF8
1640 PPCODE: 1853 PPCODE:
1641{ 1854{
1642 JSON json = { F_DEFAULT | F_UTF8 }; 1855 JSON json = { F_DEFAULT | ix };
1643 XPUSHs (decode_json (jsonstr, &json, 0)); 1856 XPUSHs (decode_json (jsonstr, &json, 0));
1644} 1857}
1645 1858
1859

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines