ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.73 by root, Wed Mar 19 13:44:43 2008 UTC vs.
Revision 1.81 by root, Wed Mar 26 01:40:43 2008 UTC

4 4
5#include <assert.h> 5#include <assert.h>
6#include <string.h> 6#include <string.h>
7#include <stdlib.h> 7#include <stdlib.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h>
9#include <float.h> 10#include <float.h>
10 11
11#if defined(__BORLANDC__) || defined(_MSC_VER) 12#if defined(__BORLANDC__) || defined(_MSC_VER)
12# define snprintf _snprintf // C compilers have this in stdio.h 13# define snprintf _snprintf // C compilers have this in stdio.h
13#endif 14#endif
15// some old perls do not have this, try to make it work, no 16// some old perls do not have this, try to make it work, no
16// guarantees, though. if it breaks, you get to keep the pieces. 17// guarantees, though. if it breaks, you get to keep the pieces.
17#ifndef UTF8_MAXBYTES 18#ifndef UTF8_MAXBYTES
18# define UTF8_MAXBYTES 13 19# define UTF8_MAXBYTES 13
19#endif 20#endif
21
22#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2)
20 23
21#define F_ASCII 0x00000001UL 24#define F_ASCII 0x00000001UL
22#define F_LATIN1 0x00000002UL 25#define F_LATIN1 0x00000002UL
23#define F_UTF8 0x00000004UL 26#define F_UTF8 0x00000004UL
24#define F_INDENT 0x00000008UL 27#define F_INDENT 0x00000008UL
75#endif 78#endif
76 79
77static HV *json_stash, *json_boolean_stash; // JSON::XS:: 80static HV *json_stash, *json_boolean_stash; // JSON::XS::
78static SV *json_true, *json_false; 81static SV *json_true, *json_false;
79 82
83enum {
84 INCR_M_WS = 0, // initial whitespace skipping, must be 0
85 INCR_M_STR, // inside string
86 INCR_M_BS, // inside backslash
87 INCR_M_JSON // outside anything, count nesting
88};
89
90#define INCR_DONE(json) (!(json)->incr_nest && (json)->incr_mode == INCR_M_JSON)
91
80typedef struct { 92typedef struct {
81 U32 flags; 93 U32 flags;
82 SV *cb_object; 94 SV *cb_object;
83 HV *cb_sk_object; 95 HV *cb_sk_object;
96
97 // for the incremental parser
98 SV *incr_text; // the source text so far
99 STRLEN incr_pos; // the current offset into the text
100 int incr_nest; // {[]}-nesting level
101 int incr_mode;
84} JSON; 102} JSON;
85 103
86///////////////////////////////////////////////////////////////////////////// 104/////////////////////////////////////////////////////////////////////////////
87// utility functions 105// utility functions
88 106
123// this function takes advantage of this fact, although current gccs 141// this function takes advantage of this fact, although current gccs
124// seem to optimise the check for >= 0x80 away anyways 142// seem to optimise the check for >= 0x80 away anyways
125INLINE unsigned char * 143INLINE unsigned char *
126encode_utf8 (unsigned char *s, UV ch) 144encode_utf8 (unsigned char *s, UV ch)
127{ 145{
128 if (ch <= 0x7FF) 146 if (expect_false (ch < 0x000080))
129 { 147 *s++ = ch;
130 *s++ = (ch >> 6) | 0xc0; 148 else if (expect_true (ch < 0x000800))
131 *s++ = (ch & 0x3f) | 0x80; 149 *s++ = 0xc0 | ( ch >> 6),
132 } 150 *s++ = 0x80 | ( ch & 0x3f);
133 else 151 else if ( ch < 0x010000)
134 s = uvuni_to_utf8_flags (s, ch, 0); 152 *s++ = 0xe0 | ( ch >> 12),
153 *s++ = 0x80 | ((ch >> 6) & 0x3f),
154 *s++ = 0x80 | ( ch & 0x3f);
155 else if ( ch < 0x110000)
156 *s++ = 0xf0 | ( ch >> 18),
157 *s++ = 0x80 | ((ch >> 12) & 0x3f),
158 *s++ = 0x80 | ((ch >> 6) & 0x3f),
159 *s++ = 0x80 | ( ch & 0x3f);
135 160
136 return s; 161 return s;
137} 162}
138 163
139///////////////////////////////////////////////////////////////////////////// 164/////////////////////////////////////////////////////////////////////////////
227 clen = 1; 252 clen = 1;
228 } 253 }
229 254
230 if (uch < 0x80/*0x20*/ || uch >= enc->limit) 255 if (uch < 0x80/*0x20*/ || uch >= enc->limit)
231 { 256 {
232 if (uch > 0xFFFFUL) 257 if (uch >= 0x10000UL)
233 { 258 {
234 if (uch > 0x10FFFFUL) 259 if (uch >= 0x110000UL)
235 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); 260 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
236 261
237 need (enc, len += 11); 262 need (enc, len += 11);
238 sprintf (enc->cur, "\\u%04x\\u%04x", 263 sprintf (enc->cur, "\\u%04x\\u%04x",
239 (int)((uch - 0x10000) / 0x400 + 0xD800), 264 (int)((uch - 0x10000) / 0x400 + 0xD800),
413 438
414static void 439static void
415encode_hv (enc_t *enc, HV *hv) 440encode_hv (enc_t *enc, HV *hv)
416{ 441{
417 HE *he; 442 HE *he;
418 int count;
419 443
420 if (enc->indent >= enc->maxdepth) 444 if (enc->indent >= enc->maxdepth)
421 croak ("data structure too deep (hit recursion limit)"); 445 croak ("data structure too deep (hit recursion limit)");
422 446
423 encode_ch (enc, '{'); 447 encode_ch (enc, '{');
638 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); 662 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
639 enc->cur += strlen (enc->cur); 663 enc->cur += strlen (enc->cur);
640 } 664 }
641 else if (SvIOKp (sv)) 665 else if (SvIOKp (sv))
642 { 666 {
643 // we assume we can always read an IV as a UV 667 // we assume we can always read an IV as a UV and vice versa
644 if (SvUV (sv) & ~(UV)0x7fff) 668 // we assume two's complement
645 { 669 // we assume no aliasing issues in the union
646 // large integer, use the (rather slow) snprintf way. 670 if (SvIsUV (sv) ? SvUVX (sv) <= 59000
647 need (enc, sizeof (UV) * 3); 671 : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000)
648 enc->cur +=
649 SvIsUV(sv)
650 ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
651 : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
652 }
653 else
654 { 672 {
655 // optimise the "small number case" 673 // optimise the "small number case"
656 // code will likely be branchless and use only a single multiplication 674 // code will likely be branchless and use only a single multiplication
675 // works for numbers up to 59074
657 I32 i = SvIV (sv); 676 I32 i = SvIVX (sv);
658 U32 u; 677 U32 u;
659 char digit, nz = 0; 678 char digit, nz = 0;
660 679
661 need (enc, 6); 680 need (enc, 6);
662 681
668 687
669 // now output digit by digit, each time masking out the integer part 688 // now output digit by digit, each time masking out the integer part
670 // and multiplying by 5 while moving the decimal point one to the right, 689 // and multiplying by 5 while moving the decimal point one to the right,
671 // resulting in a net multiplication by 10. 690 // resulting in a net multiplication by 10.
672 // we always write the digit to memory but conditionally increment 691 // we always write the digit to memory but conditionally increment
673 // the pointer, to ease the usage of conditional move instructions. 692 // the pointer, to enable the use of conditional move instructions.
674 digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5; 693 digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5;
675 digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5; 694 digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5;
676 digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5; 695 digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5;
677 digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5; 696 digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5;
678 digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' 697 digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
698 }
699 else
700 {
701 // large integer, use the (rather slow) snprintf way.
702 need (enc, IVUV_MAXCHARS);
703 enc->cur +=
704 SvIsUV(sv)
705 ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv))
706 : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv));
679 } 707 }
680 } 708 }
681 else if (SvROK (sv)) 709 else if (SvROK (sv))
682 encode_rv (enc, SvRV (sv)); 710 encode_rv (enc, SvRV (sv));
683 else if (!SvOK (sv)) 711 else if (!SvOK (sv))
701 enc.end = SvEND (enc.sv); 729 enc.end = SvEND (enc.sv);
702 enc.indent = 0; 730 enc.indent = 0;
703 enc.maxdepth = DEC_DEPTH (enc.json.flags); 731 enc.maxdepth = DEC_DEPTH (enc.json.flags);
704 enc.limit = enc.json.flags & F_ASCII ? 0x000080UL 732 enc.limit = enc.json.flags & F_ASCII ? 0x000080UL
705 : enc.json.flags & F_LATIN1 ? 0x000100UL 733 : enc.json.flags & F_LATIN1 ? 0x000100UL
706 : 0x10FFFFUL; 734 : 0x110000UL;
707 735
708 SvPOK_only (enc.sv); 736 SvPOK_only (enc.sv);
709 encode_sv (&enc, scalar); 737 encode_sv (&enc, scalar);
710 738
711 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 739 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
889 default: 917 default:
890 --dec_cur; 918 --dec_cur;
891 ERR ("illegal backslash escape sequence in string"); 919 ERR ("illegal backslash escape sequence in string");
892 } 920 }
893 } 921 }
894 else if (expect_true (ch >= 0x20 && ch <= 0x7f)) 922 else if (expect_true (ch >= 0x20 && ch < 0x80))
895 *cur++ = ch; 923 *cur++ = ch;
896 else if (ch >= 0x80) 924 else if (ch >= 0x80)
897 { 925 {
898 STRLEN clen; 926 STRLEN clen;
899 UV uch; 927 UV uch;
1157 char *p = dec->cur; 1185 char *p = dec->cur;
1158 char *e = p + 24; // only try up to 24 bytes 1186 char *e = p + 24; // only try up to 24 bytes
1159 1187
1160 for (;;) 1188 for (;;)
1161 { 1189 {
1162 // the >= 0x80 is true on most architectures 1190 // the >= 0x80 is false on most architectures
1163 if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\') 1191 if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\')
1164 { 1192 {
1165 // slow path, back up and use decode_str 1193 // slow path, back up and use decode_str
1166 SV *key = decode_str (dec); 1194 SV *key = decode_str (dec);
1167 if (!key) 1195 if (!key)
1359fail: 1387fail:
1360 return 0; 1388 return 0;
1361} 1389}
1362 1390
1363static SV * 1391static SV *
1364decode_json (SV *string, JSON *json, UV *offset_return) 1392decode_json (SV *string, JSON *json, STRLEN *offset_return)
1365{ 1393{
1366 dec_t dec; 1394 dec_t dec;
1367 UV offset; 1395 STRLEN offset;
1368 SV *sv; 1396 SV *sv;
1369 1397
1370 SvGETMAGIC (string); 1398 SvGETMAGIC (string);
1371 SvUPGRADE (string, SVt_PV); 1399 SvUPGRADE (string, SVt_PV);
1372 1400
1442 1470
1443 if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv)) 1471 if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv))
1444 croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)"); 1472 croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
1445 1473
1446 return sv; 1474 return sv;
1475}
1476
1477/////////////////////////////////////////////////////////////////////////////
1478// incremental parser
1479
1480static void
1481incr_parse (JSON *self)
1482{
1483 const char *p = SvPVX (self->incr_text) + self->incr_pos;
1484
1485 for (;;)
1486 {
1487 //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D
1488 switch (self->incr_mode)
1489 {
1490 // only used for initial whitespace skipping
1491 case INCR_M_WS:
1492 for (;;)
1493 {
1494 if (*p > 0x20)
1495 {
1496 self->incr_mode = INCR_M_JSON;
1497 goto incr_m_json;
1498 }
1499 else if (!*p)
1500 goto interrupt;
1501
1502 ++p;
1503 }
1504
1505 // skip a single char inside a string (for \\-processing)
1506 case INCR_M_BS:
1507 if (!*p)
1508 goto interrupt;
1509
1510 ++p;
1511 self->incr_mode = INCR_M_STR;
1512 goto incr_m_str;
1513
1514 // inside a string
1515 case INCR_M_STR:
1516 incr_m_str:
1517 for (;;)
1518 {
1519 if (*p == '"')
1520 {
1521 ++p;
1522 self->incr_mode = INCR_M_JSON;
1523
1524 if (!self->incr_nest)
1525 goto interrupt;
1526
1527 goto incr_m_json;
1528 }
1529 else if (*p == '\\')
1530 {
1531 ++p; // "virtually" consumes character after \
1532
1533 if (!*p) // if at end of string we have to switch modes
1534 {
1535 self->incr_mode = INCR_M_BS;
1536 goto interrupt;
1537 }
1538 }
1539 else if (!*p)
1540 goto interrupt;
1541
1542 ++p;
1543 }
1544
1545 // after initial ws, outside string
1546 case INCR_M_JSON:
1547 incr_m_json:
1548 for (;;)
1549 {
1550 switch (*p++)
1551 {
1552 case 0:
1553 --p;
1554 goto interrupt;
1555
1556 case 0x09:
1557 case 0x0a:
1558 case 0x0d:
1559 case 0x20:
1560 if (!self->incr_nest)
1561 {
1562 --p; // do not eat the whitespace, let the next round do it
1563 goto interrupt;
1564 }
1565 break;
1566
1567 case '"':
1568 self->incr_mode = INCR_M_STR;
1569 goto incr_m_str;
1570
1571 case '[':
1572 case '{':
1573 ++self->incr_nest;
1574 break;
1575
1576 case ']':
1577 case '}':
1578 if (!--self->incr_nest)
1579 goto interrupt;
1580 }
1581 }
1582 }
1583
1584 modechange:
1585 ;
1586 }
1587
1588interrupt:
1589 self->incr_pos = p - SvPVX (self->incr_text);
1590 //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D
1447} 1591}
1448 1592
1449///////////////////////////////////////////////////////////////////////////// 1593/////////////////////////////////////////////////////////////////////////////
1450// XS interface functions 1594// XS interface functions
1451 1595
1614 XPUSHs (decode_json (jsonstr, self, 0)); 1758 XPUSHs (decode_json (jsonstr, self, 0));
1615 1759
1616void decode_prefix (JSON *self, SV *jsonstr) 1760void decode_prefix (JSON *self, SV *jsonstr)
1617 PPCODE: 1761 PPCODE:
1618{ 1762{
1619 UV offset; 1763 STRLEN offset;
1620 EXTEND (SP, 2); 1764 EXTEND (SP, 2);
1621 PUSHs (decode_json (jsonstr, self, &offset)); 1765 PUSHs (decode_json (jsonstr, self, &offset));
1622 PUSHs (sv_2mortal (newSVuv (offset))); 1766 PUSHs (sv_2mortal (newSVuv (offset)));
1767}
1768
1769void incr_parse (JSON *self, SV *jsonstr = 0)
1770 PPCODE:
1771{
1772 if (!self->incr_text)
1773 self->incr_text = newSVpvn ("", 0);
1774
1775 // append data, if any
1776 if (jsonstr)
1777 {
1778 if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text))
1779 {
1780 /* utf-8-ness differs, need to upgrade */
1781 sv_utf8_upgrade (self->incr_text);
1782
1783 if (self->incr_pos)
1784 self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
1785 - (U8 *)SvPVX (self->incr_text);
1786 }
1787
1788 {
1789 STRLEN len;
1790 const char *str = SvPV (jsonstr, len);
1791 SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1);
1792 Move (str, SvEND (self->incr_text), len, char);
1793 SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len);
1794 *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there
1795 }
1796 }
1797
1798 if (GIMME_V != G_VOID)
1799 do
1800 {
1801 STRLEN offset;
1802
1803 incr_parse (self);
1804
1805 if (!INCR_DONE (self))
1806 break;
1807
1808 XPUSHs (decode_json (self->incr_text, self, &offset));
1809
1810 sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset);
1811 self->incr_pos -= offset;
1812 self->incr_nest = 0;
1813 self->incr_mode = 0;
1814 }
1815 while (GIMME_V == G_ARRAY);
1816}
1817
1818SV *incr_text (JSON *self)
1819 ATTRS: lvalue
1820 CODE:
1821{
1822 if (self->incr_pos)
1823 croak ("incr_text can only be called after a successful incr_parse call in scalar context");
1824
1825 RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef;
1826}
1827 OUTPUT:
1828 RETVAL
1829
1830void incr_skip (JSON *self)
1831 CODE:
1832{
1833 if (self->incr_pos)
1834 {
1835 sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos);
1836 self->incr_pos = 0;
1837 self->incr_nest = 0;
1838 self->incr_mode = 0;
1839 }
1623} 1840}
1624 1841
1625void DESTROY (JSON *self) 1842void DESTROY (JSON *self)
1626 CODE: 1843 CODE:
1627 SvREFCNT_dec (self->cb_sk_object); 1844 SvREFCNT_dec (self->cb_sk_object);
1628 SvREFCNT_dec (self->cb_object); 1845 SvREFCNT_dec (self->cb_object);
1846 SvREFCNT_dec (self->incr_text);
1629 1847
1630PROTOTYPES: ENABLE 1848PROTOTYPES: ENABLE
1631 1849
1632void encode_json (SV *scalar) 1850void encode_json (SV *scalar)
1851 ALIAS:
1852 to_json_ = 0
1853 encode_json = F_UTF8
1633 PPCODE: 1854 PPCODE:
1634{ 1855{
1635 JSON json = { F_DEFAULT | F_UTF8 }; 1856 JSON json = { F_DEFAULT | ix };
1636 XPUSHs (encode_json (scalar, &json)); 1857 XPUSHs (encode_json (scalar, &json));
1637} 1858}
1638 1859
1639void decode_json (SV *jsonstr) 1860void decode_json (SV *jsonstr)
1861 ALIAS:
1862 from_json_ = 0
1863 decode_json = F_UTF8
1640 PPCODE: 1864 PPCODE:
1641{ 1865{
1642 JSON json = { F_DEFAULT | F_UTF8 }; 1866 JSON json = { F_DEFAULT | ix };
1643 XPUSHs (decode_json (jsonstr, &json, 0)); 1867 XPUSHs (decode_json (jsonstr, &json, 0));
1644} 1868}
1645 1869
1870

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines