ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.131 by root, Thu Aug 17 01:42:20 2017 UTC vs.
Revision 1.138 by root, Wed Mar 6 07:21:17 2019 UTC

50#define F_ALLOW_TAGS 0x00004000UL 50#define F_ALLOW_TAGS 0x00004000UL
51#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing 51#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing
52 52
53#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER 53#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER
54 54
55#define INIT_SIZE 32 // initial scalar size to be allocated 55#define INIT_SIZE 64 // initial scalar size to be allocated
56#define INDENT_STEP 3 // spaces per indentation level 56#define INDENT_STEP 3 // spaces per indentation level
57 57
58#define SHORT_STRING_LEN 16384 // special-case strings of up to this size 58#define SHORT_STRING_LEN 16384 // special-case strings of up to this size
59 59
60#define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8) 60#define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8)
78 <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) 78 <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
79 79
80#define ERR_NESTING_EXCEEDED "json text or perl structure exceeds maximum nesting level (max_depth set too low?)" 80#define ERR_NESTING_EXCEEDED "json text or perl structure exceeds maximum nesting level (max_depth set too low?)"
81 81
82#ifdef USE_ITHREADS 82#ifdef USE_ITHREADS
83# define JSON_SLOW 1
84# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) 83# define JSON_STASH (expect_true (json_stash) ? json_stash : gv_stashpv ("JSON::XS", 1))
85# define BOOL_STASH (bool_stash ? bool_stash : gv_stashpv ("Types::Serialiser::Boolean", 1)) 84# define BOOL_STASH (expect_true (bool_stash) ? bool_stash : gv_stashpv ("Types::Serialiser::Boolean", 1))
85# define GET_BOOL(value) (expect_true (bool_ ## value) ? bool_ ## value : get_bool ("Types::Serialiser::" # value))
86#else 86#else
87# define JSON_SLOW 0
88# define JSON_STASH json_stash 87# define JSON_STASH json_stash
89# define BOOL_STASH bool_stash 88# define BOOL_STASH bool_stash
89# define GET_BOOL(value) bool_ ## value
90#endif 90#endif
91 91
92// the amount of HEs to allocate on the stack, when sorting keys 92// the amount of HEs to allocate on the stack, when sorting keys
93#define STACK_HES 64 93#define STACK_HES 64
94 94
95static HV *json_stash, *bool_stash; // JSON::XS::, Types::Serialiser::Boolean:: 95static HV *json_stash, *bool_stash; // JSON::XS::, Types::Serialiser::Boolean::
96static SV *bool_true, *bool_false, *sv_json; 96static SV *bool_false, *bool_true;
97static SV *sv_json;
97 98
98enum { 99enum {
99 INCR_M_WS = 0, // initial whitespace skipping, must be 0 100 INCR_M_WS = 0, // initial whitespace skipping, must be 0
101 INCR_M_TFN, // inside true/false/null
102 INCR_M_NUM, // inside number
100 INCR_M_STR, // inside string 103 INCR_M_STR, // inside string
101 INCR_M_BS, // inside backslash 104 INCR_M_BS, // inside backslash
102 INCR_M_C0, // inside comment in initial whitespace sequence 105 INCR_M_C0, // inside comment in initial whitespace sequence
103 INCR_M_C1, // inside comment in other places 106 INCR_M_C1, // inside comment in other places
104 INCR_M_JSON // outside anything, count nesting 107 INCR_M_JSON // outside anything, count nesting
117 // for the incremental parser 120 // for the incremental parser
118 SV *incr_text; // the source text so far 121 SV *incr_text; // the source text so far
119 STRLEN incr_pos; // the current offset into the text 122 STRLEN incr_pos; // the current offset into the text
120 int incr_nest; // {[]}-nesting level 123 int incr_nest; // {[]}-nesting level
121 unsigned char incr_mode; 124 unsigned char incr_mode;
125
126 SV *v_false, *v_true;
122} JSON; 127} JSON;
123 128
124INLINE void 129INLINE void
125json_init (JSON *json) 130json_init (JSON *json)
126{ 131{
127 Zero (json, 1, JSON); 132 static const JSON init = { F_ALLOW_NONREF, 512 };
128 json->max_depth = 512; 133
134 *json = init;
129} 135}
130 136
131///////////////////////////////////////////////////////////////////////////// 137/////////////////////////////////////////////////////////////////////////////
132// utility functions 138// utility functions
133 139
180 len1 = (len1 | 4095) - 24; 186 len1 = (len1 | 4095) - 24;
181 187
182 return SvGROW (sv, len1); 188 return SvGROW (sv, len1);
183} 189}
184 190
185// decode an utf-8 character and return it, or (UV)-1 in 191// decode a utf-8 character and return it, or (UV)-1 in
186// case of an error. 192// case of an error.
187// we special-case "safe" characters from U+80 .. U+7FF, 193// we special-case "safe" characters from U+80 .. U+7FF,
188// but use the very good perl function to parse anything else. 194// but use the very good perl function to parse anything else.
189// note that we never call this function for ascii codepoints 195// note that we never call this function for ascii codepoints
190INLINE UV 196INLINE UV
776 { 782 {
777 HV *stash = SvSTASH (sv); 783 HV *stash = SvSTASH (sv);
778 784
779 if (stash == bool_stash) 785 if (stash == bool_stash)
780 { 786 {
781 if (SvIV (sv))
782 encode_str (enc, "true", 4, 0); 787 if (SvIV (sv)) encode_str (enc, "true" , 4, 0);
783 else
784 encode_str (enc, "false", 5, 0); 788 else encode_str (enc, "false", 5, 0);
785 } 789 }
786 else if ((enc->json.flags & F_ALLOW_TAGS) && (method = gv_fetchmethod_autoload (stash, "FREEZE", 0))) 790 else if ((enc->json.flags & F_ALLOW_TAGS) && (method = gv_fetchmethod_autoload (stash, "FREEZE", 0)))
787 { 791 {
788 int count; 792 int count;
789 dSP; 793 dSP;
790 794
791 ENTER; SAVETMPS; 795 ENTER; SAVETMPS;
792 SAVESTACK_POS ();
793 PUSHMARK (SP); 796 PUSHMARK (SP);
794 EXTEND (SP, 2); 797 EXTEND (SP, 2);
795 // we re-bless the reference to get overload and other niceties right 798 // we re-bless the reference to get overload and other niceties right
796 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); 799 PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
797 PUSHs (sv_json); 800 PUSHs (sv_json);
809 encode_str (enc, HvNAME (stash), HvNAMELEN (stash), HvNAMEUTF8 (stash)); 812 encode_str (enc, HvNAME (stash), HvNAMELEN (stash), HvNAMEUTF8 (stash));
810 encode_ch (enc, '"'); 813 encode_ch (enc, '"');
811 encode_ch (enc, ')'); 814 encode_ch (enc, ')');
812 encode_ch (enc, '['); 815 encode_ch (enc, '[');
813 816
814 while (count) 817 if (count)
815 { 818 {
819 int i;
820
821 for (i = 0; i < count - 1; ++i)
822 {
816 encode_sv (enc, SP[1 - count--]); 823 encode_sv (enc, SP[i + 1 - count]);
817
818 if (count)
819 encode_ch (enc, ','); 824 encode_ch (enc, ',');
825 }
826
827 encode_sv (enc, TOPs);
828 SP -= count;
820 } 829 }
821 830
822 encode_ch (enc, ']'); 831 encode_ch (enc, ']');
823 832
824 FREETMPS; LEAVE; 833 FREETMPS; LEAVE;
1495 1504
1496 DEC_DEC_DEPTH; 1505 DEC_DEC_DEPTH;
1497 sv = newRV_noinc ((SV *)hv); 1506 sv = newRV_noinc ((SV *)hv);
1498 1507
1499 // check filter callbacks 1508 // check filter callbacks
1500 if (dec->json.flags & F_HOOK) 1509 if (expect_false (dec->json.flags & F_HOOK))
1501 { 1510 {
1502 if (dec->json.cb_sk_object && HvKEYS (hv) == 1) 1511 if (dec->json.cb_sk_object && HvKEYS (hv) == 1)
1503 { 1512 {
1504 HE *cb, *he; 1513 HE *cb, *he;
1505 1514
1515 { 1524 {
1516 dSP; 1525 dSP;
1517 int count; 1526 int count;
1518 1527
1519 ENTER; SAVETMPS; 1528 ENTER; SAVETMPS;
1520 SAVESTACK_POS ();
1521 PUSHMARK (SP); 1529 PUSHMARK (SP);
1522 XPUSHs (HeVAL (he)); 1530 XPUSHs (HeVAL (he));
1523 sv_2mortal (sv); 1531 sv_2mortal (sv);
1524 1532
1525 PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; 1533 PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN;
1528 { 1536 {
1529 sv = newSVsv (POPs); 1537 sv = newSVsv (POPs);
1530 FREETMPS; LEAVE; 1538 FREETMPS; LEAVE;
1531 return sv; 1539 return sv;
1532 } 1540 }
1541 else if (count)
1542 croak ("filter_json_single_key_object callbacks must not return more than one scalar");
1533 1543
1534 SvREFCNT_inc (sv); 1544 SvREFCNT_inc (sv);
1535 FREETMPS; LEAVE; 1545 FREETMPS; LEAVE;
1536 } 1546 }
1537 } 1547 }
1540 { 1550 {
1541 dSP; 1551 dSP;
1542 int count; 1552 int count;
1543 1553
1544 ENTER; SAVETMPS; 1554 ENTER; SAVETMPS;
1545 SAVESTACK_POS ();
1546 PUSHMARK (SP); 1555 PUSHMARK (SP);
1547 XPUSHs (sv_2mortal (sv)); 1556 XPUSHs (sv_2mortal (sv));
1548 1557
1549 PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; 1558 PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN;
1550 1559
1551 if (count == 1) 1560 if (count == 1)
1552 {
1553 sv = newSVsv (POPs); 1561 sv = newSVsv (POPs);
1554 FREETMPS; LEAVE; 1562 else if (count == 0)
1555 return sv;
1556 }
1557
1558 SvREFCNT_inc (sv); 1563 SvREFCNT_inc (sv);
1564 else
1565 croak ("filter_json_object callbacks must not return more than one scalar");
1566
1559 FREETMPS; LEAVE; 1567 FREETMPS; LEAVE;
1560 } 1568 }
1561 } 1569 }
1562 1570
1563 return sv; 1571 return sv;
1666 case '-': 1674 case '-':
1667 case '0': case '1': case '2': case '3': case '4': 1675 case '0': case '1': case '2': case '3': case '4':
1668 case '5': case '6': case '7': case '8': case '9': 1676 case '5': case '6': case '7': case '8': case '9':
1669 return decode_num (dec); 1677 return decode_num (dec);
1670 1678
1679 case 'f':
1680 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
1681 {
1682 dec->cur += 5;
1683
1684 if (expect_false (!dec->json.v_false))
1685 dec->json.v_false = GET_BOOL (false);
1686
1687 return newSVsv (dec->json.v_false);
1688 }
1689 else
1690 ERR ("'false' expected");
1691
1692 break;
1693
1671 case 't': 1694 case 't':
1672 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) 1695 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
1673 { 1696 {
1674 dec->cur += 4; 1697 dec->cur += 4;
1675#if JSON_SLOW 1698
1676 bool_true = get_bool ("Types::Serialiser::true"); 1699 if (expect_false (!dec->json.v_true))
1677#endif 1700 dec->json.v_true = GET_BOOL (true);
1701
1678 return newSVsv (bool_true); 1702 return newSVsv (dec->json.v_true);
1679 } 1703 }
1680 else 1704 else
1681 ERR ("'true' expected"); 1705 ERR ("'true' expected");
1682
1683 break;
1684
1685 case 'f':
1686 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
1687 {
1688 dec->cur += 5;
1689#if JSON_SLOW
1690 bool_false = get_bool ("Types::Serialiser::false");
1691#endif
1692 return newSVsv (bool_false);
1693 }
1694 else
1695 ERR ("'false' expected");
1696 1706
1697 break; 1707 break;
1698 1708
1699 case 'n': 1709 case 'n':
1700 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4)) 1710 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
1784 else if (sv) 1794 else if (sv)
1785 { 1795 {
1786 // check for trailing garbage 1796 // check for trailing garbage
1787 decode_ws (&dec); 1797 decode_ws (&dec);
1788 1798
1789 if (*dec.cur) 1799 if (dec.cur != dec.end)
1790 { 1800 {
1791 dec.err = "garbage after JSON object"; 1801 dec.err = "garbage after JSON object";
1792 SvREFCNT_dec (sv); 1802 SvREFCNT_dec (sv);
1793 sv = 0; 1803 sv = 0;
1794 } 1804 }
1832 // the state machine here is a bit convoluted and could be simplified a lot 1842 // the state machine here is a bit convoluted and could be simplified a lot
1833 // but this would make it slower, so... 1843 // but this would make it slower, so...
1834 1844
1835 for (;;) 1845 for (;;)
1836 { 1846 {
1837 //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D
1838 switch (self->incr_mode) 1847 switch (self->incr_mode)
1839 { 1848 {
1849 // reached end of a scalar, see if we are inside a nested structure or not
1850 end_of_scalar:
1851 self->incr_mode = INCR_M_JSON;
1852
1853 if (self->incr_nest) // end of a scalar inside array, object or tag
1854 goto incr_m_json;
1855 else // end of scalar outside structure, json text ends here
1856 goto interrupt;
1857
1840 // only used for initial whitespace skipping 1858 // only used for initial whitespace skipping
1841 case INCR_M_WS: 1859 case INCR_M_WS:
1842 for (;;) 1860 for (;;)
1843 { 1861 {
1844 if (*p > 0x20) 1862 if (*p > 0x20)
1886 ++p; 1904 ++p;
1887 } 1905 }
1888 1906
1889 break; 1907 break;
1890 1908
1909 // inside true/false/null
1910 case INCR_M_TFN:
1911 incr_m_tfn:
1912 for (;;)
1913 switch (*p++)
1914 {
1915 case 'r': case 'u': case 'e': // tRUE, falsE, nUll
1916 case 'a': case 'l': case 's': // fALSe, nuLL
1917 // allowed
1918 break;
1919
1920 default:
1921 --p;
1922 goto end_of_scalar;
1923 }
1924
1925 // inside a number
1926 case INCR_M_NUM:
1927 incr_m_num:
1928 for (;;)
1929 switch (*p++)
1930 {
1931 case 'e': case 'E': case '.': case '+':
1932 case '-':
1933 case '0': case '1': case '2': case '3': case '4':
1934 case '5': case '6': case '7': case '8': case '9':
1935 // allowed
1936 break;
1937
1938 default:
1939 --p;
1940 goto end_of_scalar;
1941 }
1942
1891 // inside a string 1943 // inside a string
1892 case INCR_M_STR: 1944 case INCR_M_STR:
1893 incr_m_str: 1945 incr_m_str:
1894 for (;;) 1946 for (;;)
1895 { 1947 {
1896 if (*p == '"') 1948 if (*p == '"')
1897 { 1949 {
1898 ++p; 1950 ++p;
1899 self->incr_mode = INCR_M_JSON;
1900
1901 if (!self->incr_nest)
1902 goto interrupt;
1903
1904 goto incr_m_json; 1951 goto end_of_scalar;
1905 } 1952 }
1906 else if (*p == '\\') 1953 else if (*p == '\\')
1907 { 1954 {
1908 ++p; // "virtually" consumes character after \ 1955 ++p; // "virtually" consumes character after \
1909 1956
1939 --p; // do not eat the whitespace, let the next round do it 1986 --p; // do not eat the whitespace, let the next round do it
1940 goto interrupt; 1987 goto interrupt;
1941 } 1988 }
1942 break; 1989 break;
1943 1990
1991 // the following three blocks handle scalars. this makes the parser
1992 // more strict than required inside arrays or objects, and could
1993 // be moved to a special case on the toplevel (except strings)
1994 case 't':
1995 case 'f':
1996 case 'n':
1997 self->incr_mode = INCR_M_TFN;
1998 goto incr_m_tfn;
1999
2000 case '-':
2001 case '0': case '1': case '2': case '3': case '4':
2002 case '5': case '6': case '7': case '8': case '9':
2003 self->incr_mode = INCR_M_NUM;
2004 goto incr_m_num;
2005
1944 case '"': 2006 case '"':
1945 self->incr_mode = INCR_M_STR; 2007 self->incr_mode = INCR_M_STR;
1946 goto incr_m_str; 2008 goto incr_m_str;
1947 2009
1948 case '[': 2010 case '[':
1995 : i >= 'A' && i <= 'F' ? i - 'A' + 10 2057 : i >= 'A' && i <= 'F' ? i - 'A' + 10
1996 : -1; 2058 : -1;
1997 2059
1998 json_stash = gv_stashpv ("JSON::XS" , 1); 2060 json_stash = gv_stashpv ("JSON::XS" , 1);
1999 bool_stash = gv_stashpv ("Types::Serialiser::Boolean", 1); 2061 bool_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
2062 bool_false = get_bool ("Types::Serialiser::false");
2000 bool_true = get_bool ("Types::Serialiser::true"); 2063 bool_true = get_bool ("Types::Serialiser::true");
2001 bool_false = get_bool ("Types::Serialiser::false");
2002 2064
2003 sv_json = newSVpv ("JSON", 0); 2065 sv_json = newSVpv ("JSON", 0);
2004 SvREADONLY_on (sv_json); 2066 SvREADONLY_on (sv_json);
2005 2067
2006 CvNODEBUG_on (get_cv ("JSON::XS::incr_text", 0)); /* the debugger completely breaks lvalue subs */ 2068 CvNODEBUG_on (get_cv ("JSON::XS::incr_text", 0)); /* the debugger completely breaks lvalue subs */
2008 2070
2009PROTOTYPES: DISABLE 2071PROTOTYPES: DISABLE
2010 2072
2011void CLONE (...) 2073void CLONE (...)
2012 CODE: 2074 CODE:
2075 // as long as these writes are atomic, the race should not matter
2076 // as existing threads either already use 0, or use the old value,
2077 // which is still correct for the initial thread.
2013 json_stash = 0; 2078 json_stash = 0;
2014 bool_stash = 0; 2079 bool_stash = 0;
2080 bool_false = 0;
2081 bool_true = 0;
2015 2082
2016void new (char *klass) 2083void new (char *klass)
2017 PPCODE: 2084 PPCODE:
2018{ 2085{
2019 SV *pv = NEWSV (0, sizeof (JSON)); 2086 SV *pv = NEWSV (0, sizeof (JSON));
2022 XPUSHs (sv_2mortal (sv_bless ( 2089 XPUSHs (sv_2mortal (sv_bless (
2023 newRV_noinc (pv), 2090 newRV_noinc (pv),
2024 strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1) 2091 strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1)
2025 ))); 2092 )));
2026} 2093}
2094
2095void boolean_values (JSON *self, SV *v_false = 0, SV *v_true = 0)
2096 PPCODE:
2097 self->v_false = newSVsv (v_false);
2098 self->v_true = newSVsv (v_true);
2099 XPUSHs (ST (0));
2100
2101void get_boolean_values (JSON *self)
2102 PPCODE:
2103 if (self->v_false && self->v_true)
2104 {
2105 EXTEND (SP, 2);
2106 PUSHs (self->v_false);
2107 PUSHs (self->v_true);
2108 }
2027 2109
2028void ascii (JSON *self, int enable = 1) 2110void ascii (JSON *self, int enable = 1)
2029 ALIAS: 2111 ALIAS:
2030 ascii = F_ASCII 2112 ascii = F_ASCII
2031 latin1 = F_LATIN1 2113 latin1 = F_LATIN1
2268 self->incr_mode = 0; 2350 self->incr_mode = 0;
2269} 2351}
2270 2352
2271void DESTROY (JSON *self) 2353void DESTROY (JSON *self)
2272 CODE: 2354 CODE:
2355 SvREFCNT_dec (self->v_false);
2356 SvREFCNT_dec (self->v_true);
2273 SvREFCNT_dec (self->cb_sk_object); 2357 SvREFCNT_dec (self->cb_sk_object);
2274 SvREFCNT_dec (self->cb_object); 2358 SvREFCNT_dec (self->cb_object);
2275 SvREFCNT_dec (self->incr_text); 2359 SvREFCNT_dec (self->incr_text);
2276 2360
2277PROTOTYPES: ENABLE 2361PROTOTYPES: ENABLE

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines