… | |
… | |
50 | #define F_ALLOW_TAGS 0x00004000UL |
50 | #define F_ALLOW_TAGS 0x00004000UL |
51 | #define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing |
51 | #define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing |
52 | |
52 | |
53 | #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER |
53 | #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER |
54 | |
54 | |
55 | #define INIT_SIZE 32 // initial scalar size to be allocated |
55 | #define INIT_SIZE 64 // initial scalar size to be allocated |
56 | #define INDENT_STEP 3 // spaces per indentation level |
56 | #define INDENT_STEP 3 // spaces per indentation level |
57 | |
57 | |
58 | #define SHORT_STRING_LEN 16384 // special-case strings of up to this size |
58 | #define SHORT_STRING_LEN 16384 // special-case strings of up to this size |
59 | |
59 | |
60 | #define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8) |
60 | #define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8) |
… | |
… | |
96 | static SV *bool_false, *bool_true; |
96 | static SV *bool_false, *bool_true; |
97 | static SV *sv_json; |
97 | static SV *sv_json; |
98 | |
98 | |
99 | enum { |
99 | enum { |
100 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
100 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
|
|
101 | INCR_M_TFN, // inside true/false/null |
|
|
102 | INCR_M_NUM, // inside number |
101 | INCR_M_STR, // inside string |
103 | INCR_M_STR, // inside string |
102 | INCR_M_BS, // inside backslash |
104 | INCR_M_BS, // inside backslash |
103 | INCR_M_C0, // inside comment in initial whitespace sequence |
105 | INCR_M_C0, // inside comment in initial whitespace sequence |
104 | INCR_M_C1, // inside comment in other places |
106 | INCR_M_C1, // inside comment in other places |
105 | INCR_M_JSON // outside anything, count nesting |
107 | INCR_M_JSON // outside anything, count nesting |
… | |
… | |
125 | } JSON; |
127 | } JSON; |
126 | |
128 | |
127 | INLINE void |
129 | INLINE void |
128 | json_init (JSON *json) |
130 | json_init (JSON *json) |
129 | { |
131 | { |
130 | Zero (json, 1, JSON); |
132 | static const JSON init = { F_ALLOW_NONREF, 512 }; |
131 | json->max_depth = 512; |
133 | |
|
|
134 | *json = init; |
132 | } |
135 | } |
133 | |
136 | |
134 | ///////////////////////////////////////////////////////////////////////////// |
137 | ///////////////////////////////////////////////////////////////////////////// |
135 | // utility functions |
138 | // utility functions |
136 | |
139 | |
… | |
… | |
788 | { |
791 | { |
789 | int count; |
792 | int count; |
790 | dSP; |
793 | dSP; |
791 | |
794 | |
792 | ENTER; SAVETMPS; |
795 | ENTER; SAVETMPS; |
793 | SAVESTACK_POS (); |
|
|
794 | PUSHMARK (SP); |
796 | PUSHMARK (SP); |
795 | EXTEND (SP, 2); |
797 | EXTEND (SP, 2); |
796 | // we re-bless the reference to get overload and other niceties right |
798 | // we re-bless the reference to get overload and other niceties right |
797 | PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); |
799 | PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); |
798 | PUSHs (sv_json); |
800 | PUSHs (sv_json); |
… | |
… | |
810 | encode_str (enc, HvNAME (stash), HvNAMELEN (stash), HvNAMEUTF8 (stash)); |
812 | encode_str (enc, HvNAME (stash), HvNAMELEN (stash), HvNAMEUTF8 (stash)); |
811 | encode_ch (enc, '"'); |
813 | encode_ch (enc, '"'); |
812 | encode_ch (enc, ')'); |
814 | encode_ch (enc, ')'); |
813 | encode_ch (enc, '['); |
815 | encode_ch (enc, '['); |
814 | |
816 | |
815 | while (count) |
817 | if (count) |
816 | { |
818 | { |
|
|
819 | int i; |
|
|
820 | |
|
|
821 | for (i = 0; i < count - 1; ++i) |
|
|
822 | { |
817 | encode_sv (enc, SP[1 - count--]); |
823 | encode_sv (enc, SP[i + 1 - count]); |
818 | |
|
|
819 | if (count) |
|
|
820 | encode_ch (enc, ','); |
824 | encode_ch (enc, ','); |
|
|
825 | } |
|
|
826 | |
|
|
827 | encode_sv (enc, TOPs); |
|
|
828 | SP -= count; |
821 | } |
829 | } |
822 | |
830 | |
823 | encode_ch (enc, ']'); |
831 | encode_ch (enc, ']'); |
824 | |
832 | |
825 | FREETMPS; LEAVE; |
833 | FREETMPS; LEAVE; |
… | |
… | |
1496 | |
1504 | |
1497 | DEC_DEC_DEPTH; |
1505 | DEC_DEC_DEPTH; |
1498 | sv = newRV_noinc ((SV *)hv); |
1506 | sv = newRV_noinc ((SV *)hv); |
1499 | |
1507 | |
1500 | // check filter callbacks |
1508 | // check filter callbacks |
1501 | if (dec->json.flags & F_HOOK) |
1509 | if (expect_false (dec->json.flags & F_HOOK)) |
1502 | { |
1510 | { |
1503 | if (dec->json.cb_sk_object && HvKEYS (hv) == 1) |
1511 | if (dec->json.cb_sk_object && HvKEYS (hv) == 1) |
1504 | { |
1512 | { |
1505 | HE *cb, *he; |
1513 | HE *cb, *he; |
1506 | |
1514 | |
… | |
… | |
1516 | { |
1524 | { |
1517 | dSP; |
1525 | dSP; |
1518 | int count; |
1526 | int count; |
1519 | |
1527 | |
1520 | ENTER; SAVETMPS; |
1528 | ENTER; SAVETMPS; |
1521 | SAVESTACK_POS (); |
|
|
1522 | PUSHMARK (SP); |
1529 | PUSHMARK (SP); |
1523 | XPUSHs (HeVAL (he)); |
1530 | XPUSHs (HeVAL (he)); |
1524 | sv_2mortal (sv); |
1531 | sv_2mortal (sv); |
1525 | |
1532 | |
1526 | PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; |
1533 | PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; |
… | |
… | |
1529 | { |
1536 | { |
1530 | sv = newSVsv (POPs); |
1537 | sv = newSVsv (POPs); |
1531 | FREETMPS; LEAVE; |
1538 | FREETMPS; LEAVE; |
1532 | return sv; |
1539 | return sv; |
1533 | } |
1540 | } |
|
|
1541 | else if (count) |
|
|
1542 | croak ("filter_json_single_key_object callbacks must not return more than one scalar"); |
1534 | |
1543 | |
1535 | SvREFCNT_inc (sv); |
1544 | SvREFCNT_inc (sv); |
1536 | FREETMPS; LEAVE; |
1545 | FREETMPS; LEAVE; |
1537 | } |
1546 | } |
1538 | } |
1547 | } |
… | |
… | |
1541 | { |
1550 | { |
1542 | dSP; |
1551 | dSP; |
1543 | int count; |
1552 | int count; |
1544 | |
1553 | |
1545 | ENTER; SAVETMPS; |
1554 | ENTER; SAVETMPS; |
1546 | SAVESTACK_POS (); |
|
|
1547 | PUSHMARK (SP); |
1555 | PUSHMARK (SP); |
1548 | XPUSHs (sv_2mortal (sv)); |
1556 | XPUSHs (sv_2mortal (sv)); |
1549 | |
1557 | |
1550 | PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; |
1558 | PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; |
1551 | |
1559 | |
1552 | if (count == 1) |
1560 | if (count == 1) |
1553 | { |
|
|
1554 | sv = newSVsv (POPs); |
1561 | sv = newSVsv (POPs); |
1555 | FREETMPS; LEAVE; |
1562 | else if (count == 0) |
1556 | return sv; |
|
|
1557 | } |
|
|
1558 | |
|
|
1559 | SvREFCNT_inc (sv); |
1563 | SvREFCNT_inc (sv); |
|
|
1564 | else |
|
|
1565 | croak ("filter_json_object callbacks must not return more than one scalar"); |
|
|
1566 | |
1560 | FREETMPS; LEAVE; |
1567 | FREETMPS; LEAVE; |
1561 | } |
1568 | } |
1562 | } |
1569 | } |
1563 | |
1570 | |
1564 | return sv; |
1571 | return sv; |
… | |
… | |
1787 | else if (sv) |
1794 | else if (sv) |
1788 | { |
1795 | { |
1789 | // check for trailing garbage |
1796 | // check for trailing garbage |
1790 | decode_ws (&dec); |
1797 | decode_ws (&dec); |
1791 | |
1798 | |
1792 | if (*dec.cur) |
1799 | if (dec.cur != dec.end) |
1793 | { |
1800 | { |
1794 | dec.err = "garbage after JSON object"; |
1801 | dec.err = "garbage after JSON object"; |
1795 | SvREFCNT_dec (sv); |
1802 | SvREFCNT_dec (sv); |
1796 | sv = 0; |
1803 | sv = 0; |
1797 | } |
1804 | } |
… | |
… | |
1835 | // the state machine here is a bit convoluted and could be simplified a lot |
1842 | // the state machine here is a bit convoluted and could be simplified a lot |
1836 | // but this would make it slower, so... |
1843 | // but this would make it slower, so... |
1837 | |
1844 | |
1838 | for (;;) |
1845 | for (;;) |
1839 | { |
1846 | { |
1840 | //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D |
|
|
1841 | switch (self->incr_mode) |
1847 | switch (self->incr_mode) |
1842 | { |
1848 | { |
|
|
1849 | // reached end of a scalar, see if we are inside a nested structure or not |
|
|
1850 | end_of_scalar: |
|
|
1851 | self->incr_mode = INCR_M_JSON; |
|
|
1852 | |
|
|
1853 | if (self->incr_nest) // end of a scalar inside array, object or tag |
|
|
1854 | goto incr_m_json; |
|
|
1855 | else // end of scalar outside structure, json text ends here |
|
|
1856 | goto interrupt; |
|
|
1857 | |
1843 | // only used for initial whitespace skipping |
1858 | // only used for initial whitespace skipping |
1844 | case INCR_M_WS: |
1859 | case INCR_M_WS: |
1845 | for (;;) |
1860 | for (;;) |
1846 | { |
1861 | { |
1847 | if (*p > 0x20) |
1862 | if (*p > 0x20) |
… | |
… | |
1889 | ++p; |
1904 | ++p; |
1890 | } |
1905 | } |
1891 | |
1906 | |
1892 | break; |
1907 | break; |
1893 | |
1908 | |
|
|
1909 | // inside true/false/null |
|
|
1910 | case INCR_M_TFN: |
|
|
1911 | incr_m_tfn: |
|
|
1912 | for (;;) |
|
|
1913 | switch (*p++) |
|
|
1914 | { |
|
|
1915 | case 'r': case 'u': case 'e': // tRUE, falsE, nUll |
|
|
1916 | case 'a': case 'l': case 's': // fALSe, nuLL |
|
|
1917 | // allowed |
|
|
1918 | break; |
|
|
1919 | |
|
|
1920 | default: |
|
|
1921 | --p; |
|
|
1922 | goto end_of_scalar; |
|
|
1923 | } |
|
|
1924 | |
|
|
1925 | // inside a number |
|
|
1926 | case INCR_M_NUM: |
|
|
1927 | incr_m_num: |
|
|
1928 | for (;;) |
|
|
1929 | switch (*p++) |
|
|
1930 | { |
|
|
1931 | case 'e': case 'E': case '.': case '+': |
|
|
1932 | case '-': |
|
|
1933 | case '0': case '1': case '2': case '3': case '4': |
|
|
1934 | case '5': case '6': case '7': case '8': case '9': |
|
|
1935 | // allowed |
|
|
1936 | break; |
|
|
1937 | |
|
|
1938 | default: |
|
|
1939 | --p; |
|
|
1940 | goto end_of_scalar; |
|
|
1941 | } |
|
|
1942 | |
1894 | // inside a string |
1943 | // inside a string |
1895 | case INCR_M_STR: |
1944 | case INCR_M_STR: |
1896 | incr_m_str: |
1945 | incr_m_str: |
1897 | for (;;) |
1946 | for (;;) |
1898 | { |
1947 | { |
1899 | if (*p == '"') |
1948 | if (*p == '"') |
1900 | { |
1949 | { |
1901 | ++p; |
1950 | ++p; |
1902 | self->incr_mode = INCR_M_JSON; |
|
|
1903 | |
|
|
1904 | if (!self->incr_nest) |
|
|
1905 | goto interrupt; |
|
|
1906 | |
|
|
1907 | goto incr_m_json; |
1951 | goto end_of_scalar; |
1908 | } |
1952 | } |
1909 | else if (*p == '\\') |
1953 | else if (*p == '\\') |
1910 | { |
1954 | { |
1911 | ++p; // "virtually" consumes character after \ |
1955 | ++p; // "virtually" consumes character after \ |
1912 | |
1956 | |
… | |
… | |
1941 | { |
1985 | { |
1942 | --p; // do not eat the whitespace, let the next round do it |
1986 | --p; // do not eat the whitespace, let the next round do it |
1943 | goto interrupt; |
1987 | goto interrupt; |
1944 | } |
1988 | } |
1945 | break; |
1989 | break; |
|
|
1990 | |
|
|
1991 | // the following three blocks handle scalars. this makes the parser |
|
|
1992 | // more strict than required inside arrays or objects, and could |
|
|
1993 | // be moved to a special case on the toplevel (except strings) |
|
|
1994 | case 't': |
|
|
1995 | case 'f': |
|
|
1996 | case 'n': |
|
|
1997 | self->incr_mode = INCR_M_TFN; |
|
|
1998 | goto incr_m_tfn; |
|
|
1999 | |
|
|
2000 | case '-': |
|
|
2001 | case '0': case '1': case '2': case '3': case '4': |
|
|
2002 | case '5': case '6': case '7': case '8': case '9': |
|
|
2003 | self->incr_mode = INCR_M_NUM; |
|
|
2004 | goto incr_m_num; |
1946 | |
2005 | |
1947 | case '"': |
2006 | case '"': |
1948 | self->incr_mode = INCR_M_STR; |
2007 | self->incr_mode = INCR_M_STR; |
1949 | goto incr_m_str; |
2008 | goto incr_m_str; |
1950 | |
2009 | |
… | |
… | |
2147 | XPUSHs (ST (0)); |
2206 | XPUSHs (ST (0)); |
2148 | } |
2207 | } |
2149 | |
2208 | |
2150 | void encode (JSON *self, SV *scalar) |
2209 | void encode (JSON *self, SV *scalar) |
2151 | PPCODE: |
2210 | PPCODE: |
2152 | PUTBACK; scalar = encode_json (scalar, self); SPAGAIN; |
2211 | PUTBACK; XPUSHs (encode_json (scalar, self)); |
2153 | XPUSHs (scalar); |
|
|
2154 | |
2212 | |
2155 | void decode (JSON *self, SV *jsonstr) |
2213 | void decode (JSON *self, SV *jsonstr) |
2156 | PPCODE: |
2214 | PPCODE: |
2157 | PUTBACK; jsonstr = decode_json (jsonstr, self, 0); SPAGAIN; |
2215 | PUTBACK; XPUSHs (decode_json (jsonstr, self, 0)); |
2158 | XPUSHs (jsonstr); |
|
|
2159 | |
2216 | |
2160 | void decode_prefix (JSON *self, SV *jsonstr) |
2217 | void decode_prefix (JSON *self, SV *jsonstr) |
2161 | PPCODE: |
2218 | PPCODE: |
2162 | { |
2219 | { |
2163 | SV *sv; |
2220 | SV *sv; |
2164 | STRLEN offset; |
2221 | STRLEN offset; |
2165 | PUTBACK; sv = decode_json (jsonstr, self, &offset); SPAGAIN; |
2222 | PUTBACK; sv = decode_json (jsonstr, self, &offset); |
2166 | EXTEND (SP, 2); |
2223 | EXTEND (SP, 2); |
2167 | PUSHs (sv); |
2224 | PUSHs (sv); |
2168 | PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, SvPV_nolen (jsonstr) + offset)))); |
2225 | PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, SvPV_nolen (jsonstr) + offset)))); |
2169 | } |
2226 | } |
2170 | |
2227 | |
… | |
… | |
2243 | |
2300 | |
2244 | break; |
2301 | break; |
2245 | } |
2302 | } |
2246 | } |
2303 | } |
2247 | |
2304 | |
2248 | PUTBACK; sv = decode_json (self->incr_text, self, &offset); SPAGAIN; |
2305 | PUTBACK; sv = decode_json (self->incr_text, self, &offset); |
2249 | XPUSHs (sv); |
2306 | XPUSHs (sv); |
2250 | |
2307 | |
2251 | self->incr_pos -= offset; |
2308 | self->incr_pos -= offset; |
2252 | self->incr_nest = 0; |
2309 | self->incr_nest = 0; |
2253 | self->incr_mode = 0; |
2310 | self->incr_mode = 0; |
… | |
… | |
2305 | PPCODE: |
2362 | PPCODE: |
2306 | { |
2363 | { |
2307 | JSON json; |
2364 | JSON json; |
2308 | json_init (&json); |
2365 | json_init (&json); |
2309 | json.flags |= F_UTF8; |
2366 | json.flags |= F_UTF8; |
2310 | PUTBACK; scalar = encode_json (scalar, &json); SPAGAIN; |
2367 | PUTBACK; XPUSHs (encode_json (scalar, &json)); |
2311 | XPUSHs (scalar); |
|
|
2312 | } |
2368 | } |
2313 | |
2369 | |
2314 | void decode_json (SV *jsonstr) |
2370 | void decode_json (SV *jsonstr) |
2315 | PPCODE: |
2371 | PPCODE: |
2316 | { |
2372 | { |
2317 | JSON json; |
2373 | JSON json; |
2318 | json_init (&json); |
2374 | json_init (&json); |
2319 | json.flags |= F_UTF8; |
2375 | json.flags |= F_UTF8; |
2320 | PUTBACK; jsonstr = decode_json (jsonstr, &json, 0); SPAGAIN; |
2376 | PUTBACK; XPUSHs (decode_json (jsonstr, &json, 0)); |
2321 | XPUSHs (jsonstr); |
|
|
2322 | } |
2377 | } |
2323 | |
2378 | |