… | |
… | |
50 | #define F_ALLOW_TAGS 0x00004000UL |
50 | #define F_ALLOW_TAGS 0x00004000UL |
51 | #define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing |
51 | #define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing |
52 | |
52 | |
53 | #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER |
53 | #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER |
54 | |
54 | |
55 | #define INIT_SIZE 32 // initial scalar size to be allocated |
55 | #define INIT_SIZE 64 // initial scalar size to be allocated |
56 | #define INDENT_STEP 3 // spaces per indentation level |
56 | #define INDENT_STEP 3 // spaces per indentation level |
57 | |
57 | |
58 | #define SHORT_STRING_LEN 16384 // special-case strings of up to this size |
58 | #define SHORT_STRING_LEN 16384 // special-case strings of up to this size |
59 | |
59 | |
60 | #define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8) |
60 | #define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8) |
… | |
… | |
96 | static SV *bool_false, *bool_true; |
96 | static SV *bool_false, *bool_true; |
97 | static SV *sv_json; |
97 | static SV *sv_json; |
98 | |
98 | |
99 | enum { |
99 | enum { |
100 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
100 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
|
|
101 | INCR_M_TFN, // inside true/false/null |
|
|
102 | INCR_M_NUM, // inside number |
101 | INCR_M_STR, // inside string |
103 | INCR_M_STR, // inside string |
102 | INCR_M_BS, // inside backslash |
104 | INCR_M_BS, // inside backslash |
103 | INCR_M_C0, // inside comment in initial whitespace sequence |
105 | INCR_M_C0, // inside comment in initial whitespace sequence |
104 | INCR_M_C1, // inside comment in other places |
106 | INCR_M_C1, // inside comment in other places |
105 | INCR_M_JSON // outside anything, count nesting |
107 | INCR_M_JSON // outside anything, count nesting |
… | |
… | |
125 | } JSON; |
127 | } JSON; |
126 | |
128 | |
127 | INLINE void |
129 | INLINE void |
128 | json_init (JSON *json) |
130 | json_init (JSON *json) |
129 | { |
131 | { |
130 | Zero (json, 1, JSON); |
132 | static const JSON init = { F_ALLOW_NONREF, 512 }; |
131 | json->max_depth = 512; |
133 | |
|
|
134 | *json = init; |
132 | } |
135 | } |
133 | |
136 | |
134 | ///////////////////////////////////////////////////////////////////////////// |
137 | ///////////////////////////////////////////////////////////////////////////// |
135 | // utility functions |
138 | // utility functions |
136 | |
139 | |
… | |
… | |
159 | #endif |
162 | #endif |
160 | } |
163 | } |
161 | } |
164 | } |
162 | |
165 | |
163 | /* adds two STRLENs together, slow, and with paranoia */ |
166 | /* adds two STRLENs together, slow, and with paranoia */ |
164 | STRLEN |
167 | static STRLEN |
165 | strlen_sum (STRLEN l1, STRLEN l2) |
168 | strlen_sum (STRLEN l1, STRLEN l2) |
166 | { |
169 | { |
167 | size_t sum = l1 + l2; |
170 | size_t sum = l1 + l2; |
168 | |
171 | |
169 | if (sum < (size_t)l2 || sum != (size_t)(STRLEN)sum) |
172 | if (sum < (size_t)l2 || sum != (size_t)(STRLEN)sum) |
… | |
… | |
788 | { |
791 | { |
789 | int count; |
792 | int count; |
790 | dSP; |
793 | dSP; |
791 | |
794 | |
792 | ENTER; SAVETMPS; |
795 | ENTER; SAVETMPS; |
793 | SAVESTACK_POS (); |
|
|
794 | PUSHMARK (SP); |
796 | PUSHMARK (SP); |
795 | EXTEND (SP, 2); |
797 | EXTEND (SP, 2); |
796 | // we re-bless the reference to get overload and other niceties right |
798 | // we re-bless the reference to get overload and other niceties right |
797 | PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); |
799 | PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); |
798 | PUSHs (sv_json); |
800 | PUSHs (sv_json); |
… | |
… | |
810 | encode_str (enc, HvNAME (stash), HvNAMELEN (stash), HvNAMEUTF8 (stash)); |
812 | encode_str (enc, HvNAME (stash), HvNAMELEN (stash), HvNAMEUTF8 (stash)); |
811 | encode_ch (enc, '"'); |
813 | encode_ch (enc, '"'); |
812 | encode_ch (enc, ')'); |
814 | encode_ch (enc, ')'); |
813 | encode_ch (enc, '['); |
815 | encode_ch (enc, '['); |
814 | |
816 | |
815 | while (count) |
817 | if (count) |
816 | { |
818 | { |
|
|
819 | int i; |
|
|
820 | |
|
|
821 | for (i = 0; i < count - 1; ++i) |
|
|
822 | { |
817 | encode_sv (enc, SP[1 - count--]); |
823 | encode_sv (enc, SP[i + 1 - count]); |
818 | |
|
|
819 | if (count) |
|
|
820 | encode_ch (enc, ','); |
824 | encode_ch (enc, ','); |
|
|
825 | } |
|
|
826 | |
|
|
827 | encode_sv (enc, TOPs); |
|
|
828 | SP -= count; |
821 | } |
829 | } |
|
|
830 | |
|
|
831 | PUTBACK; |
822 | |
832 | |
823 | encode_ch (enc, ']'); |
833 | encode_ch (enc, ']'); |
824 | |
834 | |
825 | FREETMPS; LEAVE; |
835 | FREETMPS; LEAVE; |
826 | } |
836 | } |
… | |
… | |
1028 | else |
1038 | else |
1029 | break; |
1039 | break; |
1030 | } |
1040 | } |
1031 | else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09) |
1041 | else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09) |
1032 | break; // parse error, but let higher level handle it, gives better error messages |
1042 | break; // parse error, but let higher level handle it, gives better error messages |
1033 | |
1043 | else |
1034 | ++dec->cur; |
1044 | ++dec->cur; |
1035 | } |
1045 | } |
1036 | } |
1046 | } |
1037 | |
1047 | |
1038 | #define ERR(reason) SB dec->err = reason; goto fail; SE |
1048 | #define ERR(reason) SB dec->err = reason; goto fail; SE |
1039 | |
1049 | |
… | |
… | |
1496 | |
1506 | |
1497 | DEC_DEC_DEPTH; |
1507 | DEC_DEC_DEPTH; |
1498 | sv = newRV_noinc ((SV *)hv); |
1508 | sv = newRV_noinc ((SV *)hv); |
1499 | |
1509 | |
1500 | // check filter callbacks |
1510 | // check filter callbacks |
1501 | if (dec->json.flags & F_HOOK) |
1511 | if (expect_false (dec->json.flags & F_HOOK)) |
1502 | { |
1512 | { |
1503 | if (dec->json.cb_sk_object && HvKEYS (hv) == 1) |
1513 | if (dec->json.cb_sk_object && HvKEYS (hv) == 1) |
1504 | { |
1514 | { |
1505 | HE *cb, *he; |
1515 | HE *cb, *he; |
1506 | |
1516 | |
… | |
… | |
1516 | { |
1526 | { |
1517 | dSP; |
1527 | dSP; |
1518 | int count; |
1528 | int count; |
1519 | |
1529 | |
1520 | ENTER; SAVETMPS; |
1530 | ENTER; SAVETMPS; |
1521 | SAVESTACK_POS (); |
|
|
1522 | PUSHMARK (SP); |
1531 | PUSHMARK (SP); |
1523 | XPUSHs (HeVAL (he)); |
1532 | XPUSHs (HeVAL (he)); |
1524 | sv_2mortal (sv); |
1533 | sv_2mortal (sv); |
1525 | |
1534 | |
1526 | PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; |
1535 | PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; |
1527 | |
1536 | |
1528 | if (count == 1) |
1537 | if (count == 1) |
1529 | { |
1538 | { |
1530 | sv = newSVsv (POPs); |
1539 | sv = newSVsv (POPs); |
|
|
1540 | PUTBACK; |
1531 | FREETMPS; LEAVE; |
1541 | FREETMPS; LEAVE; |
1532 | return sv; |
1542 | return sv; |
1533 | } |
1543 | } |
|
|
1544 | else if (count) |
|
|
1545 | croak ("filter_json_single_key_object callbacks must not return more than one scalar"); |
|
|
1546 | |
|
|
1547 | PUTBACK; |
1534 | |
1548 | |
1535 | SvREFCNT_inc (sv); |
1549 | SvREFCNT_inc (sv); |
|
|
1550 | |
1536 | FREETMPS; LEAVE; |
1551 | FREETMPS; LEAVE; |
1537 | } |
1552 | } |
1538 | } |
1553 | } |
1539 | |
1554 | |
1540 | if (dec->json.cb_object) |
1555 | if (dec->json.cb_object) |
1541 | { |
1556 | { |
1542 | dSP; |
1557 | dSP; |
1543 | int count; |
1558 | int count; |
1544 | |
1559 | |
1545 | ENTER; SAVETMPS; |
1560 | ENTER; SAVETMPS; |
1546 | SAVESTACK_POS (); |
|
|
1547 | PUSHMARK (SP); |
1561 | PUSHMARK (SP); |
1548 | XPUSHs (sv_2mortal (sv)); |
1562 | XPUSHs (sv_2mortal (sv)); |
1549 | |
1563 | |
1550 | PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; |
1564 | PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; |
1551 | |
1565 | |
1552 | if (count == 1) |
1566 | if (count == 1) |
1553 | { |
|
|
1554 | sv = newSVsv (POPs); |
1567 | sv = newSVsv (POPs); |
1555 | FREETMPS; LEAVE; |
1568 | else if (count == 0) |
1556 | return sv; |
|
|
1557 | } |
|
|
1558 | |
|
|
1559 | SvREFCNT_inc (sv); |
1569 | SvREFCNT_inc (sv); |
|
|
1570 | else |
|
|
1571 | croak ("filter_json_object callbacks must not return more than one scalar"); |
|
|
1572 | |
|
|
1573 | PUTBACK; |
|
|
1574 | |
1560 | FREETMPS; LEAVE; |
1575 | FREETMPS; LEAVE; |
1561 | } |
1576 | } |
1562 | } |
1577 | } |
1563 | |
1578 | |
1564 | return sv; |
1579 | return sv; |
… | |
… | |
1787 | else if (sv) |
1802 | else if (sv) |
1788 | { |
1803 | { |
1789 | // check for trailing garbage |
1804 | // check for trailing garbage |
1790 | decode_ws (&dec); |
1805 | decode_ws (&dec); |
1791 | |
1806 | |
1792 | if (*dec.cur) |
1807 | if (dec.cur != dec.end) |
1793 | { |
1808 | { |
1794 | dec.err = "garbage after JSON object"; |
1809 | dec.err = "garbage after JSON object"; |
1795 | SvREFCNT_dec (sv); |
1810 | SvREFCNT_dec (sv); |
1796 | sv = 0; |
1811 | sv = 0; |
1797 | } |
1812 | } |
… | |
… | |
1835 | // the state machine here is a bit convoluted and could be simplified a lot |
1850 | // the state machine here is a bit convoluted and could be simplified a lot |
1836 | // but this would make it slower, so... |
1851 | // but this would make it slower, so... |
1837 | |
1852 | |
1838 | for (;;) |
1853 | for (;;) |
1839 | { |
1854 | { |
1840 | //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D |
|
|
1841 | switch (self->incr_mode) |
1855 | switch (self->incr_mode) |
1842 | { |
1856 | { |
|
|
1857 | // reached end of a scalar, see if we are inside a nested structure or not |
|
|
1858 | end_of_scalar: |
|
|
1859 | self->incr_mode = INCR_M_JSON; |
|
|
1860 | |
|
|
1861 | if (self->incr_nest) // end of a scalar inside array, object or tag |
|
|
1862 | goto incr_m_json; |
|
|
1863 | else // end of scalar outside structure, json text ends here |
|
|
1864 | goto interrupt; |
|
|
1865 | |
1843 | // only used for initial whitespace skipping |
1866 | // only used for initial whitespace skipping |
1844 | case INCR_M_WS: |
1867 | case INCR_M_WS: |
1845 | for (;;) |
1868 | for (;;) |
1846 | { |
1869 | { |
1847 | if (*p > 0x20) |
1870 | if (*p > 0x20) |
… | |
… | |
1889 | ++p; |
1912 | ++p; |
1890 | } |
1913 | } |
1891 | |
1914 | |
1892 | break; |
1915 | break; |
1893 | |
1916 | |
|
|
1917 | // inside true/false/null |
|
|
1918 | case INCR_M_TFN: |
|
|
1919 | incr_m_tfn: |
|
|
1920 | for (;;) |
|
|
1921 | switch (*p++) |
|
|
1922 | { |
|
|
1923 | case 'r': case 'u': case 'e': // tRUE, falsE, nUll |
|
|
1924 | case 'a': case 'l': case 's': // fALSe, nuLL |
|
|
1925 | // allowed |
|
|
1926 | break; |
|
|
1927 | |
|
|
1928 | default: |
|
|
1929 | --p; |
|
|
1930 | goto end_of_scalar; |
|
|
1931 | } |
|
|
1932 | |
|
|
1933 | // inside a number |
|
|
1934 | case INCR_M_NUM: |
|
|
1935 | incr_m_num: |
|
|
1936 | for (;;) |
|
|
1937 | switch (*p++) |
|
|
1938 | { |
|
|
1939 | case 'e': case 'E': case '.': case '+': |
|
|
1940 | case '-': |
|
|
1941 | case '0': case '1': case '2': case '3': case '4': |
|
|
1942 | case '5': case '6': case '7': case '8': case '9': |
|
|
1943 | // allowed |
|
|
1944 | break; |
|
|
1945 | |
|
|
1946 | default: |
|
|
1947 | --p; |
|
|
1948 | goto end_of_scalar; |
|
|
1949 | } |
|
|
1950 | |
1894 | // inside a string |
1951 | // inside a string |
1895 | case INCR_M_STR: |
1952 | case INCR_M_STR: |
1896 | incr_m_str: |
1953 | incr_m_str: |
1897 | for (;;) |
1954 | for (;;) |
1898 | { |
1955 | { |
1899 | if (*p == '"') |
1956 | if (*p == '"') |
1900 | { |
1957 | { |
1901 | ++p; |
1958 | ++p; |
1902 | self->incr_mode = INCR_M_JSON; |
|
|
1903 | |
|
|
1904 | if (!self->incr_nest) |
|
|
1905 | goto interrupt; |
|
|
1906 | |
|
|
1907 | goto incr_m_json; |
1959 | goto end_of_scalar; |
1908 | } |
1960 | } |
1909 | else if (*p == '\\') |
1961 | else if (*p == '\\') |
1910 | { |
1962 | { |
1911 | ++p; // "virtually" consumes character after \ |
1963 | ++p; // "virtually" consumes character after \ |
1912 | |
1964 | |
… | |
… | |
1941 | { |
1993 | { |
1942 | --p; // do not eat the whitespace, let the next round do it |
1994 | --p; // do not eat the whitespace, let the next round do it |
1943 | goto interrupt; |
1995 | goto interrupt; |
1944 | } |
1996 | } |
1945 | break; |
1997 | break; |
|
|
1998 | |
|
|
1999 | // the following three blocks handle scalars. this makes the parser |
|
|
2000 | // more strict than required inside arrays or objects, and could |
|
|
2001 | // be moved to a special case on the toplevel (except strings) |
|
|
2002 | case 't': |
|
|
2003 | case 'f': |
|
|
2004 | case 'n': |
|
|
2005 | self->incr_mode = INCR_M_TFN; |
|
|
2006 | goto incr_m_tfn; |
|
|
2007 | |
|
|
2008 | case '-': |
|
|
2009 | case '0': case '1': case '2': case '3': case '4': |
|
|
2010 | case '5': case '6': case '7': case '8': case '9': |
|
|
2011 | self->incr_mode = INCR_M_NUM; |
|
|
2012 | goto incr_m_num; |
1946 | |
2013 | |
1947 | case '"': |
2014 | case '"': |
1948 | self->incr_mode = INCR_M_STR; |
2015 | self->incr_mode = INCR_M_STR; |
1949 | goto incr_m_str; |
2016 | goto incr_m_str; |
1950 | |
2017 | |