… | |
… | |
96 | static SV *bool_false, *bool_true; |
96 | static SV *bool_false, *bool_true; |
97 | static SV *sv_json; |
97 | static SV *sv_json; |
98 | |
98 | |
99 | enum { |
99 | enum { |
100 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
100 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
|
|
101 | INCR_M_TFN, // inside true/false/null |
|
|
102 | INCR_M_NUM, // inside number |
101 | INCR_M_STR, // inside string |
103 | INCR_M_STR, // inside string |
102 | INCR_M_BS, // inside backslash |
104 | INCR_M_BS, // inside backslash |
103 | INCR_M_C0, // inside comment in initial whitespace sequence |
105 | INCR_M_C0, // inside comment in initial whitespace sequence |
104 | INCR_M_C1, // inside comment in other places |
106 | INCR_M_C1, // inside comment in other places |
105 | INCR_M_JSON // outside anything, count nesting |
107 | INCR_M_JSON // outside anything, count nesting |
… | |
… | |
125 | } JSON; |
127 | } JSON; |
126 | |
128 | |
127 | INLINE void |
129 | INLINE void |
128 | json_init (JSON *json) |
130 | json_init (JSON *json) |
129 | { |
131 | { |
130 | Zero (json, 1, JSON); |
132 | static const JSON init = { F_ALLOW_NONREF, 512 }; |
131 | json->max_depth = 512; |
133 | |
|
|
134 | *json = init; |
132 | } |
135 | } |
133 | |
136 | |
134 | ///////////////////////////////////////////////////////////////////////////// |
137 | ///////////////////////////////////////////////////////////////////////////// |
135 | // utility functions |
138 | // utility functions |
136 | |
139 | |
… | |
… | |
159 | #endif |
162 | #endif |
160 | } |
163 | } |
161 | } |
164 | } |
162 | |
165 | |
163 | /* adds two STRLENs together, slow, and with paranoia */ |
166 | /* adds two STRLENs together, slow, and with paranoia */ |
164 | STRLEN |
167 | static STRLEN |
165 | strlen_sum (STRLEN l1, STRLEN l2) |
168 | strlen_sum (STRLEN l1, STRLEN l2) |
166 | { |
169 | { |
167 | size_t sum = l1 + l2; |
170 | size_t sum = l1 + l2; |
168 | |
171 | |
169 | if (sum < (size_t)l2 || sum != (size_t)(STRLEN)sum) |
172 | if (sum < (size_t)l2 || sum != (size_t)(STRLEN)sum) |
… | |
… | |
823 | |
826 | |
824 | encode_sv (enc, TOPs); |
827 | encode_sv (enc, TOPs); |
825 | SP -= count; |
828 | SP -= count; |
826 | } |
829 | } |
827 | |
830 | |
|
|
831 | PUTBACK; |
|
|
832 | |
828 | encode_ch (enc, ']'); |
833 | encode_ch (enc, ']'); |
829 | |
834 | |
830 | FREETMPS; LEAVE; |
835 | FREETMPS; LEAVE; |
831 | } |
836 | } |
832 | else if ((enc->json.flags & F_CONV_BLESSED) && (method = gv_fetchmethod_autoload (stash, "TO_JSON", 0))) |
837 | else if ((enc->json.flags & F_CONV_BLESSED) && (method = gv_fetchmethod_autoload (stash, "TO_JSON", 0))) |
… | |
… | |
913 | if (SvIsUV (sv) ? SvUVX (sv) <= 59000 |
918 | if (SvIsUV (sv) ? SvUVX (sv) <= 59000 |
914 | : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000) |
919 | : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000) |
915 | { |
920 | { |
916 | // optimise the "small number case" |
921 | // optimise the "small number case" |
917 | // code will likely be branchless and use only a single multiplication |
922 | // code will likely be branchless and use only a single multiplication |
918 | // works for numbers up to 59074 |
923 | // 4.28 works for numbers up to 59074 |
919 | I32 i = SvIVX (sv); |
924 | I32 i = SvIVX (sv); |
920 | U32 u; |
925 | U32 u; |
921 | char digit, nz = 0; |
926 | char digit, nz = 0; |
922 | |
927 | |
923 | need (enc, 6); |
928 | need (enc, 6); |
… | |
… | |
1033 | else |
1038 | else |
1034 | break; |
1039 | break; |
1035 | } |
1040 | } |
1036 | else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09) |
1041 | else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09) |
1037 | break; // parse error, but let higher level handle it, gives better error messages |
1042 | break; // parse error, but let higher level handle it, gives better error messages |
1038 | |
1043 | else |
1039 | ++dec->cur; |
1044 | ++dec->cur; |
1040 | } |
1045 | } |
1041 | } |
1046 | } |
1042 | |
1047 | |
1043 | #define ERR(reason) SB dec->err = reason; goto fail; SE |
1048 | #define ERR(reason) SB dec->err = reason; goto fail; SE |
1044 | |
1049 | |
… | |
… | |
1501 | |
1506 | |
1502 | DEC_DEC_DEPTH; |
1507 | DEC_DEC_DEPTH; |
1503 | sv = newRV_noinc ((SV *)hv); |
1508 | sv = newRV_noinc ((SV *)hv); |
1504 | |
1509 | |
1505 | // check filter callbacks |
1510 | // check filter callbacks |
1506 | if (dec->json.flags & F_HOOK) |
1511 | if (expect_false (dec->json.flags & F_HOOK)) |
1507 | { |
1512 | { |
1508 | if (dec->json.cb_sk_object && HvKEYS (hv) == 1) |
1513 | if (dec->json.cb_sk_object && HvKEYS (hv) == 1) |
1509 | { |
1514 | { |
1510 | HE *cb, *he; |
1515 | HE *cb, *he; |
1511 | |
1516 | |
… | |
… | |
1530 | PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; |
1535 | PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; |
1531 | |
1536 | |
1532 | if (count == 1) |
1537 | if (count == 1) |
1533 | { |
1538 | { |
1534 | sv = newSVsv (POPs); |
1539 | sv = newSVsv (POPs); |
|
|
1540 | PUTBACK; |
1535 | FREETMPS; LEAVE; |
1541 | FREETMPS; LEAVE; |
1536 | return sv; |
1542 | return sv; |
1537 | } |
1543 | } |
1538 | else if (count) |
1544 | else if (count) |
1539 | croak ("filter_json_single_key_object callbacks must not return more than one scalar"); |
1545 | croak ("filter_json_single_key_object callbacks must not return more than one scalar"); |
1540 | |
1546 | |
|
|
1547 | PUTBACK; |
|
|
1548 | |
1541 | SvREFCNT_inc (sv); |
1549 | SvREFCNT_inc (sv); |
|
|
1550 | |
1542 | FREETMPS; LEAVE; |
1551 | FREETMPS; LEAVE; |
1543 | } |
1552 | } |
1544 | } |
1553 | } |
1545 | |
1554 | |
1546 | if (dec->json.cb_object) |
1555 | if (dec->json.cb_object) |
… | |
… | |
1553 | XPUSHs (sv_2mortal (sv)); |
1562 | XPUSHs (sv_2mortal (sv)); |
1554 | |
1563 | |
1555 | PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; |
1564 | PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; |
1556 | |
1565 | |
1557 | if (count == 1) |
1566 | if (count == 1) |
1558 | { |
|
|
1559 | sv = newSVsv (POPs); |
1567 | sv = newSVsv (POPs); |
1560 | FREETMPS; LEAVE; |
|
|
1561 | return sv; |
|
|
1562 | } |
|
|
1563 | else if (count) |
1568 | else if (count == 0) |
|
|
1569 | SvREFCNT_inc (sv); |
|
|
1570 | else |
1564 | croak ("filter_json_object callbacks must not return more than one scalar"); |
1571 | croak ("filter_json_object callbacks must not return more than one scalar"); |
1565 | |
1572 | |
1566 | SvREFCNT_inc (sv); |
1573 | PUTBACK; |
|
|
1574 | |
1567 | FREETMPS; LEAVE; |
1575 | FREETMPS; LEAVE; |
1568 | } |
1576 | } |
1569 | } |
1577 | } |
1570 | |
1578 | |
1571 | return sv; |
1579 | return sv; |
… | |
… | |
1794 | else if (sv) |
1802 | else if (sv) |
1795 | { |
1803 | { |
1796 | // check for trailing garbage |
1804 | // check for trailing garbage |
1797 | decode_ws (&dec); |
1805 | decode_ws (&dec); |
1798 | |
1806 | |
1799 | if (*dec.cur) |
1807 | if (dec.cur != dec.end) |
1800 | { |
1808 | { |
1801 | dec.err = "garbage after JSON object"; |
1809 | dec.err = "garbage after JSON object"; |
1802 | SvREFCNT_dec (sv); |
1810 | SvREFCNT_dec (sv); |
1803 | sv = 0; |
1811 | sv = 0; |
1804 | } |
1812 | } |
… | |
… | |
1842 | // the state machine here is a bit convoluted and could be simplified a lot |
1850 | // the state machine here is a bit convoluted and could be simplified a lot |
1843 | // but this would make it slower, so... |
1851 | // but this would make it slower, so... |
1844 | |
1852 | |
1845 | for (;;) |
1853 | for (;;) |
1846 | { |
1854 | { |
1847 | //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D |
|
|
1848 | switch (self->incr_mode) |
1855 | switch (self->incr_mode) |
1849 | { |
1856 | { |
|
|
1857 | // reached end of a scalar, see if we are inside a nested structure or not |
|
|
1858 | end_of_scalar: |
|
|
1859 | self->incr_mode = INCR_M_JSON; |
|
|
1860 | |
|
|
1861 | if (self->incr_nest) // end of a scalar inside array, object or tag |
|
|
1862 | goto incr_m_json; |
|
|
1863 | else // end of scalar outside structure, json text ends here |
|
|
1864 | goto interrupt; |
|
|
1865 | |
1850 | // only used for initial whitespace skipping |
1866 | // only used for initial whitespace skipping |
1851 | case INCR_M_WS: |
1867 | case INCR_M_WS: |
1852 | for (;;) |
1868 | for (;;) |
1853 | { |
1869 | { |
1854 | if (*p > 0x20) |
1870 | if (*p > 0x20) |
… | |
… | |
1896 | ++p; |
1912 | ++p; |
1897 | } |
1913 | } |
1898 | |
1914 | |
1899 | break; |
1915 | break; |
1900 | |
1916 | |
|
|
1917 | // inside true/false/null |
|
|
1918 | case INCR_M_TFN: |
|
|
1919 | incr_m_tfn: |
|
|
1920 | for (;;) |
|
|
1921 | switch (*p++) |
|
|
1922 | { |
|
|
1923 | case 'r': case 'u': case 'e': // tRUE, falsE, nUll |
|
|
1924 | case 'a': case 'l': case 's': // fALSe, nuLL |
|
|
1925 | // allowed |
|
|
1926 | break; |
|
|
1927 | |
|
|
1928 | default: |
|
|
1929 | --p; |
|
|
1930 | goto end_of_scalar; |
|
|
1931 | } |
|
|
1932 | |
|
|
1933 | // inside a number |
|
|
1934 | case INCR_M_NUM: |
|
|
1935 | incr_m_num: |
|
|
1936 | for (;;) |
|
|
1937 | switch (*p++) |
|
|
1938 | { |
|
|
1939 | case 'e': case 'E': case '.': case '+': |
|
|
1940 | case '-': |
|
|
1941 | case '0': case '1': case '2': case '3': case '4': |
|
|
1942 | case '5': case '6': case '7': case '8': case '9': |
|
|
1943 | // allowed |
|
|
1944 | break; |
|
|
1945 | |
|
|
1946 | default: |
|
|
1947 | --p; |
|
|
1948 | goto end_of_scalar; |
|
|
1949 | } |
|
|
1950 | |
1901 | // inside a string |
1951 | // inside a string |
1902 | case INCR_M_STR: |
1952 | case INCR_M_STR: |
1903 | incr_m_str: |
1953 | incr_m_str: |
1904 | for (;;) |
1954 | for (;;) |
1905 | { |
1955 | { |
1906 | if (*p == '"') |
1956 | if (*p == '"') |
1907 | { |
1957 | { |
1908 | ++p; |
1958 | ++p; |
1909 | self->incr_mode = INCR_M_JSON; |
|
|
1910 | |
|
|
1911 | if (!self->incr_nest) |
|
|
1912 | goto interrupt; |
|
|
1913 | |
|
|
1914 | goto incr_m_json; |
1959 | goto end_of_scalar; |
1915 | } |
1960 | } |
1916 | else if (*p == '\\') |
1961 | else if (*p == '\\') |
1917 | { |
1962 | { |
1918 | ++p; // "virtually" consumes character after \ |
1963 | ++p; // "virtually" consumes character after \ |
1919 | |
1964 | |
… | |
… | |
1948 | { |
1993 | { |
1949 | --p; // do not eat the whitespace, let the next round do it |
1994 | --p; // do not eat the whitespace, let the next round do it |
1950 | goto interrupt; |
1995 | goto interrupt; |
1951 | } |
1996 | } |
1952 | break; |
1997 | break; |
|
|
1998 | |
|
|
1999 | // the following three blocks handle scalars. this makes the parser |
|
|
2000 | // more strict than required inside arrays or objects, and could |
|
|
2001 | // be moved to a special case on the toplevel (except strings) |
|
|
2002 | case 't': |
|
|
2003 | case 'f': |
|
|
2004 | case 'n': |
|
|
2005 | self->incr_mode = INCR_M_TFN; |
|
|
2006 | goto incr_m_tfn; |
|
|
2007 | |
|
|
2008 | case '-': |
|
|
2009 | case '0': case '1': case '2': case '3': case '4': |
|
|
2010 | case '5': case '6': case '7': case '8': case '9': |
|
|
2011 | self->incr_mode = INCR_M_NUM; |
|
|
2012 | goto incr_m_num; |
1953 | |
2013 | |
1954 | case '"': |
2014 | case '"': |
1955 | self->incr_mode = INCR_M_STR; |
2015 | self->incr_mode = INCR_M_STR; |
1956 | goto incr_m_str; |
2016 | goto incr_m_str; |
1957 | |
2017 | |