… | |
… | |
96 | static SV *bool_false, *bool_true; |
96 | static SV *bool_false, *bool_true; |
97 | static SV *sv_json; |
97 | static SV *sv_json; |
98 | |
98 | |
99 | enum { |
99 | enum { |
100 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
100 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
|
|
101 | INCR_M_TFN, // inside true/false/null |
|
|
102 | INCR_M_NUM, // inside number |
101 | INCR_M_STR, // inside string |
103 | INCR_M_STR, // inside string |
102 | INCR_M_BS, // inside backslash |
104 | INCR_M_BS, // inside backslash |
103 | INCR_M_C0, // inside comment in initial whitespace sequence |
105 | INCR_M_C0, // inside comment in initial whitespace sequence |
104 | INCR_M_C1, // inside comment in other places |
106 | INCR_M_C1, // inside comment in other places |
105 | INCR_M_JSON // outside anything, count nesting |
107 | INCR_M_JSON // outside anything, count nesting |
… | |
… | |
1843 | // the state machine here is a bit convoluted and could be simplified a lot |
1845 | // the state machine here is a bit convoluted and could be simplified a lot |
1844 | // but this would make it slower, so... |
1846 | // but this would make it slower, so... |
1845 | |
1847 | |
1846 | for (;;) |
1848 | for (;;) |
1847 | { |
1849 | { |
1848 | //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D |
|
|
1849 | switch (self->incr_mode) |
1850 | switch (self->incr_mode) |
1850 | { |
1851 | { |
|
|
1852 | // reached end of a scalar, see if we are inside a nested structure or not |
|
|
1853 | end_of_scalar: |
|
|
1854 | self->incr_mode = INCR_M_JSON; |
|
|
1855 | |
|
|
1856 | if (self->incr_nest) // end of a scalar inside array, object or tag |
|
|
1857 | goto incr_m_json; |
|
|
1858 | else // end of scalar outside structure, json text ends here |
|
|
1859 | goto interrupt; |
|
|
1860 | |
1851 | // only used for initial whitespace skipping |
1861 | // only used for initial whitespace skipping |
1852 | case INCR_M_WS: |
1862 | case INCR_M_WS: |
1853 | for (;;) |
1863 | for (;;) |
1854 | { |
1864 | { |
1855 | if (*p > 0x20) |
1865 | if (*p > 0x20) |
… | |
… | |
1897 | ++p; |
1907 | ++p; |
1898 | } |
1908 | } |
1899 | |
1909 | |
1900 | break; |
1910 | break; |
1901 | |
1911 | |
|
|
1912 | // inside true/false/null |
|
|
1913 | case INCR_M_TFN: |
|
|
1914 | incr_m_tfn: |
|
|
1915 | for (;;) |
|
|
1916 | switch (*p++) |
|
|
1917 | { |
|
|
1918 | case 'r': case 'u': case 'e': // tRUE, falsE, nUll |
|
|
1919 | case 'a': case 'l': case 's': // fALSe, nuLL |
|
|
1920 | // allowed |
|
|
1921 | break; |
|
|
1922 | |
|
|
1923 | default: |
|
|
1924 | --p; |
|
|
1925 | goto end_of_scalar; |
|
|
1926 | } |
|
|
1927 | |
|
|
1928 | // inside a number |
|
|
1929 | case INCR_M_NUM: |
|
|
1930 | incr_m_num: |
|
|
1931 | for (;;) |
|
|
1932 | switch (*p++) |
|
|
1933 | { |
|
|
1934 | case 'e': case 'E': case '.': case '+': |
|
|
1935 | case '-': |
|
|
1936 | case '0': case '1': case '2': case '3': case '4': |
|
|
1937 | case '5': case '6': case '7': case '8': case '9': |
|
|
1938 | // allowed |
|
|
1939 | break; |
|
|
1940 | |
|
|
1941 | default: |
|
|
1942 | --p; |
|
|
1943 | goto end_of_scalar; |
|
|
1944 | } |
|
|
1945 | |
1902 | // inside a string |
1946 | // inside a string |
1903 | case INCR_M_STR: |
1947 | case INCR_M_STR: |
1904 | incr_m_str: |
1948 | incr_m_str: |
1905 | for (;;) |
1949 | for (;;) |
1906 | { |
1950 | { |
1907 | if (*p == '"') |
1951 | if (*p == '"') |
1908 | { |
1952 | { |
1909 | ++p; |
1953 | ++p; |
1910 | self->incr_mode = INCR_M_JSON; |
|
|
1911 | |
|
|
1912 | if (!self->incr_nest) |
|
|
1913 | goto interrupt; |
|
|
1914 | |
|
|
1915 | goto incr_m_json; |
1954 | goto end_of_scalar; |
1916 | } |
1955 | } |
1917 | else if (*p == '\\') |
1956 | else if (*p == '\\') |
1918 | { |
1957 | { |
1919 | ++p; // "virtually" consumes character after \ |
1958 | ++p; // "virtually" consumes character after \ |
1920 | |
1959 | |
… | |
… | |
1949 | { |
1988 | { |
1950 | --p; // do not eat the whitespace, let the next round do it |
1989 | --p; // do not eat the whitespace, let the next round do it |
1951 | goto interrupt; |
1990 | goto interrupt; |
1952 | } |
1991 | } |
1953 | break; |
1992 | break; |
|
|
1993 | |
|
|
1994 | // the following three blocks handle scalars. this makes the parser |
|
|
1995 | // more strict than required inside arrays or objects, and could |
|
|
1996 | // be moved to a special case on the toplevel (except strings) |
|
|
1997 | case 't': |
|
|
1998 | case 'f': |
|
|
1999 | case 'n': |
|
|
2000 | self->incr_mode = INCR_M_TFN; |
|
|
2001 | goto incr_m_tfn; |
|
|
2002 | |
|
|
2003 | case '-': |
|
|
2004 | case '0': case '1': case '2': case '3': case '4': |
|
|
2005 | case '5': case '6': case '7': case '8': case '9': |
|
|
2006 | self->incr_mode = INCR_M_NUM; |
|
|
2007 | goto incr_m_num; |
1954 | |
2008 | |
1955 | case '"': |
2009 | case '"': |
1956 | self->incr_mode = INCR_M_STR; |
2010 | self->incr_mode = INCR_M_STR; |
1957 | goto incr_m_str; |
2011 | goto incr_m_str; |
1958 | |
2012 | |