… | |
… | |
17 | // guarantees, though. if it breaks, you get to keep the pieces. |
17 | // guarantees, though. if it breaks, you get to keep the pieces. |
18 | #ifndef UTF8_MAXBYTES |
18 | #ifndef UTF8_MAXBYTES |
19 | # define UTF8_MAXBYTES 13 |
19 | # define UTF8_MAXBYTES 13 |
20 | #endif |
20 | #endif |
21 | |
21 | |
|
|
22 | // three extra for rounding, sign, and end of string |
22 | #define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2) |
23 | #define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 3) |
23 | |
24 | |
24 | #define F_ASCII 0x00000001UL |
25 | #define F_ASCII 0x00000001UL |
25 | #define F_LATIN1 0x00000002UL |
26 | #define F_LATIN1 0x00000002UL |
26 | #define F_UTF8 0x00000004UL |
27 | #define F_UTF8 0x00000004UL |
27 | #define F_INDENT 0x00000008UL |
28 | #define F_INDENT 0x00000008UL |
… | |
… | |
76 | |
77 | |
77 | enum { |
78 | enum { |
78 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
79 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 |
79 | INCR_M_STR, // inside string |
80 | INCR_M_STR, // inside string |
80 | INCR_M_BS, // inside backslash |
81 | INCR_M_BS, // inside backslash |
|
|
82 | INCR_M_C0, // inside comment in initial whitespace sequence |
|
|
83 | INCR_M_C1, // inside comment in other places |
81 | INCR_M_JSON // outside anything, count nesting |
84 | INCR_M_JSON // outside anything, count nesting |
82 | }; |
85 | }; |
83 | |
86 | |
84 | #define INCR_DONE(json) ((json)->incr_nest <= 0 && (json)->incr_mode == INCR_M_JSON) |
87 | #define INCR_DONE(json) ((json)->incr_nest <= 0 && (json)->incr_mode == INCR_M_JSON) |
85 | |
88 | |
… | |
… | |
759 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
762 | : enc.json.flags & F_LATIN1 ? 0x000100UL |
760 | : 0x110000UL; |
763 | : 0x110000UL; |
761 | |
764 | |
762 | SvPOK_only (enc.sv); |
765 | SvPOK_only (enc.sv); |
763 | encode_sv (&enc, scalar); |
766 | encode_sv (&enc, scalar); |
|
|
767 | encode_nl (&enc); |
764 | |
768 | |
765 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
769 | SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); |
766 | *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings |
770 | *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings |
767 | |
771 | |
768 | if (!(enc.json.flags & (F_ASCII | F_LATIN1 | F_UTF8))) |
772 | if (!(enc.json.flags & (F_ASCII | F_LATIN1 | F_UTF8))) |
… | |
… | |
948 | else if (expect_true (ch >= 0x20 && ch < 0x80)) |
952 | else if (expect_true (ch >= 0x20 && ch < 0x80)) |
949 | *cur++ = ch; |
953 | *cur++ = ch; |
950 | else if (ch >= 0x80) |
954 | else if (ch >= 0x80) |
951 | { |
955 | { |
952 | STRLEN clen; |
956 | STRLEN clen; |
953 | UV uch; |
|
|
954 | |
957 | |
955 | --dec_cur; |
958 | --dec_cur; |
956 | |
959 | |
957 | uch = decode_utf8 (dec_cur, dec->end - dec_cur, &clen); |
960 | decode_utf8 (dec_cur, dec->end - dec_cur, &clen); |
958 | if (clen == (STRLEN)-1) |
961 | if (clen == (STRLEN)-1) |
959 | ERR ("malformed UTF-8 character in JSON string"); |
962 | ERR ("malformed UTF-8 character in JSON string"); |
960 | |
963 | |
961 | do |
964 | do |
962 | *cur++ = *dec_cur++; |
965 | *cur++ = *dec_cur++; |
… | |
… | |
1525 | static void |
1528 | static void |
1526 | incr_parse (JSON *self) |
1529 | incr_parse (JSON *self) |
1527 | { |
1530 | { |
1528 | const char *p = SvPVX (self->incr_text) + self->incr_pos; |
1531 | const char *p = SvPVX (self->incr_text) + self->incr_pos; |
1529 | |
1532 | |
|
|
1533 | // the state machine here is a bit convoluted and could be simplified a lot |
|
|
1534 | // but this would make it slower, so... |
|
|
1535 | |
1530 | for (;;) |
1536 | for (;;) |
1531 | { |
1537 | { |
1532 | //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D |
1538 | //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D |
1533 | switch (self->incr_mode) |
1539 | switch (self->incr_mode) |
1534 | { |
1540 | { |
1535 | // only used for intiial whitespace skipping |
1541 | // only used for initial whitespace skipping |
1536 | case INCR_M_WS: |
1542 | case INCR_M_WS: |
1537 | for (;;) |
1543 | for (;;) |
1538 | { |
1544 | { |
1539 | if (*p > 0x20) |
1545 | if (*p > 0x20) |
1540 | { |
1546 | { |
|
|
1547 | if (*p == '#') |
|
|
1548 | { |
|
|
1549 | self->incr_mode = INCR_M_C0; |
|
|
1550 | goto incr_m_c; |
|
|
1551 | } |
|
|
1552 | else |
|
|
1553 | { |
1541 | self->incr_mode = INCR_M_JSON; |
1554 | self->incr_mode = INCR_M_JSON; |
1542 | goto incr_m_json; |
1555 | goto incr_m_json; |
|
|
1556 | } |
1543 | } |
1557 | } |
1544 | else if (!*p) |
1558 | else if (!*p) |
1545 | goto interrupt; |
1559 | goto interrupt; |
1546 | |
1560 | |
1547 | ++p; |
1561 | ++p; |
… | |
… | |
1553 | goto interrupt; |
1567 | goto interrupt; |
1554 | |
1568 | |
1555 | ++p; |
1569 | ++p; |
1556 | self->incr_mode = INCR_M_STR; |
1570 | self->incr_mode = INCR_M_STR; |
1557 | goto incr_m_str; |
1571 | goto incr_m_str; |
|
|
1572 | |
|
|
1573 | // inside #-style comments |
|
|
1574 | case INCR_M_C0: |
|
|
1575 | case INCR_M_C1: |
|
|
1576 | incr_m_c: |
|
|
1577 | for (;;) |
|
|
1578 | { |
|
|
1579 | if (*p == '\n') |
|
|
1580 | { |
|
|
1581 | self->incr_mode = self->incr_mode == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON; |
|
|
1582 | break; |
|
|
1583 | } |
|
|
1584 | else if (!*p) |
|
|
1585 | goto interrupt; |
|
|
1586 | |
|
|
1587 | ++p; |
|
|
1588 | } |
|
|
1589 | |
|
|
1590 | break; |
1558 | |
1591 | |
1559 | // inside a string |
1592 | // inside a string |
1560 | case INCR_M_STR: |
1593 | case INCR_M_STR: |
1561 | incr_m_str: |
1594 | incr_m_str: |
1562 | for (;;) |
1595 | for (;;) |
… | |
… | |
1621 | |
1654 | |
1622 | case ']': |
1655 | case ']': |
1623 | case '}': |
1656 | case '}': |
1624 | if (--self->incr_nest <= 0) |
1657 | if (--self->incr_nest <= 0) |
1625 | goto interrupt; |
1658 | goto interrupt; |
|
|
1659 | break; |
|
|
1660 | |
|
|
1661 | case '#': |
|
|
1662 | self->incr_mode = INCR_M_C1; |
|
|
1663 | goto incr_m_c; |
1626 | } |
1664 | } |
1627 | } |
1665 | } |
1628 | } |
1666 | } |
1629 | |
1667 | |
1630 | modechange: |
1668 | modechange: |
1631 | ; |
1669 | ; |
1632 | } |
1670 | } |
1633 | |
1671 | |
1634 | interrupt: |
1672 | interrupt: |
1635 | self->incr_pos = p - SvPVX (self->incr_text); |
1673 | self->incr_pos = p - SvPVX (self->incr_text); |
|
|
1674 | //printf ("interrupt<%.*s>\n", self->incr_pos, SvPVX(self->incr_text));//D |
1636 | //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D |
1675 | //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D |
1637 | } |
1676 | } |
1638 | |
1677 | |
1639 | ///////////////////////////////////////////////////////////////////////////// |
1678 | ///////////////////////////////////////////////////////////////////////////// |
1640 | // XS interface functions |
1679 | // XS interface functions |