ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.4 by root, Thu Mar 22 21:13:58 2007 UTC vs.
Revision 1.6 by root, Fri Mar 23 15:10:55 2007 UTC

12#define F_CANONICAL 0x00000008 12#define F_CANONICAL 0x00000008
13#define F_SPACE_BEFORE 0x00000010 13#define F_SPACE_BEFORE 0x00000010
14#define F_SPACE_AFTER 0x00000020 14#define F_SPACE_AFTER 0x00000020
15#define F_JSON_RPC 0x00000040 15#define F_JSON_RPC 0x00000040
16#define F_ALLOW_NONREF 0x00000080 16#define F_ALLOW_NONREF 0x00000080
17#define F_SHRINK 0x00000100
17 18
18#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER 19#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER
19#define F_DEFAULT 0 20#define F_DEFAULT 0
20 21
21#define INIT_SIZE 32 // initial scalar size to be allocated 22#define INIT_SIZE 32 // initial scalar size to be allocated
85 86
86 while (str < end) 87 while (str < end)
87 { 88 {
88 unsigned char ch = *(unsigned char *)str; 89 unsigned char ch = *(unsigned char *)str;
89 90
90 if (ch == '"') 91 if (ch >= 0x20 && ch < 0x80) // most common case
91 { 92 {
93 if (ch == '"') // but with slow exceptions
94 {
92 need (enc, len += 1); 95 need (enc, len += 1);
93 *enc->cur++ = '\\'; 96 *enc->cur++ = '\\';
94 *enc->cur++ = '"'; 97 *enc->cur++ = '"';
95 ++str;
96 } 98 }
97 else if (ch == '\\') 99 else if (ch == '\\')
98 { 100 {
99 need (enc, len += 1); 101 need (enc, len += 1);
100 *enc->cur++ = '\\'; 102 *enc->cur++ = '\\';
101 *enc->cur++ = '\\'; 103 *enc->cur++ = '\\';
102 ++str;
103 } 104 }
104 else if (ch >= 0x20 && ch < 0x80) // most common case 105 else
105 {
106 *enc->cur++ = ch; 106 *enc->cur++ = ch;
107 ++str; 107
108 }
109 else if (ch == '\015')
110 {
111 need (enc, len += 1);
112 *enc->cur++ = '\\';
113 *enc->cur++ = 'r';
114 ++str;
115 }
116 else if (ch == '\012')
117 {
118 need (enc, len += 1);
119 *enc->cur++ = '\\';
120 *enc->cur++ = 'n';
121 ++str; 108 ++str;
122 } 109 }
123 else 110 else
124 { 111 {
125 STRLEN clen; 112 switch (ch)
126 UV uch;
127
128 if (is_utf8)
129 { 113 {
130 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); 114 case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
131 if (clen < 0) 115 case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
132 croak ("malformed UTF-8 character in string, cannot convert to JSON"); 116 case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
133 } 117 case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
134 else 118 case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;
135 {
136 uch = ch;
137 clen = 1;
138 }
139 119
140 if (uch < 0x80 || enc->flags & F_ASCII) 120 default:
141 {
142 if (uch > 0xFFFFUL)
143 { 121 {
122 STRLEN clen;
123 UV uch;
124
125 if (is_utf8)
126 {
127 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
128 if (clen == (STRLEN)-1)
129 croak ("malformed UTF-8 character in string, cannot convert to JSON");
130 }
131 else
132 {
133 uch = ch;
134 clen = 1;
135 }
136
137 if (uch < 0x80 || enc->flags & F_ASCII)
138 {
139 if (uch > 0xFFFFUL)
140 {
144 need (enc, len += 11); 141 need (enc, len += 11);
145 sprintf (enc->cur, "\\u%04x\\u%04x", 142 sprintf (enc->cur, "\\u%04x\\u%04x",
146 (uch - 0x10000) / 0x400 + 0xD800, 143 (uch - 0x10000) / 0x400 + 0xD800,
147 (uch - 0x10000) % 0x400 + 0xDC00); 144 (uch - 0x10000) % 0x400 + 0xDC00);
148 enc->cur += 12; 145 enc->cur += 12;
146 }
147 else
148 {
149 static char hexdigit [16] = "0123456789abcdef";
150 need (enc, len += 5);
151 *enc->cur++ = '\\';
152 *enc->cur++ = 'u';
153 *enc->cur++ = hexdigit [ uch >> 12 ];
154 *enc->cur++ = hexdigit [(uch >> 8) & 15];
155 *enc->cur++ = hexdigit [(uch >> 4) & 15];
156 *enc->cur++ = hexdigit [(uch >> 0) & 15];
157 }
158
159 str += clen;
160 }
161 else if (is_utf8)
162 {
163 need (enc, len += clen);
164 do
165 {
166 *enc->cur++ = *str++;
167 }
168 while (--clen);
169 }
170 else
171 {
172 need (enc, 10); // never more than 11 bytes needed
173 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
174 ++str;
175 }
149 } 176 }
150 else
151 {
152 static char hexdigit [16] = "0123456789abcdef";
153 need (enc, len += 5);
154 *enc->cur++ = '\\';
155 *enc->cur++ = 'u';
156 *enc->cur++ = hexdigit [ uch >> 12 ];
157 *enc->cur++ = hexdigit [(uch >> 8) & 15];
158 *enc->cur++ = hexdigit [(uch >> 4) & 15];
159 *enc->cur++ = hexdigit [(uch >> 0) & 15];
160 }
161
162 str += clen;
163 }
164 else if (is_utf8)
165 {
166 need (enc, len += clen);
167 while (clen--)
168 *enc->cur++ = *str++;
169 }
170 else
171 {
172 need (enc, 10); // never more than 11 bytes needed
173 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
174 ++str;
175 } 177 }
176 } 178 }
177 179
178 --len; 180 --len;
179 } 181 }
422 424
423 if (!(flags & (F_ASCII | F_UTF8))) 425 if (!(flags & (F_ASCII | F_UTF8)))
424 SvUTF8_on (enc.sv); 426 SvUTF8_on (enc.sv);
425 427
426 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 428 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
429
430#ifdef SvPV_shrink_to_cur
431 if (enc.flags & F_SHRINK)
432 SvPV_shrink_to_cur (enc.sv);
433#endif
427 return enc.sv; 434 return enc.sv;
428} 435}
429 436
430///////////////////////////////////////////////////////////////////////////// 437/////////////////////////////////////////////////////////////////////////////
431 438
529 536
530 // possibly a surrogate pair 537 // possibly a surrogate pair
531 if (hi >= 0xd800 && hi < 0xdc00) 538 if (hi >= 0xd800 && hi < 0xdc00)
532 { 539 {
533 if (dec->cur [0] != '\\' || dec->cur [1] != 'u') 540 if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
534 ERR ("illegal surrogate character"); 541 ERR ("missing low surrogate character in surrogate pair");
535 542
536 dec->cur += 2; 543 dec->cur += 2;
537 544
538 lo = decode_4hex (dec); 545 lo = decode_4hex (dec);
539 if (lo == (UV)-1) 546 if (lo == (UV)-1)
542 if (lo < 0xdc00 || lo >= 0xe000) 549 if (lo < 0xdc00 || lo >= 0xe000)
543 ERR ("surrogate pair expected"); 550 ERR ("surrogate pair expected");
544 551
545 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000; 552 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
546 } 553 }
547 else if (lo >= 0xdc00 && lo < 0xe000) 554 else if (hi >= 0xdc00 && hi < 0xe000)
548 ERR ("illegal surrogate character"); 555 ERR ("missing high surrogate character in surrogate pair");
549 556
550 if (hi >= 0x80) 557 if (hi >= 0x80)
551 { 558 {
552 utf8 = 1; 559 utf8 = 1;
553 560
556 } 563 }
557 else 564 else
558 APPEND_CH (hi); 565 APPEND_CH (hi);
559 } 566 }
560 break; 567 break;
568
569 default:
570 --dec->cur;
571 ERR ("illegal backslash escape sequence in string");
561 } 572 }
562 } 573 }
563 else if (ch >= 0x20 && ch <= 0x7f) 574 else if (ch >= 0x20 && ch <= 0x7f)
564 APPEND_CH (*dec->cur++); 575 APPEND_CH (*dec->cur++);
565 else if (ch >= 0x80) 576 else if (ch >= 0x80)
566 { 577 {
567 STRLEN clen; 578 STRLEN clen;
568 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY); 579 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
569 if (clen < 0) 580 if (clen == (STRLEN)-1)
570 ERR ("malformed UTF-8 character in string, cannot convert to JSON"); 581 ERR ("malformed UTF-8 character in string, cannot convert to JSON");
571 582
572 APPEND_GROW (clen); 583 APPEND_GROW (clen);
573 memcpy (cur, dec->cur, clen); 584 do
574 cur += clen; 585 {
575 dec->cur += clen; 586 *cur++ = *dec->cur++;
587 }
588 while (--clen);
589
590 utf8 = 1;
576 } 591 }
592 else if (dec->cur == dec->end)
593 ERR ("unexpected end of string while parsing json string");
577 else 594 else
578 ERR ("invalid character encountered"); 595 ERR ("invalid character encountered");
579 } 596 }
580 597
581 ++dec->cur; 598 ++dec->cur;
585 SvPOK_only (sv); 602 SvPOK_only (sv);
586 *SvEND (sv) = 0; 603 *SvEND (sv) = 0;
587 604
588 if (utf8) 605 if (utf8)
589 SvUTF8_on (sv); 606 SvUTF8_on (sv);
607
608#ifdef SvPV_shrink_to_cur
609 if (dec->flags & F_SHRINK)
610 SvPV_shrink_to_cur (sv);
611#endif
590 612
591 return sv; 613 return sv;
592 614
593fail: 615fail:
594 SvREFCNT_dec (sv); 616 SvREFCNT_dec (sv);
609 { 631 {
610 ++dec->cur; 632 ++dec->cur;
611 if (*dec->cur >= '0' && *dec->cur <= '9') 633 if (*dec->cur >= '0' && *dec->cur <= '9')
612 ERR ("malformed number (leading zero must not be followed by another digit)"); 634 ERR ("malformed number (leading zero must not be followed by another digit)");
613 } 635 }
614 636 else if (*dec->cur < '0' || *dec->cur > '9')
615 // int 637 ERR ("malformed number (no digits after initial minus)");
638 else
639 do
640 {
641 ++dec->cur;
642 }
616 while (*dec->cur >= '0' && *dec->cur <= '9') 643 while (*dec->cur >= '0' && *dec->cur <= '9');
617 ++dec->cur;
618 644
619 // [frac] 645 // [frac]
620 if (*dec->cur == '.') 646 if (*dec->cur == '.')
621 { 647 {
622 is_nv = 1; 648 ++dec->cur;
649
650 if (*dec->cur < '0' || *dec->cur > '9')
651 ERR ("malformed number (no digits after decimal point)");
623 652
624 do 653 do
625 { 654 {
626 ++dec->cur; 655 ++dec->cur;
627 } 656 }
628 while (*dec->cur >= '0' && *dec->cur <= '9'); 657 while (*dec->cur >= '0' && *dec->cur <= '9');
658
659 is_nv = 1;
629 } 660 }
630 661
631 // [exp] 662 // [exp]
632 if (*dec->cur == 'e' || *dec->cur == 'E') 663 if (*dec->cur == 'e' || *dec->cur == 'E')
633 { 664 {
634 is_nv = 1;
635
636 ++dec->cur; 665 ++dec->cur;
666
637 if (*dec->cur == '-' || *dec->cur == '+') 667 if (*dec->cur == '-' || *dec->cur == '+')
638 ++dec->cur; 668 ++dec->cur;
639 669
670 if (*dec->cur < '0' || *dec->cur > '9')
671 ERR ("malformed number (no digits after exp sign)");
672
673 do
674 {
675 ++dec->cur;
676 }
640 while (*dec->cur >= '0' && *dec->cur <= '9') 677 while (*dec->cur >= '0' && *dec->cur <= '9');
641 ++dec->cur; 678
679 is_nv = 1;
642 } 680 }
643 681
644 if (!is_nv) 682 if (!is_nv)
645 { 683 {
646 UV uv; 684 UV uv;
664static SV * 702static SV *
665decode_av (dec_t *dec) 703decode_av (dec_t *dec)
666{ 704{
667 AV *av = newAV (); 705 AV *av = newAV ();
668 706
707 WS;
708 if (*dec->cur == ']')
709 ++dec->cur;
710 else
669 for (;;) 711 for (;;)
670 { 712 {
671 SV *value; 713 SV *value;
672 714
673 value = decode_sv (dec); 715 value = decode_sv (dec);
674 if (!value) 716 if (!value)
675 goto fail; 717 goto fail;
676 718
677 av_push (av, value); 719 av_push (av, value);
678 720
679 WS; 721 WS;
680 722
681 if (*dec->cur == ']') 723 if (*dec->cur == ']')
682 { 724 {
683 ++dec->cur; 725 ++dec->cur;
684 break; 726 break;
727 }
685 } 728
686
687 if (*dec->cur != ',') 729 if (*dec->cur != ',')
688 ERR (", or ] expected while parsing array"); 730 ERR (", or ] expected while parsing array");
689 731
690 ++dec->cur; 732 ++dec->cur;
691 } 733 }
692 734
693 return newRV_noinc ((SV *)av); 735 return newRV_noinc ((SV *)av);
694 736
695fail: 737fail:
696 SvREFCNT_dec (av); 738 SvREFCNT_dec (av);
700static SV * 742static SV *
701decode_hv (dec_t *dec) 743decode_hv (dec_t *dec)
702{ 744{
703 HV *hv = newHV (); 745 HV *hv = newHV ();
704 746
747 WS;
748 if (*dec->cur == '}')
749 ++dec->cur;
750 else
705 for (;;) 751 for (;;)
706 { 752 {
707 SV *key, *value; 753 SV *key, *value;
708 754
709 WS; EXPECT_CH ('"'); 755 WS; EXPECT_CH ('"');
710 756
711 key = decode_str (dec); 757 key = decode_str (dec);
712 if (!key) 758 if (!key)
713 goto fail;
714
715 WS; EXPECT_CH (':');
716
717 value = decode_sv (dec);
718 if (!value)
719 {
720 SvREFCNT_dec (key);
721 goto fail; 759 goto fail;
760
761 WS; EXPECT_CH (':');
762
763 value = decode_sv (dec);
764 if (!value)
765 {
766 SvREFCNT_dec (key);
767 goto fail;
722 } 768 }
723 769
724 //TODO: optimise 770 //TODO: optimise
725 hv_store_ent (hv, key, value, 0); 771 hv_store_ent (hv, key, value, 0);
726 772
727 WS; 773 WS;
728 774
729 if (*dec->cur == '}') 775 if (*dec->cur == '}')
730 { 776 {
731 ++dec->cur; 777 ++dec->cur;
732 break; 778 break;
733 } 779 }
734 780
735 if (*dec->cur != ',') 781 if (*dec->cur != ',')
736 ERR (", or } expected while parsing object/hash"); 782 ERR (", or } expected while parsing object/hash");
737 783
738 ++dec->cur; 784 ++dec->cur;
739 } 785 }
740 786
741 return newRV_noinc ((SV *)hv); 787 return newRV_noinc ((SV *)hv);
742 788
743fail: 789fail:
744 SvREFCNT_dec (hv); 790 SvREFCNT_dec (hv);
784 830
785 case 'n': 831 case 'n':
786 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4)) 832 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
787 { 833 {
788 dec->cur += 4; 834 dec->cur += 4;
789 return newSViv (1); 835 return newSVsv (&PL_sv_undef);
790 } 836 }
791 else 837 else
792 ERR ("'null' expected"); 838 ERR ("'null' expected");
793 839
794 break; 840 break;
805static SV * 851static SV *
806decode_json (SV *string, UV flags) 852decode_json (SV *string, UV flags)
807{ 853{
808 SV *sv; 854 SV *sv;
809 855
810 if (!(flags & F_UTF8)) 856 if (flags & F_UTF8)
857 sv_utf8_downgrade (string, 0);
858 else
811 sv_utf8_upgrade (string); 859 sv_utf8_upgrade (string);
812 860
813 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP 861 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
814 862
815 dec_t dec; 863 dec_t dec;
816 dec.flags = flags; 864 dec.flags = flags;
817 dec.cur = SvPVX (string); 865 dec.cur = SvPVX (string);
818 dec.end = SvEND (string); 866 dec.end = SvEND (string);
819 dec.err = 0; 867 dec.err = 0;
820 868
821 *dec.end = 1; // invalid anywhere
822 sv = decode_sv (&dec); 869 sv = decode_sv (&dec);
823 *dec.end = 0;
824 870
825 if (!sv) 871 if (!sv)
826 { 872 {
827 IV offset = utf8_distance (dec.cur, SvPVX (string)); 873 IV offset = utf8_distance (dec.cur, SvPVX (string));
828 SV *uni = sv_newmortal (); 874 SV *uni = sv_newmortal ();
875 // horrible hack to silence warning inside pv_uni_display
876 COP cop;
877 memset (&cop, 0, sizeof (cop));
878 cop.cop_warnings = pWARN_NONE;
879 SAVEVPTR (PL_curcop);
880 PL_curcop = &cop;
829 881
830 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); 882 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
831 croak ("%s, at character %d (%s)", 883 croak ("%s, at character offset %d (%s)",
832 dec.err, 884 dec.err,
833 (int)offset, 885 (int)offset,
834 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); 886 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
835 } 887 }
836 888
867 CODE: 919 CODE:
868 RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash); 920 RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);
869 OUTPUT: 921 OUTPUT:
870 RETVAL 922 RETVAL
871 923
872SV *ascii (SV *self, int enable) 924SV *ascii (SV *self, int enable = 1)
873 ALIAS: 925 ALIAS:
874 ascii = F_ASCII 926 ascii = F_ASCII
875 utf8 = F_UTF8 927 utf8 = F_UTF8
876 indent = F_INDENT 928 indent = F_INDENT
877 canonical = F_CANONICAL 929 canonical = F_CANONICAL
878 space_before = F_SPACE_BEFORE 930 space_before = F_SPACE_BEFORE
879 space_after = F_SPACE_AFTER 931 space_after = F_SPACE_AFTER
880 json_rpc = F_JSON_RPC 932 json_rpc = F_JSON_RPC
881 pretty = F_PRETTY 933 pretty = F_PRETTY
882 allow_nonref = F_ALLOW_NONREF 934 allow_nonref = F_ALLOW_NONREF
935 shrink = F_SHRINK
883 CODE: 936 CODE:
884{ 937{
885 UV *uv = SvJSON (self); 938 UV *uv = SvJSON (self);
886 if (enable) 939 if (enable)
887 *uv |= ix; 940 *uv |= ix;

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines