ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.4 by root, Thu Mar 22 21:13:58 2007 UTC vs.
Revision 1.7 by root, Fri Mar 23 15:57:18 2007 UTC

12#define F_CANONICAL 0x00000008 12#define F_CANONICAL 0x00000008
13#define F_SPACE_BEFORE 0x00000010 13#define F_SPACE_BEFORE 0x00000010
14#define F_SPACE_AFTER 0x00000020 14#define F_SPACE_AFTER 0x00000020
15#define F_JSON_RPC 0x00000040 15#define F_JSON_RPC 0x00000040
16#define F_ALLOW_NONREF 0x00000080 16#define F_ALLOW_NONREF 0x00000080
17#define F_SHRINK 0x00000100
17 18
18#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER 19#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER
19#define F_DEFAULT 0 20#define F_DEFAULT 0
20 21
21#define INIT_SIZE 32 // initial scalar size to be allocated 22#define INIT_SIZE 32 // initial scalar size to be allocated
53 croak ("object is not of type JSON::XS"); 54 croak ("object is not of type JSON::XS");
54 55
55 return &SvUVX (SvRV (sv)); 56 return &SvUVX (SvRV (sv));
56} 57}
57 58
59static void
60shrink (SV *sv)
61{
62 sv_utf8_downgrade (sv, 1);
63#ifdef SvPV_shrink_to_cur
64 SvPV_shrink_to_cur (sv);
65#endif
66}
67
58///////////////////////////////////////////////////////////////////////////// 68/////////////////////////////////////////////////////////////////////////////
59 69
60static void 70static void
61need (enc_t *enc, STRLEN len) 71need (enc_t *enc, STRLEN len)
62{ 72{
85 95
86 while (str < end) 96 while (str < end)
87 { 97 {
88 unsigned char ch = *(unsigned char *)str; 98 unsigned char ch = *(unsigned char *)str;
89 99
90 if (ch == '"') 100 if (ch >= 0x20 && ch < 0x80) // most common case
91 { 101 {
102 if (ch == '"') // but with slow exceptions
103 {
92 need (enc, len += 1); 104 need (enc, len += 1);
93 *enc->cur++ = '\\'; 105 *enc->cur++ = '\\';
94 *enc->cur++ = '"'; 106 *enc->cur++ = '"';
95 ++str;
96 } 107 }
97 else if (ch == '\\') 108 else if (ch == '\\')
98 { 109 {
99 need (enc, len += 1); 110 need (enc, len += 1);
100 *enc->cur++ = '\\'; 111 *enc->cur++ = '\\';
101 *enc->cur++ = '\\'; 112 *enc->cur++ = '\\';
102 ++str;
103 } 113 }
104 else if (ch >= 0x20 && ch < 0x80) // most common case 114 else
105 {
106 *enc->cur++ = ch; 115 *enc->cur++ = ch;
107 ++str; 116
108 }
109 else if (ch == '\015')
110 {
111 need (enc, len += 1);
112 *enc->cur++ = '\\';
113 *enc->cur++ = 'r';
114 ++str;
115 }
116 else if (ch == '\012')
117 {
118 need (enc, len += 1);
119 *enc->cur++ = '\\';
120 *enc->cur++ = 'n';
121 ++str; 117 ++str;
122 } 118 }
123 else 119 else
124 { 120 {
125 STRLEN clen; 121 switch (ch)
126 UV uch;
127
128 if (is_utf8)
129 { 122 {
130 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); 123 case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
131 if (clen < 0) 124 case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
132 croak ("malformed UTF-8 character in string, cannot convert to JSON"); 125 case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
133 } 126 case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
134 else 127 case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;
135 {
136 uch = ch;
137 clen = 1;
138 }
139 128
140 if (uch < 0x80 || enc->flags & F_ASCII) 129 default:
141 {
142 if (uch > 0xFFFFUL)
143 { 130 {
131 STRLEN clen;
132 UV uch;
133
134 if (is_utf8)
135 {
136 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
137 if (clen == (STRLEN)-1)
138 croak ("malformed UTF-8 character in string, cannot convert to JSON");
139 }
140 else
141 {
142 uch = ch;
143 clen = 1;
144 }
145
146 if (uch < 0x80 || enc->flags & F_ASCII)
147 {
148 if (uch > 0xFFFFUL)
149 {
144 need (enc, len += 11); 150 need (enc, len += 11);
145 sprintf (enc->cur, "\\u%04x\\u%04x", 151 sprintf (enc->cur, "\\u%04x\\u%04x",
146 (uch - 0x10000) / 0x400 + 0xD800, 152 (uch - 0x10000) / 0x400 + 0xD800,
147 (uch - 0x10000) % 0x400 + 0xDC00); 153 (uch - 0x10000) % 0x400 + 0xDC00);
148 enc->cur += 12; 154 enc->cur += 12;
155 }
156 else
157 {
158 static char hexdigit [16] = "0123456789abcdef";
159 need (enc, len += 5);
160 *enc->cur++ = '\\';
161 *enc->cur++ = 'u';
162 *enc->cur++ = hexdigit [ uch >> 12 ];
163 *enc->cur++ = hexdigit [(uch >> 8) & 15];
164 *enc->cur++ = hexdigit [(uch >> 4) & 15];
165 *enc->cur++ = hexdigit [(uch >> 0) & 15];
166 }
167
168 str += clen;
169 }
170 else if (is_utf8)
171 {
172 need (enc, len += clen);
173 do
174 {
175 *enc->cur++ = *str++;
176 }
177 while (--clen);
178 }
179 else
180 {
181 need (enc, len += 10); // never more than 11 bytes needed
182 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
183 ++str;
184 }
149 } 185 }
150 else
151 {
152 static char hexdigit [16] = "0123456789abcdef";
153 need (enc, len += 5);
154 *enc->cur++ = '\\';
155 *enc->cur++ = 'u';
156 *enc->cur++ = hexdigit [ uch >> 12 ];
157 *enc->cur++ = hexdigit [(uch >> 8) & 15];
158 *enc->cur++ = hexdigit [(uch >> 4) & 15];
159 *enc->cur++ = hexdigit [(uch >> 0) & 15];
160 }
161
162 str += clen;
163 }
164 else if (is_utf8)
165 {
166 need (enc, len += clen);
167 while (clen--)
168 *enc->cur++ = *str++;
169 }
170 else
171 {
172 need (enc, 10); // never more than 11 bytes needed
173 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
174 ++str;
175 } 186 }
176 } 187 }
177 188
178 --len; 189 --len;
179 } 190 }
422 433
423 if (!(flags & (F_ASCII | F_UTF8))) 434 if (!(flags & (F_ASCII | F_UTF8)))
424 SvUTF8_on (enc.sv); 435 SvUTF8_on (enc.sv);
425 436
426 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 437 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
438
439 if (enc.flags & F_SHRINK)
440 shrink (enc.sv);
441
427 return enc.sv; 442 return enc.sv;
428} 443}
429 444
430///////////////////////////////////////////////////////////////////////////// 445/////////////////////////////////////////////////////////////////////////////
431 446
529 544
530 // possibly a surrogate pair 545 // possibly a surrogate pair
531 if (hi >= 0xd800 && hi < 0xdc00) 546 if (hi >= 0xd800 && hi < 0xdc00)
532 { 547 {
533 if (dec->cur [0] != '\\' || dec->cur [1] != 'u') 548 if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
534 ERR ("illegal surrogate character"); 549 ERR ("missing low surrogate character in surrogate pair");
535 550
536 dec->cur += 2; 551 dec->cur += 2;
537 552
538 lo = decode_4hex (dec); 553 lo = decode_4hex (dec);
539 if (lo == (UV)-1) 554 if (lo == (UV)-1)
542 if (lo < 0xdc00 || lo >= 0xe000) 557 if (lo < 0xdc00 || lo >= 0xe000)
543 ERR ("surrogate pair expected"); 558 ERR ("surrogate pair expected");
544 559
545 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000; 560 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
546 } 561 }
547 else if (lo >= 0xdc00 && lo < 0xe000) 562 else if (hi >= 0xdc00 && hi < 0xe000)
548 ERR ("illegal surrogate character"); 563 ERR ("missing high surrogate character in surrogate pair");
549 564
550 if (hi >= 0x80) 565 if (hi >= 0x80)
551 { 566 {
552 utf8 = 1; 567 utf8 = 1;
553 568
556 } 571 }
557 else 572 else
558 APPEND_CH (hi); 573 APPEND_CH (hi);
559 } 574 }
560 break; 575 break;
576
577 default:
578 --dec->cur;
579 ERR ("illegal backslash escape sequence in string");
561 } 580 }
562 } 581 }
563 else if (ch >= 0x20 && ch <= 0x7f) 582 else if (ch >= 0x20 && ch <= 0x7f)
564 APPEND_CH (*dec->cur++); 583 APPEND_CH (*dec->cur++);
565 else if (ch >= 0x80) 584 else if (ch >= 0x80)
566 { 585 {
567 STRLEN clen; 586 STRLEN clen;
568 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY); 587 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
569 if (clen < 0) 588 if (clen == (STRLEN)-1)
570 ERR ("malformed UTF-8 character in string, cannot convert to JSON"); 589 ERR ("malformed UTF-8 character in JSON string");
571 590
572 APPEND_GROW (clen); 591 APPEND_GROW (clen);
573 memcpy (cur, dec->cur, clen); 592 do
574 cur += clen; 593 {
575 dec->cur += clen; 594 *cur++ = *dec->cur++;
595 }
596 while (--clen);
597
598 utf8 = 1;
576 } 599 }
600 else if (dec->cur == dec->end)
601 ERR ("unexpected end of string while parsing json string");
577 else 602 else
578 ERR ("invalid character encountered"); 603 ERR ("invalid character encountered");
579 } 604 }
580 605
581 ++dec->cur; 606 ++dec->cur;
585 SvPOK_only (sv); 610 SvPOK_only (sv);
586 *SvEND (sv) = 0; 611 *SvEND (sv) = 0;
587 612
588 if (utf8) 613 if (utf8)
589 SvUTF8_on (sv); 614 SvUTF8_on (sv);
615
616 if (dec->flags & F_SHRINK)
617 shrink (sv);
590 618
591 return sv; 619 return sv;
592 620
593fail: 621fail:
594 SvREFCNT_dec (sv); 622 SvREFCNT_dec (sv);
609 { 637 {
610 ++dec->cur; 638 ++dec->cur;
611 if (*dec->cur >= '0' && *dec->cur <= '9') 639 if (*dec->cur >= '0' && *dec->cur <= '9')
612 ERR ("malformed number (leading zero must not be followed by another digit)"); 640 ERR ("malformed number (leading zero must not be followed by another digit)");
613 } 641 }
614 642 else if (*dec->cur < '0' || *dec->cur > '9')
615 // int 643 ERR ("malformed number (no digits after initial minus)");
644 else
645 do
646 {
647 ++dec->cur;
648 }
616 while (*dec->cur >= '0' && *dec->cur <= '9') 649 while (*dec->cur >= '0' && *dec->cur <= '9');
617 ++dec->cur;
618 650
619 // [frac] 651 // [frac]
620 if (*dec->cur == '.') 652 if (*dec->cur == '.')
621 { 653 {
622 is_nv = 1; 654 ++dec->cur;
655
656 if (*dec->cur < '0' || *dec->cur > '9')
657 ERR ("malformed number (no digits after decimal point)");
623 658
624 do 659 do
625 { 660 {
626 ++dec->cur; 661 ++dec->cur;
627 } 662 }
628 while (*dec->cur >= '0' && *dec->cur <= '9'); 663 while (*dec->cur >= '0' && *dec->cur <= '9');
664
665 is_nv = 1;
629 } 666 }
630 667
631 // [exp] 668 // [exp]
632 if (*dec->cur == 'e' || *dec->cur == 'E') 669 if (*dec->cur == 'e' || *dec->cur == 'E')
633 { 670 {
634 is_nv = 1;
635
636 ++dec->cur; 671 ++dec->cur;
672
637 if (*dec->cur == '-' || *dec->cur == '+') 673 if (*dec->cur == '-' || *dec->cur == '+')
638 ++dec->cur; 674 ++dec->cur;
639 675
676 if (*dec->cur < '0' || *dec->cur > '9')
677 ERR ("malformed number (no digits after exp sign)");
678
679 do
680 {
681 ++dec->cur;
682 }
640 while (*dec->cur >= '0' && *dec->cur <= '9') 683 while (*dec->cur >= '0' && *dec->cur <= '9');
641 ++dec->cur; 684
685 is_nv = 1;
642 } 686 }
643 687
644 if (!is_nv) 688 if (!is_nv)
645 { 689 {
646 UV uv; 690 UV uv;
664static SV * 708static SV *
665decode_av (dec_t *dec) 709decode_av (dec_t *dec)
666{ 710{
667 AV *av = newAV (); 711 AV *av = newAV ();
668 712
713 WS;
714 if (*dec->cur == ']')
715 ++dec->cur;
716 else
669 for (;;) 717 for (;;)
670 { 718 {
671 SV *value; 719 SV *value;
672 720
673 value = decode_sv (dec); 721 value = decode_sv (dec);
674 if (!value) 722 if (!value)
675 goto fail; 723 goto fail;
676 724
677 av_push (av, value); 725 av_push (av, value);
678 726
679 WS; 727 WS;
680 728
681 if (*dec->cur == ']') 729 if (*dec->cur == ']')
682 { 730 {
683 ++dec->cur; 731 ++dec->cur;
684 break; 732 break;
733 }
685 } 734
686
687 if (*dec->cur != ',') 735 if (*dec->cur != ',')
688 ERR (", or ] expected while parsing array"); 736 ERR (", or ] expected while parsing array");
689 737
690 ++dec->cur; 738 ++dec->cur;
691 } 739 }
692 740
693 return newRV_noinc ((SV *)av); 741 return newRV_noinc ((SV *)av);
694 742
695fail: 743fail:
696 SvREFCNT_dec (av); 744 SvREFCNT_dec (av);
700static SV * 748static SV *
701decode_hv (dec_t *dec) 749decode_hv (dec_t *dec)
702{ 750{
703 HV *hv = newHV (); 751 HV *hv = newHV ();
704 752
753 WS;
754 if (*dec->cur == '}')
755 ++dec->cur;
756 else
705 for (;;) 757 for (;;)
706 { 758 {
707 SV *key, *value; 759 SV *key, *value;
708 760
709 WS; EXPECT_CH ('"'); 761 WS; EXPECT_CH ('"');
710 762
711 key = decode_str (dec); 763 key = decode_str (dec);
712 if (!key) 764 if (!key)
713 goto fail;
714
715 WS; EXPECT_CH (':');
716
717 value = decode_sv (dec);
718 if (!value)
719 {
720 SvREFCNT_dec (key);
721 goto fail; 765 goto fail;
766
767 WS; EXPECT_CH (':');
768
769 value = decode_sv (dec);
770 if (!value)
771 {
772 SvREFCNT_dec (key);
773 goto fail;
722 } 774 }
723 775
724 //TODO: optimise 776 //TODO: optimise
725 hv_store_ent (hv, key, value, 0); 777 hv_store_ent (hv, key, value, 0);
726 778
727 WS; 779 WS;
728 780
729 if (*dec->cur == '}') 781 if (*dec->cur == '}')
730 { 782 {
731 ++dec->cur; 783 ++dec->cur;
732 break; 784 break;
733 } 785 }
734 786
735 if (*dec->cur != ',') 787 if (*dec->cur != ',')
736 ERR (", or } expected while parsing object/hash"); 788 ERR (", or } expected while parsing object/hash");
737 789
738 ++dec->cur; 790 ++dec->cur;
739 } 791 }
740 792
741 return newRV_noinc ((SV *)hv); 793 return newRV_noinc ((SV *)hv);
742 794
743fail: 795fail:
744 SvREFCNT_dec (hv); 796 SvREFCNT_dec (hv);
784 836
785 case 'n': 837 case 'n':
786 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4)) 838 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
787 { 839 {
788 dec->cur += 4; 840 dec->cur += 4;
789 return newSViv (1); 841 return newSVsv (&PL_sv_undef);
790 } 842 }
791 else 843 else
792 ERR ("'null' expected"); 844 ERR ("'null' expected");
793 845
794 break; 846 break;
795 847
796 default: 848 default:
797 ERR ("malformed json string"); 849 ERR ("malformed json string, neither array, object, number, string or atom");
798 break; 850 break;
799 } 851 }
800 852
801fail: 853fail:
802 return 0; 854 return 0;
805static SV * 857static SV *
806decode_json (SV *string, UV flags) 858decode_json (SV *string, UV flags)
807{ 859{
808 SV *sv; 860 SV *sv;
809 861
810 if (!(flags & F_UTF8)) 862 if (flags & F_UTF8)
863 sv_utf8_downgrade (string, 0);
864 else
811 sv_utf8_upgrade (string); 865 sv_utf8_upgrade (string);
812 866
813 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP 867 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
814 868
815 dec_t dec; 869 dec_t dec;
816 dec.flags = flags; 870 dec.flags = flags;
817 dec.cur = SvPVX (string); 871 dec.cur = SvPVX (string);
818 dec.end = SvEND (string); 872 dec.end = SvEND (string);
819 dec.err = 0; 873 dec.err = 0;
820 874
821 *dec.end = 1; // invalid anywhere
822 sv = decode_sv (&dec); 875 sv = decode_sv (&dec);
823 *dec.end = 0;
824 876
825 if (!sv) 877 if (!sv)
826 { 878 {
879 IV offset = dec.flags & F_UTF8
880 ? dec.cur - SvPVX (string)
827 IV offset = utf8_distance (dec.cur, SvPVX (string)); 881 : utf8_distance (dec.cur, SvPVX (string));
828 SV *uni = sv_newmortal (); 882 SV *uni = sv_newmortal ();
883 // horrible hack to silence warning inside pv_uni_display
884 COP cop;
885 memset (&cop, 0, sizeof (cop));
886 cop.cop_warnings = pWARN_NONE;
887 SAVEVPTR (PL_curcop);
888 PL_curcop = &cop;
829 889
830 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); 890 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
831 croak ("%s, at character %d (%s)", 891 croak ("%s, at character offset %d (%s)",
832 dec.err, 892 dec.err,
833 (int)offset, 893 (int)offset,
834 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); 894 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
835 } 895 }
836 896
867 CODE: 927 CODE:
868 RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash); 928 RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);
869 OUTPUT: 929 OUTPUT:
870 RETVAL 930 RETVAL
871 931
872SV *ascii (SV *self, int enable) 932SV *ascii (SV *self, int enable = 1)
873 ALIAS: 933 ALIAS:
874 ascii = F_ASCII 934 ascii = F_ASCII
875 utf8 = F_UTF8 935 utf8 = F_UTF8
876 indent = F_INDENT 936 indent = F_INDENT
877 canonical = F_CANONICAL 937 canonical = F_CANONICAL
878 space_before = F_SPACE_BEFORE 938 space_before = F_SPACE_BEFORE
879 space_after = F_SPACE_AFTER 939 space_after = F_SPACE_AFTER
880 json_rpc = F_JSON_RPC 940 json_rpc = F_JSON_RPC
881 pretty = F_PRETTY 941 pretty = F_PRETTY
882 allow_nonref = F_ALLOW_NONREF 942 allow_nonref = F_ALLOW_NONREF
943 shrink = F_SHRINK
883 CODE: 944 CODE:
884{ 945{
885 UV *uv = SvJSON (self); 946 UV *uv = SvJSON (self);
886 if (enable) 947 if (enable)
887 *uv |= ix; 948 *uv |= ix;

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines