ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.4 by root, Thu Mar 22 21:13:58 2007 UTC vs.
Revision 1.8 by root, Fri Mar 23 16:13:59 2007 UTC

12#define F_CANONICAL 0x00000008 12#define F_CANONICAL 0x00000008
13#define F_SPACE_BEFORE 0x00000010 13#define F_SPACE_BEFORE 0x00000010
14#define F_SPACE_AFTER 0x00000020 14#define F_SPACE_AFTER 0x00000020
15#define F_JSON_RPC 0x00000040 15#define F_JSON_RPC 0x00000040
16#define F_ALLOW_NONREF 0x00000080 16#define F_ALLOW_NONREF 0x00000080
17#define F_SHRINK 0x00000100
17 18
18#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER 19#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER
19#define F_DEFAULT 0 20#define F_DEFAULT 0
20 21
21#define INIT_SIZE 32 // initial scalar size to be allocated 22#define INIT_SIZE 32 // initial scalar size to be allocated
53 croak ("object is not of type JSON::XS"); 54 croak ("object is not of type JSON::XS");
54 55
55 return &SvUVX (SvRV (sv)); 56 return &SvUVX (SvRV (sv));
56} 57}
57 58
59static void
60shrink (SV *sv)
61{
62 sv_utf8_downgrade (sv, 1);
63#ifdef SvPV_shrink_to_cur
64 SvPV_shrink_to_cur (sv);
65#endif
66}
67
58///////////////////////////////////////////////////////////////////////////// 68/////////////////////////////////////////////////////////////////////////////
59 69
60static void 70static void
61need (enc_t *enc, STRLEN len) 71need (enc_t *enc, STRLEN len)
62{ 72{
85 95
86 while (str < end) 96 while (str < end)
87 { 97 {
88 unsigned char ch = *(unsigned char *)str; 98 unsigned char ch = *(unsigned char *)str;
89 99
90 if (ch == '"') 100 if (ch >= 0x20 && ch < 0x80) // most common case
91 { 101 {
102 if (ch == '"') // but with slow exceptions
103 {
92 need (enc, len += 1); 104 need (enc, len += 1);
93 *enc->cur++ = '\\'; 105 *enc->cur++ = '\\';
94 *enc->cur++ = '"'; 106 *enc->cur++ = '"';
95 ++str;
96 } 107 }
97 else if (ch == '\\') 108 else if (ch == '\\')
98 { 109 {
99 need (enc, len += 1); 110 need (enc, len += 1);
100 *enc->cur++ = '\\'; 111 *enc->cur++ = '\\';
101 *enc->cur++ = '\\'; 112 *enc->cur++ = '\\';
102 ++str;
103 } 113 }
104 else if (ch >= 0x20 && ch < 0x80) // most common case 114 else
105 {
106 *enc->cur++ = ch; 115 *enc->cur++ = ch;
107 ++str; 116
108 }
109 else if (ch == '\015')
110 {
111 need (enc, len += 1);
112 *enc->cur++ = '\\';
113 *enc->cur++ = 'r';
114 ++str;
115 }
116 else if (ch == '\012')
117 {
118 need (enc, len += 1);
119 *enc->cur++ = '\\';
120 *enc->cur++ = 'n';
121 ++str; 117 ++str;
122 } 118 }
123 else 119 else
124 { 120 {
125 STRLEN clen; 121 switch (ch)
126 UV uch;
127
128 if (is_utf8)
129 { 122 {
130 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); 123 case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
131 if (clen < 0) 124 case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
132 croak ("malformed UTF-8 character in string, cannot convert to JSON"); 125 case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
133 } 126 case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
134 else 127 case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;
135 {
136 uch = ch;
137 clen = 1;
138 }
139 128
140 if (uch < 0x80 || enc->flags & F_ASCII) 129 default:
141 {
142 if (uch > 0xFFFFUL)
143 { 130 {
131 STRLEN clen;
132 UV uch;
133
134 if (is_utf8)
135 {
136 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
137 if (clen == (STRLEN)-1)
138 croak ("malformed UTF-8 character in string, cannot convert to JSON");
139 }
140 else
141 {
142 uch = ch;
143 clen = 1;
144 }
145
146 if (uch < 0x80 || enc->flags & F_ASCII)
147 {
148 if (uch > 0xFFFFUL)
149 {
144 need (enc, len += 11); 150 need (enc, len += 11);
145 sprintf (enc->cur, "\\u%04x\\u%04x", 151 sprintf (enc->cur, "\\u%04x\\u%04x",
146 (uch - 0x10000) / 0x400 + 0xD800, 152 (uch - 0x10000) / 0x400 + 0xD800,
147 (uch - 0x10000) % 0x400 + 0xDC00); 153 (uch - 0x10000) % 0x400 + 0xDC00);
148 enc->cur += 12; 154 enc->cur += 12;
155 }
156 else
157 {
158 static char hexdigit [16] = "0123456789abcdef";
159 need (enc, len += 5);
160 *enc->cur++ = '\\';
161 *enc->cur++ = 'u';
162 *enc->cur++ = hexdigit [ uch >> 12 ];
163 *enc->cur++ = hexdigit [(uch >> 8) & 15];
164 *enc->cur++ = hexdigit [(uch >> 4) & 15];
165 *enc->cur++ = hexdigit [(uch >> 0) & 15];
166 }
167
168 str += clen;
169 }
170 else if (is_utf8)
171 {
172 need (enc, len += clen);
173 do
174 {
175 *enc->cur++ = *str++;
176 }
177 while (--clen);
178 }
179 else
180 {
181 need (enc, len += 10); // never more than 11 bytes needed
182 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
183 ++str;
184 }
149 } 185 }
150 else
151 {
152 static char hexdigit [16] = "0123456789abcdef";
153 need (enc, len += 5);
154 *enc->cur++ = '\\';
155 *enc->cur++ = 'u';
156 *enc->cur++ = hexdigit [ uch >> 12 ];
157 *enc->cur++ = hexdigit [(uch >> 8) & 15];
158 *enc->cur++ = hexdigit [(uch >> 4) & 15];
159 *enc->cur++ = hexdigit [(uch >> 0) & 15];
160 }
161
162 str += clen;
163 }
164 else if (is_utf8)
165 {
166 need (enc, len += clen);
167 while (clen--)
168 *enc->cur++ = *str++;
169 }
170 else
171 {
172 need (enc, 10); // never more than 11 bytes needed
173 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
174 ++str;
175 } 186 }
176 } 187 }
177 188
178 --len; 189 --len;
179 } 190 }
306 317
307 if (fast) 318 if (fast)
308 qsort (hes, count, sizeof (HE *), he_cmp_fast); 319 qsort (hes, count, sizeof (HE *), he_cmp_fast);
309 else 320 else
310 { 321 {
311 // hack to disable "use bytes" 322 // hack to forcefully disable "use bytes"
312 COP *oldcop = PL_curcop, cop; 323 COP cop = *PL_curcop;
313 cop.op_private = 0; 324 cop.op_private = 0;
325
326 ENTER;
327 SAVETMPS;
328
329 SAVEVPTR (PL_curcop);
314 PL_curcop = &cop; 330 PL_curcop = &cop;
315 331
316 SAVETMPS;
317 qsort (hes, count, sizeof (HE *), he_cmp_slow); 332 qsort (hes, count, sizeof (HE *), he_cmp_slow);
333
318 FREETMPS; 334 FREETMPS;
319 335 LEAVE;
320 PL_curcop = oldcop;
321 } 336 }
322 337
323 for (i = 0; i < count; ++i) 338 for (i = 0; i < count; ++i)
324 { 339 {
325 INDENT; 340 INDENT;
422 437
423 if (!(flags & (F_ASCII | F_UTF8))) 438 if (!(flags & (F_ASCII | F_UTF8)))
424 SvUTF8_on (enc.sv); 439 SvUTF8_on (enc.sv);
425 440
426 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 441 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
442
443 if (enc.flags & F_SHRINK)
444 shrink (enc.sv);
445
427 return enc.sv; 446 return enc.sv;
428} 447}
429 448
430///////////////////////////////////////////////////////////////////////////// 449/////////////////////////////////////////////////////////////////////////////
431 450
529 548
530 // possibly a surrogate pair 549 // possibly a surrogate pair
531 if (hi >= 0xd800 && hi < 0xdc00) 550 if (hi >= 0xd800 && hi < 0xdc00)
532 { 551 {
533 if (dec->cur [0] != '\\' || dec->cur [1] != 'u') 552 if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
534 ERR ("illegal surrogate character"); 553 ERR ("missing low surrogate character in surrogate pair");
535 554
536 dec->cur += 2; 555 dec->cur += 2;
537 556
538 lo = decode_4hex (dec); 557 lo = decode_4hex (dec);
539 if (lo == (UV)-1) 558 if (lo == (UV)-1)
542 if (lo < 0xdc00 || lo >= 0xe000) 561 if (lo < 0xdc00 || lo >= 0xe000)
543 ERR ("surrogate pair expected"); 562 ERR ("surrogate pair expected");
544 563
545 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000; 564 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
546 } 565 }
547 else if (lo >= 0xdc00 && lo < 0xe000) 566 else if (hi >= 0xdc00 && hi < 0xe000)
548 ERR ("illegal surrogate character"); 567 ERR ("missing high surrogate character in surrogate pair");
549 568
550 if (hi >= 0x80) 569 if (hi >= 0x80)
551 { 570 {
552 utf8 = 1; 571 utf8 = 1;
553 572
556 } 575 }
557 else 576 else
558 APPEND_CH (hi); 577 APPEND_CH (hi);
559 } 578 }
560 break; 579 break;
580
581 default:
582 --dec->cur;
583 ERR ("illegal backslash escape sequence in string");
561 } 584 }
562 } 585 }
563 else if (ch >= 0x20 && ch <= 0x7f) 586 else if (ch >= 0x20 && ch <= 0x7f)
564 APPEND_CH (*dec->cur++); 587 APPEND_CH (*dec->cur++);
565 else if (ch >= 0x80) 588 else if (ch >= 0x80)
566 { 589 {
567 STRLEN clen; 590 STRLEN clen;
568 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY); 591 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
569 if (clen < 0) 592 if (clen == (STRLEN)-1)
570 ERR ("malformed UTF-8 character in string, cannot convert to JSON"); 593 ERR ("malformed UTF-8 character in JSON string");
571 594
572 APPEND_GROW (clen); 595 APPEND_GROW (clen);
573 memcpy (cur, dec->cur, clen); 596 do
574 cur += clen; 597 {
575 dec->cur += clen; 598 *cur++ = *dec->cur++;
599 }
600 while (--clen);
601
602 utf8 = 1;
576 } 603 }
604 else if (dec->cur == dec->end)
605 ERR ("unexpected end of string while parsing json string");
577 else 606 else
578 ERR ("invalid character encountered"); 607 ERR ("invalid character encountered");
579 } 608 }
580 609
581 ++dec->cur; 610 ++dec->cur;
585 SvPOK_only (sv); 614 SvPOK_only (sv);
586 *SvEND (sv) = 0; 615 *SvEND (sv) = 0;
587 616
588 if (utf8) 617 if (utf8)
589 SvUTF8_on (sv); 618 SvUTF8_on (sv);
619
620 if (dec->flags & F_SHRINK)
621 shrink (sv);
590 622
591 return sv; 623 return sv;
592 624
593fail: 625fail:
594 SvREFCNT_dec (sv); 626 SvREFCNT_dec (sv);
609 { 641 {
610 ++dec->cur; 642 ++dec->cur;
611 if (*dec->cur >= '0' && *dec->cur <= '9') 643 if (*dec->cur >= '0' && *dec->cur <= '9')
612 ERR ("malformed number (leading zero must not be followed by another digit)"); 644 ERR ("malformed number (leading zero must not be followed by another digit)");
613 } 645 }
614 646 else if (*dec->cur < '0' || *dec->cur > '9')
615 // int 647 ERR ("malformed number (no digits after initial minus)");
648 else
649 do
650 {
651 ++dec->cur;
652 }
616 while (*dec->cur >= '0' && *dec->cur <= '9') 653 while (*dec->cur >= '0' && *dec->cur <= '9');
617 ++dec->cur;
618 654
619 // [frac] 655 // [frac]
620 if (*dec->cur == '.') 656 if (*dec->cur == '.')
621 { 657 {
622 is_nv = 1; 658 ++dec->cur;
659
660 if (*dec->cur < '0' || *dec->cur > '9')
661 ERR ("malformed number (no digits after decimal point)");
623 662
624 do 663 do
625 { 664 {
626 ++dec->cur; 665 ++dec->cur;
627 } 666 }
628 while (*dec->cur >= '0' && *dec->cur <= '9'); 667 while (*dec->cur >= '0' && *dec->cur <= '9');
668
669 is_nv = 1;
629 } 670 }
630 671
631 // [exp] 672 // [exp]
632 if (*dec->cur == 'e' || *dec->cur == 'E') 673 if (*dec->cur == 'e' || *dec->cur == 'E')
633 { 674 {
634 is_nv = 1;
635
636 ++dec->cur; 675 ++dec->cur;
676
637 if (*dec->cur == '-' || *dec->cur == '+') 677 if (*dec->cur == '-' || *dec->cur == '+')
638 ++dec->cur; 678 ++dec->cur;
639 679
680 if (*dec->cur < '0' || *dec->cur > '9')
681 ERR ("malformed number (no digits after exp sign)");
682
683 do
684 {
685 ++dec->cur;
686 }
640 while (*dec->cur >= '0' && *dec->cur <= '9') 687 while (*dec->cur >= '0' && *dec->cur <= '9');
641 ++dec->cur; 688
689 is_nv = 1;
642 } 690 }
643 691
644 if (!is_nv) 692 if (!is_nv)
645 { 693 {
646 UV uv; 694 UV uv;
664static SV * 712static SV *
665decode_av (dec_t *dec) 713decode_av (dec_t *dec)
666{ 714{
667 AV *av = newAV (); 715 AV *av = newAV ();
668 716
717 WS;
718 if (*dec->cur == ']')
719 ++dec->cur;
720 else
669 for (;;) 721 for (;;)
670 { 722 {
671 SV *value; 723 SV *value;
672 724
673 value = decode_sv (dec); 725 value = decode_sv (dec);
674 if (!value) 726 if (!value)
675 goto fail; 727 goto fail;
676 728
677 av_push (av, value); 729 av_push (av, value);
678 730
679 WS; 731 WS;
680 732
681 if (*dec->cur == ']') 733 if (*dec->cur == ']')
682 { 734 {
683 ++dec->cur; 735 ++dec->cur;
684 break; 736 break;
737 }
685 } 738
686
687 if (*dec->cur != ',') 739 if (*dec->cur != ',')
688 ERR (", or ] expected while parsing array"); 740 ERR (", or ] expected while parsing array");
689 741
690 ++dec->cur; 742 ++dec->cur;
691 } 743 }
692 744
693 return newRV_noinc ((SV *)av); 745 return newRV_noinc ((SV *)av);
694 746
695fail: 747fail:
696 SvREFCNT_dec (av); 748 SvREFCNT_dec (av);
700static SV * 752static SV *
701decode_hv (dec_t *dec) 753decode_hv (dec_t *dec)
702{ 754{
703 HV *hv = newHV (); 755 HV *hv = newHV ();
704 756
757 WS;
758 if (*dec->cur == '}')
759 ++dec->cur;
760 else
705 for (;;) 761 for (;;)
706 { 762 {
707 SV *key, *value; 763 SV *key, *value;
708 764
709 WS; EXPECT_CH ('"'); 765 WS; EXPECT_CH ('"');
710 766
711 key = decode_str (dec); 767 key = decode_str (dec);
712 if (!key) 768 if (!key)
713 goto fail;
714
715 WS; EXPECT_CH (':');
716
717 value = decode_sv (dec);
718 if (!value)
719 {
720 SvREFCNT_dec (key);
721 goto fail; 769 goto fail;
770
771 WS; EXPECT_CH (':');
772
773 value = decode_sv (dec);
774 if (!value)
775 {
776 SvREFCNT_dec (key);
777 goto fail;
722 } 778 }
723 779
724 //TODO: optimise 780 //TODO: optimise
725 hv_store_ent (hv, key, value, 0); 781 hv_store_ent (hv, key, value, 0);
726 782
727 WS; 783 WS;
728 784
729 if (*dec->cur == '}') 785 if (*dec->cur == '}')
730 { 786 {
731 ++dec->cur; 787 ++dec->cur;
732 break; 788 break;
733 } 789 }
734 790
735 if (*dec->cur != ',') 791 if (*dec->cur != ',')
736 ERR (", or } expected while parsing object/hash"); 792 ERR (", or } expected while parsing object/hash");
737 793
738 ++dec->cur; 794 ++dec->cur;
739 } 795 }
740 796
741 return newRV_noinc ((SV *)hv); 797 return newRV_noinc ((SV *)hv);
742 798
743fail: 799fail:
744 SvREFCNT_dec (hv); 800 SvREFCNT_dec (hv);
784 840
785 case 'n': 841 case 'n':
786 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4)) 842 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
787 { 843 {
788 dec->cur += 4; 844 dec->cur += 4;
789 return newSViv (1); 845 return newSVsv (&PL_sv_undef);
790 } 846 }
791 else 847 else
792 ERR ("'null' expected"); 848 ERR ("'null' expected");
793 849
794 break; 850 break;
795 851
796 default: 852 default:
797 ERR ("malformed json string"); 853 ERR ("malformed json string, neither array, object, number, string or atom");
798 break; 854 break;
799 } 855 }
800 856
801fail: 857fail:
802 return 0; 858 return 0;
805static SV * 861static SV *
806decode_json (SV *string, UV flags) 862decode_json (SV *string, UV flags)
807{ 863{
808 SV *sv; 864 SV *sv;
809 865
810 if (!(flags & F_UTF8)) 866 if (flags & F_UTF8)
867 sv_utf8_downgrade (string, 0);
868 else
811 sv_utf8_upgrade (string); 869 sv_utf8_upgrade (string);
812 870
813 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP 871 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
814 872
815 dec_t dec; 873 dec_t dec;
816 dec.flags = flags; 874 dec.flags = flags;
817 dec.cur = SvPVX (string); 875 dec.cur = SvPVX (string);
818 dec.end = SvEND (string); 876 dec.end = SvEND (string);
819 dec.err = 0; 877 dec.err = 0;
820 878
821 *dec.end = 1; // invalid anywhere
822 sv = decode_sv (&dec); 879 sv = decode_sv (&dec);
823 *dec.end = 0;
824 880
825 if (!sv) 881 if (!sv)
826 { 882 {
883 IV offset = dec.flags & F_UTF8
884 ? dec.cur - SvPVX (string)
827 IV offset = utf8_distance (dec.cur, SvPVX (string)); 885 : utf8_distance (dec.cur, SvPVX (string));
828 SV *uni = sv_newmortal (); 886 SV *uni = sv_newmortal ();
829 887
888 // horrible hack to silence warning inside pv_uni_display
889 COP cop = *PL_curcop;
890 cop.cop_warnings = pWARN_NONE;
891 ENTER;
892 SAVEVPTR (PL_curcop);
893 PL_curcop = &cop;
830 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); 894 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
895 LEAVE;
896
831 croak ("%s, at character %d (%s)", 897 croak ("%s, at character offset %d (%s)",
832 dec.err, 898 dec.err,
833 (int)offset, 899 (int)offset,
834 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); 900 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
835 } 901 }
836 902
867 CODE: 933 CODE:
868 RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash); 934 RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);
869 OUTPUT: 935 OUTPUT:
870 RETVAL 936 RETVAL
871 937
872SV *ascii (SV *self, int enable) 938SV *ascii (SV *self, int enable = 1)
873 ALIAS: 939 ALIAS:
874 ascii = F_ASCII 940 ascii = F_ASCII
875 utf8 = F_UTF8 941 utf8 = F_UTF8
876 indent = F_INDENT 942 indent = F_INDENT
877 canonical = F_CANONICAL 943 canonical = F_CANONICAL
878 space_before = F_SPACE_BEFORE 944 space_before = F_SPACE_BEFORE
879 space_after = F_SPACE_AFTER 945 space_after = F_SPACE_AFTER
880 json_rpc = F_JSON_RPC 946 json_rpc = F_JSON_RPC
881 pretty = F_PRETTY 947 pretty = F_PRETTY
882 allow_nonref = F_ALLOW_NONREF 948 allow_nonref = F_ALLOW_NONREF
949 shrink = F_SHRINK
883 CODE: 950 CODE:
884{ 951{
885 UV *uv = SvJSON (self); 952 UV *uv = SvJSON (self);
886 if (enable) 953 if (enable)
887 *uv |= ix; 954 *uv |= ix;

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines