ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.4 by root, Thu Mar 22 21:13:58 2007 UTC vs.
Revision 1.5 by root, Thu Mar 22 23:24:18 2007 UTC

126 UV uch; 126 UV uch;
127 127
128 if (is_utf8) 128 if (is_utf8)
129 { 129 {
130 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); 130 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
131 if (clen < 0) 131 if (clen == (STRLEN)-1)
132 croak ("malformed UTF-8 character in string, cannot convert to JSON"); 132 croak ("malformed UTF-8 character in string, cannot convert to JSON");
133 } 133 }
134 else 134 else
135 { 135 {
136 uch = ch; 136 uch = ch;
162 str += clen; 162 str += clen;
163 } 163 }
164 else if (is_utf8) 164 else if (is_utf8)
165 { 165 {
166 need (enc, len += clen); 166 need (enc, len += clen);
167 while (clen--) 167 do
168 {
168 *enc->cur++ = *str++; 169 *enc->cur++ = *str++;
170 }
171 while (--clen);
169 } 172 }
170 else 173 else
171 { 174 {
172 need (enc, 10); // never more than 11 bytes needed 175 need (enc, 10); // never more than 11 bytes needed
173 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); 176 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
529 532
530 // possibly a surrogate pair 533 // possibly a surrogate pair
531 if (hi >= 0xd800 && hi < 0xdc00) 534 if (hi >= 0xd800 && hi < 0xdc00)
532 { 535 {
533 if (dec->cur [0] != '\\' || dec->cur [1] != 'u') 536 if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
534 ERR ("illegal surrogate character"); 537 ERR ("missing low surrogate character in surrogate pair");
535 538
536 dec->cur += 2; 539 dec->cur += 2;
537 540
538 lo = decode_4hex (dec); 541 lo = decode_4hex (dec);
539 if (lo == (UV)-1) 542 if (lo == (UV)-1)
542 if (lo < 0xdc00 || lo >= 0xe000) 545 if (lo < 0xdc00 || lo >= 0xe000)
543 ERR ("surrogate pair expected"); 546 ERR ("surrogate pair expected");
544 547
545 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000; 548 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
546 } 549 }
547 else if (lo >= 0xdc00 && lo < 0xe000) 550 else if (hi >= 0xdc00 && hi < 0xe000)
548 ERR ("illegal surrogate character"); 551 ERR ("missing high surrogate character in surrogate pair");
549 552
550 if (hi >= 0x80) 553 if (hi >= 0x80)
551 { 554 {
552 utf8 = 1; 555 utf8 = 1;
553 556
556 } 559 }
557 else 560 else
558 APPEND_CH (hi); 561 APPEND_CH (hi);
559 } 562 }
560 break; 563 break;
564
565 default:
566 --dec->cur;
567 ERR ("illegal backslash escape sequence in string");
561 } 568 }
562 } 569 }
563 else if (ch >= 0x20 && ch <= 0x7f) 570 else if (ch >= 0x20 && ch <= 0x7f)
564 APPEND_CH (*dec->cur++); 571 APPEND_CH (*dec->cur++);
565 else if (ch >= 0x80) 572 else if (ch >= 0x80)
566 { 573 {
567 STRLEN clen; 574 STRLEN clen;
568 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY); 575 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
569 if (clen < 0) 576 if (clen == (STRLEN)-1)
570 ERR ("malformed UTF-8 character in string, cannot convert to JSON"); 577 ERR ("malformed UTF-8 character in string, cannot convert to JSON");
571 578
572 APPEND_GROW (clen); 579 APPEND_GROW (clen);
573 memcpy (cur, dec->cur, clen); 580 do
574 cur += clen; 581 {
575 dec->cur += clen; 582 *cur++ = *dec->cur++;
576 } 583 }
584 while (--clen);
585
586 utf8 = 1;
587 }
588 else if (dec->cur == dec->end)
589 ERR ("unexpected end of string while parsing json string");
577 else 590 else
578 ERR ("invalid character encountered"); 591 ERR ("invalid character encountered");
579 } 592 }
580 593
581 ++dec->cur; 594 ++dec->cur;
609 { 622 {
610 ++dec->cur; 623 ++dec->cur;
611 if (*dec->cur >= '0' && *dec->cur <= '9') 624 if (*dec->cur >= '0' && *dec->cur <= '9')
612 ERR ("malformed number (leading zero must not be followed by another digit)"); 625 ERR ("malformed number (leading zero must not be followed by another digit)");
613 } 626 }
614 627 else if (*dec->cur < '0' || *dec->cur > '9')
615 // int 628 ERR ("malformed number (no digits after initial minus)");
629 else
630 do
631 {
632 ++dec->cur;
633 }
616 while (*dec->cur >= '0' && *dec->cur <= '9') 634 while (*dec->cur >= '0' && *dec->cur <= '9');
617 ++dec->cur;
618 635
619 // [frac] 636 // [frac]
620 if (*dec->cur == '.') 637 if (*dec->cur == '.')
621 { 638 {
639 ++dec->cur;
640
641 if (*dec->cur < '0' || *dec->cur > '9')
642 ERR ("malformed number (no digits after decimal point)");
643
644 do
645 {
646 ++dec->cur;
647 }
648 while (*dec->cur >= '0' && *dec->cur <= '9');
649
622 is_nv = 1; 650 is_nv = 1;
623
624 do
625 {
626 ++dec->cur;
627 }
628 while (*dec->cur >= '0' && *dec->cur <= '9');
629 } 651 }
630 652
631 // [exp] 653 // [exp]
632 if (*dec->cur == 'e' || *dec->cur == 'E') 654 if (*dec->cur == 'e' || *dec->cur == 'E')
633 { 655 {
634 is_nv = 1;
635
636 ++dec->cur; 656 ++dec->cur;
657
637 if (*dec->cur == '-' || *dec->cur == '+') 658 if (*dec->cur == '-' || *dec->cur == '+')
638 ++dec->cur; 659 ++dec->cur;
639 660
661 if (*dec->cur < '0' || *dec->cur > '9')
662 ERR ("malformed number (no digits after exp sign)");
663
664 do
665 {
666 ++dec->cur;
667 }
640 while (*dec->cur >= '0' && *dec->cur <= '9') 668 while (*dec->cur >= '0' && *dec->cur <= '9');
641 ++dec->cur; 669
670 is_nv = 1;
642 } 671 }
643 672
644 if (!is_nv) 673 if (!is_nv)
645 { 674 {
646 UV uv; 675 UV uv;
664static SV * 693static SV *
665decode_av (dec_t *dec) 694decode_av (dec_t *dec)
666{ 695{
667 AV *av = newAV (); 696 AV *av = newAV ();
668 697
698 WS;
699 if (*dec->cur == ']')
700 ++dec->cur;
701 else
669 for (;;) 702 for (;;)
670 { 703 {
671 SV *value; 704 SV *value;
672 705
673 value = decode_sv (dec); 706 value = decode_sv (dec);
674 if (!value) 707 if (!value)
675 goto fail; 708 goto fail;
676 709
677 av_push (av, value); 710 av_push (av, value);
678 711
679 WS; 712 WS;
680 713
681 if (*dec->cur == ']') 714 if (*dec->cur == ']')
682 { 715 {
683 ++dec->cur; 716 ++dec->cur;
684 break; 717 break;
718 }
685 } 719
686
687 if (*dec->cur != ',') 720 if (*dec->cur != ',')
688 ERR (", or ] expected while parsing array"); 721 ERR (", or ] expected while parsing array");
689 722
690 ++dec->cur; 723 ++dec->cur;
691 } 724 }
692 725
693 return newRV_noinc ((SV *)av); 726 return newRV_noinc ((SV *)av);
694 727
695fail: 728fail:
696 SvREFCNT_dec (av); 729 SvREFCNT_dec (av);
700static SV * 733static SV *
701decode_hv (dec_t *dec) 734decode_hv (dec_t *dec)
702{ 735{
703 HV *hv = newHV (); 736 HV *hv = newHV ();
704 737
738 WS;
739 if (*dec->cur == '}')
740 ++dec->cur;
741 else
705 for (;;) 742 for (;;)
706 { 743 {
707 SV *key, *value; 744 SV *key, *value;
708 745
709 WS; EXPECT_CH ('"'); 746 WS; EXPECT_CH ('"');
710 747
711 key = decode_str (dec); 748 key = decode_str (dec);
712 if (!key) 749 if (!key)
713 goto fail;
714
715 WS; EXPECT_CH (':');
716
717 value = decode_sv (dec);
718 if (!value)
719 {
720 SvREFCNT_dec (key);
721 goto fail; 750 goto fail;
751
752 WS; EXPECT_CH (':');
753
754 value = decode_sv (dec);
755 if (!value)
756 {
757 SvREFCNT_dec (key);
758 goto fail;
722 } 759 }
723 760
724 //TODO: optimise 761 //TODO: optimise
725 hv_store_ent (hv, key, value, 0); 762 hv_store_ent (hv, key, value, 0);
726 763
727 WS; 764 WS;
728 765
729 if (*dec->cur == '}') 766 if (*dec->cur == '}')
730 { 767 {
731 ++dec->cur; 768 ++dec->cur;
732 break; 769 break;
733 } 770 }
734 771
735 if (*dec->cur != ',') 772 if (*dec->cur != ',')
736 ERR (", or } expected while parsing object/hash"); 773 ERR (", or } expected while parsing object/hash");
737 774
738 ++dec->cur; 775 ++dec->cur;
739 } 776 }
740 777
741 return newRV_noinc ((SV *)hv); 778 return newRV_noinc ((SV *)hv);
742 779
743fail: 780fail:
744 SvREFCNT_dec (hv); 781 SvREFCNT_dec (hv);
784 821
785 case 'n': 822 case 'n':
786 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4)) 823 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
787 { 824 {
788 dec->cur += 4; 825 dec->cur += 4;
789 return newSViv (1); 826 return newSVsv (&PL_sv_undef);
790 } 827 }
791 else 828 else
792 ERR ("'null' expected"); 829 ERR ("'null' expected");
793 830
794 break; 831 break;
805static SV * 842static SV *
806decode_json (SV *string, UV flags) 843decode_json (SV *string, UV flags)
807{ 844{
808 SV *sv; 845 SV *sv;
809 846
810 if (!(flags & F_UTF8)) 847 if (flags & F_UTF8)
848 sv_utf8_downgrade (string, 0);
849 else
811 sv_utf8_upgrade (string); 850 sv_utf8_upgrade (string);
812 851
813 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP 852 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
814 853
815 dec_t dec; 854 dec_t dec;
816 dec.flags = flags; 855 dec.flags = flags;
817 dec.cur = SvPVX (string); 856 dec.cur = SvPVX (string);
818 dec.end = SvEND (string); 857 dec.end = SvEND (string);
819 dec.err = 0; 858 dec.err = 0;
820 859
821 *dec.end = 1; // invalid anywhere
822 sv = decode_sv (&dec); 860 sv = decode_sv (&dec);
823 *dec.end = 0;
824 861
825 if (!sv) 862 if (!sv)
826 { 863 {
827 IV offset = utf8_distance (dec.cur, SvPVX (string)); 864 IV offset = utf8_distance (dec.cur, SvPVX (string));
828 SV *uni = sv_newmortal (); 865 SV *uni = sv_newmortal ();
866 // horrible hack to silence warning inside pv_uni_display
867 COP cop;
868 memset (&cop, 0, sizeof (cop));
869 cop.cop_warnings = pWARN_NONE;
870 SAVEVPTR (PL_curcop);
871 PL_curcop = &cop;
829 872
830 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); 873 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
831 croak ("%s, at character %d (%s)", 874 croak ("%s, at character offset %d (%s)",
832 dec.err, 875 dec.err,
833 (int)offset, 876 (int)offset,
834 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); 877 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
835 } 878 }
836 879

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines