ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.35 by root, Wed Jun 6 14:52:49 2007 UTC vs.
Revision 1.39 by root, Mon Jun 11 03:42:57 2007 UTC

32#define F_DEFAULT (9UL << S_MAXDEPTH) 32#define F_DEFAULT (9UL << S_MAXDEPTH)
33 33
34#define INIT_SIZE 32 // initial scalar size to be allocated 34#define INIT_SIZE 32 // initial scalar size to be allocated
35#define INDENT_STEP 3 // spaces per indentation level 35#define INDENT_STEP 3 // spaces per indentation level
36 36
37#define SHORT_STRING_LEN 512 // special-case strings of up to this size 37#define SHORT_STRING_LEN 16384 // special-case strings of up to this size
38 38
39#define SB do { 39#define SB do {
40#define SE } while (0) 40#define SE } while (0)
41 41
42#if __GNUC__ >= 3 42#if __GNUC__ >= 3
497 encode_str (enc, str, len, SvUTF8 (sv)); 497 encode_str (enc, str, len, SvUTF8 (sv));
498 encode_ch (enc, '"'); 498 encode_ch (enc, '"');
499 } 499 }
500 else if (SvNOKp (sv)) 500 else if (SvNOKp (sv))
501 { 501 {
502 // trust that perl will do the right thing w.r.t. JSON syntax.
502 need (enc, NV_DIG + 32); 503 need (enc, NV_DIG + 32);
503 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); 504 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
504 enc->cur += strlen (enc->cur); 505 enc->cur += strlen (enc->cur);
505 } 506 }
506 else if (SvIOKp (sv)) 507 else if (SvIOKp (sv))
507 { 508 {
508 // we assume we can always read an IV as a UV 509 // we assume we can always read an IV as a UV
509 if (SvUV (sv) & ~(UV)0x7fff) 510 if (SvUV (sv) & ~(UV)0x7fff)
510 { 511 {
512 // large integer, use the (rather slow) snprintf way.
511 need (enc, 32); 513 need (enc, sizeof (UV) * 3);
512 enc->cur += 514 enc->cur +=
513 SvIsUV(sv) 515 SvIsUV(sv)
514 ? snprintf (enc->cur, 32, "%"UVuf, (UV)SvUVX (sv)) 516 ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
515 : snprintf (enc->cur, 32, "%"IVdf, (IV)SvIVX (sv)); 517 : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
516 } 518 }
517 else 519 else
518 { 520 {
519 // optimise the "small number case" 521 // optimise the "small number case"
520 // code will likely be branchless and use only a single multiplication 522 // code will likely be branchless and use only a single multiplication
521 I32 i = SvIV (sv); 523 I32 i = SvIV (sv);
522 U32 u; 524 U32 u;
525 char digit, nz = 0;
523 526
524 need (enc, 6); 527 need (enc, 6);
525 528
526 *enc->cur = '-'; enc->cur += i < 0 ? 1 : 0; 529 *enc->cur = '-'; enc->cur += i < 0 ? 1 : 0;
527 u = i < 0 ? -i : i; 530 u = i < 0 ? -i : i;
528 531
529 // convert to 4.28 fixed-point representation 532 // convert to 4.28 fixed-point representation
530 u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits 533 u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits
531 534
532 char digit, nz = 0; 535 // now output digit by digit, each time masking out the integer part
533 536 // and multiplying by 5 while moving the decimal point one to the right,
537 // resulting in a net multiplication by 10.
538 // we always write the digit to memory but conditionally increment
539 // the pointer, to ease the usage of conditional move instructions.
534 digit = u >> 28; *enc->cur = digit + '0'; nz |= digit; enc->cur += nz ? 1 : 0; u = (u & 0xfffffff) * 5; 540 digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5;
535 digit = u >> 27; *enc->cur = digit + '0'; nz |= digit; enc->cur += nz ? 1 : 0; u = (u & 0x7ffffff) * 5; 541 digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5;
536 digit = u >> 26; *enc->cur = digit + '0'; nz |= digit; enc->cur += nz ? 1 : 0; u = (u & 0x3ffffff) * 5; 542 digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5;
537 digit = u >> 25; *enc->cur = digit + '0'; nz |= digit; enc->cur += nz ? 1 : 0; u = (u & 0x1ffffff) * 5; 543 digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5;
538 digit = u >> 24; *enc->cur = digit + '0'; nz |= digit; enc->cur += 1; 544 digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
539 } 545 }
540 } 546 }
541 else if (SvROK (sv)) 547 else if (SvROK (sv))
542 encode_rv (enc, SvRV (sv)); 548 encode_rv (enc, SvRV (sv));
543 else if (!SvOK (sv)) 549 else if (!SvOK (sv))
646static SV * 652static SV *
647decode_str (dec_t *dec) 653decode_str (dec_t *dec)
648{ 654{
649 SV *sv = 0; 655 SV *sv = 0;
650 int utf8 = 0; 656 int utf8 = 0;
657 char *dec_cur = dec->cur;
651 658
652 do 659 do
653 { 660 {
654 char buf [SHORT_STRING_LEN + UTF8_MAXBYTES]; 661 char buf [SHORT_STRING_LEN + UTF8_MAXBYTES];
655 char *cur = buf; 662 char *cur = buf;
656 663
657 do 664 do
658 { 665 {
659 unsigned char ch = *(unsigned char *)dec->cur++; 666 unsigned char ch = *(unsigned char *)dec_cur++;
660 667
661 if (expect_false (ch == '"')) 668 if (expect_false (ch == '"'))
662 { 669 {
663 --dec->cur; 670 --dec_cur;
664 break; 671 break;
665 } 672 }
666 else if (expect_false (ch == '\\')) 673 else if (expect_false (ch == '\\'))
667 { 674 {
668 switch (*dec->cur) 675 switch (*dec_cur)
669 { 676 {
670 case '\\': 677 case '\\':
671 case '/': 678 case '/':
672 case '"': *cur++ = *dec->cur++; break; 679 case '"': *cur++ = *dec_cur++; break;
673 680
674 case 'b': ++dec->cur; *cur++ = '\010'; break; 681 case 'b': ++dec_cur; *cur++ = '\010'; break;
675 case 't': ++dec->cur; *cur++ = '\011'; break; 682 case 't': ++dec_cur; *cur++ = '\011'; break;
676 case 'n': ++dec->cur; *cur++ = '\012'; break; 683 case 'n': ++dec_cur; *cur++ = '\012'; break;
677 case 'f': ++dec->cur; *cur++ = '\014'; break; 684 case 'f': ++dec_cur; *cur++ = '\014'; break;
678 case 'r': ++dec->cur; *cur++ = '\015'; break; 685 case 'r': ++dec_cur; *cur++ = '\015'; break;
679 686
680 case 'u': 687 case 'u':
681 { 688 {
682 UV lo, hi; 689 UV lo, hi;
683 ++dec->cur; 690 ++dec_cur;
684 691
692 dec->cur = dec_cur;
685 hi = decode_4hex (dec); 693 hi = decode_4hex (dec);
694 dec_cur = dec->cur;
686 if (hi == (UV)-1) 695 if (hi == (UV)-1)
687 goto fail; 696 goto fail;
688 697
689 // possibly a surrogate pair 698 // possibly a surrogate pair
690 if (hi >= 0xd800) 699 if (hi >= 0xd800)
691 if (hi < 0xdc00) 700 if (hi < 0xdc00)
692 { 701 {
693 if (dec->cur [0] != '\\' || dec->cur [1] != 'u') 702 if (dec_cur [0] != '\\' || dec_cur [1] != 'u')
694 ERR ("missing low surrogate character in surrogate pair"); 703 ERR ("missing low surrogate character in surrogate pair");
695 704
696 dec->cur += 2; 705 dec_cur += 2;
697 706
707 dec->cur = dec_cur;
698 lo = decode_4hex (dec); 708 lo = decode_4hex (dec);
709 dec_cur = dec->cur;
699 if (lo == (UV)-1) 710 if (lo == (UV)-1)
700 goto fail; 711 goto fail;
701 712
702 if (lo < 0xdc00 || lo >= 0xe000) 713 if (lo < 0xdc00 || lo >= 0xe000)
703 ERR ("surrogate pair expected"); 714 ERR ("surrogate pair expected");
717 *cur++ = hi; 728 *cur++ = hi;
718 } 729 }
719 break; 730 break;
720 731
721 default: 732 default:
722 --dec->cur; 733 --dec_cur;
723 ERR ("illegal backslash escape sequence in string"); 734 ERR ("illegal backslash escape sequence in string");
724 } 735 }
725 } 736 }
726 else if (expect_true (ch >= 0x20 && ch <= 0x7f)) 737 else if (expect_true (ch >= 0x20 && ch <= 0x7f))
727 *cur++ = ch; 738 *cur++ = ch;
728 else if (ch >= 0x80) 739 else if (ch >= 0x80)
729 { 740 {
730 STRLEN clen; 741 STRLEN clen;
731 UV uch; 742 UV uch;
732 743
733 --dec->cur; 744 --dec_cur;
734 745
735 uch = decode_utf8 (dec->cur, dec->end - dec->cur, &clen); 746 uch = decode_utf8 (dec_cur, dec->end - dec_cur, &clen);
736 if (clen == (STRLEN)-1) 747 if (clen == (STRLEN)-1)
737 ERR ("malformed UTF-8 character in JSON string"); 748 ERR ("malformed UTF-8 character in JSON string");
738 749
739 do 750 do
740 *cur++ = *dec->cur++; 751 *cur++ = *dec_cur++;
741 while (--clen); 752 while (--clen);
742 753
743 utf8 = 1; 754 utf8 = 1;
744 } 755 }
745 else 756 else
746 { 757 {
747 --dec->cur; 758 --dec_cur;
748 759
749 if (!ch) 760 if (!ch)
750 ERR ("unexpected end of string while parsing JSON string"); 761 ERR ("unexpected end of string while parsing JSON string");
751 else 762 else
752 ERR ("invalid character encountered while parsing JSON string"); 763 ERR ("invalid character encountered while parsing JSON string");
765 } 776 }
766 else 777 else
767 sv = newSVpvn (buf, len); 778 sv = newSVpvn (buf, len);
768 } 779 }
769 } 780 }
770 while (*dec->cur != '"'); 781 while (*dec_cur != '"');
771 782
772 ++dec->cur; 783 ++dec_cur;
773 784
774 if (sv) 785 if (sv)
775 { 786 {
776 SvPOK_only (sv); 787 SvPOK_only (sv);
777 *SvEND (sv) = 0; 788 *SvEND (sv) = 0;
780 SvUTF8_on (sv); 791 SvUTF8_on (sv);
781 } 792 }
782 else 793 else
783 sv = newSVpvn ("", 0); 794 sv = newSVpvn ("", 0);
784 795
796 dec->cur = dec_cur;
785 return sv; 797 return sv;
786 798
787fail: 799fail:
800 dec->cur = dec_cur;
788 return 0; 801 return 0;
789} 802}
790 803
791static SV * 804static SV *
792decode_num (dec_t *dec) 805decode_num (dec_t *dec)

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines