[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.37 by root, Wed Jun 6 17:49:01 2007 UTC vs.
Revision 1.40 by root, Tue Jun 12 01:27:02 2007 UTC

…		…
32	#define F_DEFAULT (9UL << S_MAXDEPTH)	32	#define F_DEFAULT (9UL << S_MAXDEPTH)
33		33
34	#define INIT_SIZE 32 // initial scalar size to be allocated	34	#define INIT_SIZE 32 // initial scalar size to be allocated
35	#define INDENT_STEP 3 // spaces per indentation level	35	#define INDENT_STEP 3 // spaces per indentation level
36		36
37	#define SHORT_STRING_LEN 512 // special-case strings of up to this size	37	#define SHORT_STRING_LEN 16384 // special-case strings of up to this size
38		38
39	#define SB do {	39	#define SB do {
40	#define SE } while (0)	40	#define SE } while (0)
41		41
42	#if __GNUC__ >= 3	42	#if __GNUC__ >= 3
…		…
178	STRLEN clen;	178	STRLEN clen;
179	UV uch;	179	UV uch;
180		180
181	if (is_utf8)	181	if (is_utf8)
182	{	182	{
183	//uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
184	uch = decode_utf8 (str, end - str, &clen);	183	uch = decode_utf8 (str, end - str, &clen);
185	if (clen == (STRLEN)-1)	184	if (clen == (STRLEN)-1)
186	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);	185	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
187	}	186	}
188	else	187	else
…		…
497	encode_str (enc, str, len, SvUTF8 (sv));	496	encode_str (enc, str, len, SvUTF8 (sv));
498	encode_ch (enc, '"');	497	encode_ch (enc, '"');
499	}	498	}
500	else if (SvNOKp (sv))	499	else if (SvNOKp (sv))
501	{	500	{
		501	// trust that perl will do the right thing w.r.t. JSON syntax.
502	need (enc, NV_DIG + 32);	502	need (enc, NV_DIG + 32);
503	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);	503	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
504	enc->cur += strlen (enc->cur);	504	enc->cur += strlen (enc->cur);
505	}	505	}
506	else if (SvIOKp (sv))	506	else if (SvIOKp (sv))
507	{	507	{
508	// we assume we can always read an IV as a UV	508	// we assume we can always read an IV as a UV
509	if (SvUV (sv) & ~(UV)0x7fff)	509	if (SvUV (sv) & ~(UV)0x7fff)
510	{	510	{
		511	// large integer, use the (rather slow) snprintf way.
511	need (enc, sizeof (UV) * 3);	512	need (enc, sizeof (UV) * 3);
512	enc->cur +=	513	enc->cur +=
513	SvIsUV(sv)	514	SvIsUV(sv)
514	? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))	515	? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
515	: snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));	516	: snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
…		…
518	{	519	{
519	// optimise the "small number case"	520	// optimise the "small number case"
520	// code will likely be branchless and use only a single multiplication	521	// code will likely be branchless and use only a single multiplication
521	I32 i = SvIV (sv);	522	I32 i = SvIV (sv);
522	U32 u;	523	U32 u;
		524	char digit, nz = 0;
523		525
524	need (enc, 6);	526	need (enc, 6);
525		527
526	*enc->cur = '-'; enc->cur += i < 0 ? 1 : 0;	528	*enc->cur = '-'; enc->cur += i < 0 ? 1 : 0;
527	u = i < 0 ? -i : i;	529	u = i < 0 ? -i : i;
528		530
529	// convert to 4.28 fixed-point representation	531	// convert to 4.28 fixed-point representation
530	u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits	532	u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits
531		533
532	char digit, nz = 0;	534	// now output digit by digit, each time masking out the integer part
533		535	// and multiplying by 5 while moving the decimal point one to the right,
		536	// resulting in a net multiplication by 10.
		537	// we always write the digit to memory but conditionally increment
		538	// the pointer, to ease the usage of conditional move instructions.
534	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5;	539	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5;
535	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5;	540	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5;
536	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5;	541	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5;
537	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5;	542	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5;
538	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1;	543	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
539	}	544	}
540	}	545	}
541	else if (SvROK (sv))	546	else if (SvROK (sv))
542	encode_rv (enc, SvRV (sv));	547	encode_rv (enc, SvRV (sv));
543	else if (!SvOK (sv))	548	else if (!SvOK (sv))
…		…
625	decode_4hex (dec_t *dec)	630	decode_4hex (dec_t *dec)
626	{	631	{
627	signed char d1, d2, d3, d4;	632	signed char d1, d2, d3, d4;
628	unsigned char *cur = (unsigned char *)dec->cur;	633	unsigned char *cur = (unsigned char *)dec->cur;
629		634
630	d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("four hexadecimal digits expected");	635	d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("exactly four hexadecimal digits expected");
631	d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("four hexadecimal digits expected");	636	d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("exactly four hexadecimal digits expected");
632	d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("four hexadecimal digits expected");	637	d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("exactly four hexadecimal digits expected");
633	d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("four hexadecimal digits expected");	638	d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("exactly four hexadecimal digits expected");
634		639
635	dec->cur += 4;	640	dec->cur += 4;
636		641
637	return ((UV)d1) << 12	642	return ((UV)d1) << 12
638	| ((UV)d2) << 8	643	| ((UV)d2) << 8
…		…
646	static SV *	651	static SV *
647	decode_str (dec_t *dec)	652	decode_str (dec_t *dec)
648	{	653	{
649	SV *sv = 0;	654	SV *sv = 0;
650	int utf8 = 0;	655	int utf8 = 0;
		656	char *dec_cur = dec->cur;
651		657
652	do	658	do
653	{	659	{
654	char buf [SHORT_STRING_LEN + UTF8_MAXBYTES];	660	char buf [SHORT_STRING_LEN + UTF8_MAXBYTES];
655	char *cur = buf;	661	char *cur = buf;
656		662
657	do	663	do
658	{	664	{
659	unsigned char ch = *(unsigned char *)dec->cur++;	665	unsigned char ch = *(unsigned char *)dec_cur++;
660		666
661	if (expect_false (ch == '"'))	667	if (expect_false (ch == '"'))
662	{	668	{
663	--dec->cur;	669	--dec_cur;
664	break;	670	break;
665	}	671	}
666	else if (expect_false (ch == '\\'))	672	else if (expect_false (ch == '\\'))
667	{	673	{
668	switch (*dec->cur)	674	switch (*dec_cur)
669	{	675	{
670	case '\\':	676	case '\\':
671	case '/':	677	case '/':
672	case '"': *cur++ = *dec->cur++; break;	678	case '"': *cur++ = *dec_cur++; break;
673		679
674	case 'b': ++dec->cur; *cur++ = '\010'; break;	680	case 'b': ++dec_cur; *cur++ = '\010'; break;
675	case 't': ++dec->cur; *cur++ = '\011'; break;	681	case 't': ++dec_cur; *cur++ = '\011'; break;
676	case 'n': ++dec->cur; *cur++ = '\012'; break;	682	case 'n': ++dec_cur; *cur++ = '\012'; break;
677	case 'f': ++dec->cur; *cur++ = '\014'; break;	683	case 'f': ++dec_cur; *cur++ = '\014'; break;
678	case 'r': ++dec->cur; *cur++ = '\015'; break;	684	case 'r': ++dec_cur; *cur++ = '\015'; break;
679		685
680	case 'u':	686	case 'u':
681	{	687	{
682	UV lo, hi;	688	UV lo, hi;
683	++dec->cur;	689	++dec_cur;
684		690
		691	dec->cur = dec_cur;
685	hi = decode_4hex (dec);	692	hi = decode_4hex (dec);
		693	dec_cur = dec->cur;
686	if (hi == (UV)-1)	694	if (hi == (UV)-1)
687	goto fail;	695	goto fail;
688		696
689	// possibly a surrogate pair	697	// possibly a surrogate pair
690	if (hi >= 0xd800)	698	if (hi >= 0xd800)
691	if (hi < 0xdc00)	699	if (hi < 0xdc00)
692	{	700	{
693	if (dec->cur [0] != '\\' || dec->cur [1] != 'u')	701	if (dec_cur [0] != '\\' || dec_cur [1] != 'u')
694	ERR ("missing low surrogate character in surrogate pair");	702	ERR ("missing low surrogate character in surrogate pair");
695		703
696	dec->cur += 2;	704	dec_cur += 2;
697		705
		706	dec->cur = dec_cur;
698	lo = decode_4hex (dec);	707	lo = decode_4hex (dec);
		708	dec_cur = dec->cur;
699	if (lo == (UV)-1)	709	if (lo == (UV)-1)
700	goto fail;	710	goto fail;
701		711
702	if (lo < 0xdc00 || lo >= 0xe000)	712	if (lo < 0xdc00 || lo >= 0xe000)
703	ERR ("surrogate pair expected");	713	ERR ("surrogate pair expected");
…		…
717	*cur++ = hi;	727	*cur++ = hi;
718	}	728	}
719	break;	729	break;
720		730
721	default:	731	default:
722	--dec->cur;	732	--dec_cur;
723	ERR ("illegal backslash escape sequence in string");	733	ERR ("illegal backslash escape sequence in string");
724	}	734	}
725	}	735	}
726	else if (expect_true (ch >= 0x20 && ch <= 0x7f))	736	else if (expect_true (ch >= 0x20 && ch <= 0x7f))
727	*cur++ = ch;	737	*cur++ = ch;
728	else if (ch >= 0x80)	738	else if (ch >= 0x80)
729	{	739	{
730	STRLEN clen;	740	STRLEN clen;
731	UV uch;	741	UV uch;
732		742
733	--dec->cur;	743	--dec_cur;
734		744
735	uch = decode_utf8 (dec->cur, dec->end - dec->cur, &clen);	745	uch = decode_utf8 (dec_cur, dec->end - dec_cur, &clen);
736	if (clen == (STRLEN)-1)	746	if (clen == (STRLEN)-1)
737	ERR ("malformed UTF-8 character in JSON string");	747	ERR ("malformed UTF-8 character in JSON string");
738		748
739	do	749	do
740	*cur++ = *dec->cur++;	750	*cur++ = *dec_cur++;
741	while (--clen);	751	while (--clen);
742		752
743	utf8 = 1;	753	utf8 = 1;
744	}	754	}
745	else	755	else
746	{	756	{
747	--dec->cur;	757	--dec_cur;
748		758
749	if (!ch)	759	if (!ch)
750	ERR ("unexpected end of string while parsing JSON string");	760	ERR ("unexpected end of string while parsing JSON string");
751	else	761	else
752	ERR ("invalid character encountered while parsing JSON string");	762	ERR ("invalid character encountered while parsing JSON string");
…		…
765	}	775	}
766	else	776	else
767	sv = newSVpvn (buf, len);	777	sv = newSVpvn (buf, len);
768	}	778	}
769	}	779	}
770	while (*dec->cur != '"');	780	while (*dec_cur != '"');
771		781
772	++dec->cur;	782	++dec_cur;
773		783
774	if (sv)	784	if (sv)
775	{	785	{
776	SvPOK_only (sv);	786	SvPOK_only (sv);
777	*SvEND (sv) = 0;	787	*SvEND (sv) = 0;
…		…
780	SvUTF8_on (sv);	790	SvUTF8_on (sv);
781	}	791	}
782	else	792	else
783	sv = newSVpvn ("", 0);	793	sv = newSVpvn ("", 0);
784		794
		795	dec->cur = dec_cur;
785	return sv;	796	return sv;
786		797
787	fail:	798	fail:
		799	dec->cur = dec_cur;
788	return 0;	800	return 0;
789	}	801	}
790		802
791	static SV *	803	static SV *
792	decode_num (dec_t *dec)	804	decode_num (dec_t *dec)
…		…
854	{	866	{
855	// special case the rather common 1..4-digit-int case, assumes 32 bit ints or so	867	// special case the rather common 1..4-digit-int case, assumes 32 bit ints or so
856	if (*start == '-')	868	if (*start == '-')
857	switch (dec->cur - start)	869	switch (dec->cur - start)
858	{	870	{
859	case 2: return newSViv (-( start [1] - '0' ));	871	case 2: return newSViv (-( start [1] - '0' * 1));
860	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));	872	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));
861	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));	873	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
862	case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));	874	case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
863	}	875	}
864	else	876	else
865	switch (dec->cur - start)	877	switch (dec->cur - start)
866	{	878	{
867	case 1: return newSViv ( start [0] - '0' );	879	case 1: return newSViv ( start [0] - '0' * 1);
868	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);	880	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
869	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);	881	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
870	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);	882	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
871	}	883	}
872		884
…		…
879	if (uv < (UV)IV_MIN)	891	if (uv < (UV)IV_MIN)
880	return newSViv (-(IV)uv);	892	return newSViv (-(IV)uv);
881	}	893	}
882	else	894	else
883	return newSVuv (uv);	895	return newSVuv (uv);
		896
		897	// here would likely be the place for bigint support
884	}	898	}
885	}	899	}
886		900
		901	// if we ever support bigint or bigfloat, this is the place for bigfloat
887	return newSVnv (Atof (start));	902	return newSVnv (Atof (start));
888		903
889	fail:	904	fail:
890	return 0;	905	return 0;
891	}	906	}
…		…
992		1007
993	static SV *	1008	static SV *
994	decode_sv (dec_t *dec)	1009	decode_sv (dec_t *dec)
995	{	1010	{
996	decode_ws (dec);	1011	decode_ws (dec);
		1012
		1013	// the beauty of JSON: you need exactly one character lookahead
		1014	// to parse anything.
997	switch (*dec->cur)	1015	switch (*dec->cur)
998	{	1016	{
999	case '"': ++dec->cur; return decode_str (dec);	1017	case '"': ++dec->cur; return decode_str (dec);
1000	case '[': ++dec->cur; return decode_av (dec);	1018	case '[': ++dec->cur; return decode_av (dec);
1001	case '{': ++dec->cur; return decode_hv (dec);	1019	case '{': ++dec->cur; return decode_hv (dec);
…		…
1130	MODULE = JSON::XS PACKAGE = JSON::XS	1148	MODULE = JSON::XS PACKAGE = JSON::XS
1131		1149
1132	BOOT:	1150	BOOT:
1133	{	1151	{
1134	int i;	1152	int i;
1135
1136	memset (decode_hexdigit, 0xff, 256);
1137		1153
1138	for (i = 0; i < 256; ++i)	1154	for (i = 0; i < 256; ++i)
1139	decode_hexdigit [i] =	1155	decode_hexdigit [i] =
1140	i >= '0' && i <= '9' ? i - '0'	1156	i >= '0' && i <= '9' ? i - '0'
1141	: i >= 'a' && i <= 'f' ? i - 'a' + 10	1157	: i >= 'a' && i <= 'f' ? i - 'a' + 10

Diff Legend

– Removed lines
+ Added lines
< Changed lines
> Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.37 by root, Wed Jun 6 17:49:01 2007 UTC vs. Revision 1.40 by root, Tue Jun 12 01:27:02 2007 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.37 by root, Wed Jun 6 17:49:01 2007 UTC vs.
Revision 1.40 by root, Tue Jun 12 01:27:02 2007 UTC