[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.37 by root, Wed Jun 6 17:49:01 2007 UTC vs.
Revision 1.41 by root, Sat Jun 23 22:53:16 2007 UTC

…		…
7	#include "stdlib.h"	7	#include "stdlib.h"
8	#include "stdio.h"	8	#include "stdio.h"
9		9
10	#if defined(__BORLANDC__) || defined(_MSC_VER)	10	#if defined(__BORLANDC__) || defined(_MSC_VER)
11	# define snprintf _snprintf // C compilers have this in stdio.h	11	# define snprintf _snprintf // C compilers have this in stdio.h
		12	#endif
		13
		14	// some old perls do not have this, try to make it work, no
		15	// guarantees, though.
		16	#ifndef UTF8_MAXBYTES
		17	# define UTF8_MAXBYTES 13
12	#endif	18	#endif
13		19
14	#define F_ASCII 0x00000001UL	20	#define F_ASCII 0x00000001UL
15	#define F_LATIN1 0x00000002UL	21	#define F_LATIN1 0x00000002UL
16	#define F_UTF8 0x00000004UL	22	#define F_UTF8 0x00000004UL
…		…
32	#define F_DEFAULT (9UL << S_MAXDEPTH)	38	#define F_DEFAULT (9UL << S_MAXDEPTH)
33		39
34	#define INIT_SIZE 32 // initial scalar size to be allocated	40	#define INIT_SIZE 32 // initial scalar size to be allocated
35	#define INDENT_STEP 3 // spaces per indentation level	41	#define INDENT_STEP 3 // spaces per indentation level
36		42
37	#define SHORT_STRING_LEN 512 // special-case strings of up to this size	43	#define SHORT_STRING_LEN 16384 // special-case strings of up to this size
38		44
39	#define SB do {	45	#define SB do {
40	#define SE } while (0)	46	#define SE } while (0)
41		47
42	#if __GNUC__ >= 3	48	#if __GNUC__ >= 3
…		…
178	STRLEN clen;	184	STRLEN clen;
179	UV uch;	185	UV uch;
180		186
181	if (is_utf8)	187	if (is_utf8)
182	{	188	{
183	//uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
184	uch = decode_utf8 (str, end - str, &clen);	189	uch = decode_utf8 (str, end - str, &clen);
185	if (clen == (STRLEN)-1)	190	if (clen == (STRLEN)-1)
186	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);	191	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
187	}	192	}
188	else	193	else
…		…
497	encode_str (enc, str, len, SvUTF8 (sv));	502	encode_str (enc, str, len, SvUTF8 (sv));
498	encode_ch (enc, '"');	503	encode_ch (enc, '"');
499	}	504	}
500	else if (SvNOKp (sv))	505	else if (SvNOKp (sv))
501	{	506	{
		507	// trust that perl will do the right thing w.r.t. JSON syntax.
502	need (enc, NV_DIG + 32);	508	need (enc, NV_DIG + 32);
503	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);	509	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
504	enc->cur += strlen (enc->cur);	510	enc->cur += strlen (enc->cur);
505	}	511	}
506	else if (SvIOKp (sv))	512	else if (SvIOKp (sv))
507	{	513	{
508	// we assume we can always read an IV as a UV	514	// we assume we can always read an IV as a UV
509	if (SvUV (sv) & ~(UV)0x7fff)	515	if (SvUV (sv) & ~(UV)0x7fff)
510	{	516	{
		517	// large integer, use the (rather slow) snprintf way.
511	need (enc, sizeof (UV) * 3);	518	need (enc, sizeof (UV) * 3);
512	enc->cur +=	519	enc->cur +=
513	SvIsUV(sv)	520	SvIsUV(sv)
514	? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))	521	? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
515	: snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));	522	: snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
…		…
518	{	525	{
519	// optimise the "small number case"	526	// optimise the "small number case"
520	// code will likely be branchless and use only a single multiplication	527	// code will likely be branchless and use only a single multiplication
521	I32 i = SvIV (sv);	528	I32 i = SvIV (sv);
522	U32 u;	529	U32 u;
		530	char digit, nz = 0;
523		531
524	need (enc, 6);	532	need (enc, 6);
525		533
526	*enc->cur = '-'; enc->cur += i < 0 ? 1 : 0;	534	*enc->cur = '-'; enc->cur += i < 0 ? 1 : 0;
527	u = i < 0 ? -i : i;	535	u = i < 0 ? -i : i;
528		536
529	// convert to 4.28 fixed-point representation	537	// convert to 4.28 fixed-point representation
530	u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits	538	u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits
531		539
532	char digit, nz = 0;	540	// now output digit by digit, each time masking out the integer part
533		541	// and multiplying by 5 while moving the decimal point one to the right,
		542	// resulting in a net multiplication by 10.
		543	// we always write the digit to memory but conditionally increment
		544	// the pointer, to ease the usage of conditional move instructions.
534	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5;	545	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5;
535	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5;	546	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5;
536	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5;	547	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5;
537	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5;	548	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5;
538	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1;	549	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
539	}	550	}
540	}	551	}
541	else if (SvROK (sv))	552	else if (SvROK (sv))
542	encode_rv (enc, SvRV (sv));	553	encode_rv (enc, SvRV (sv));
543	else if (!SvOK (sv))	554	else if (!SvOK (sv))
…		…
625	decode_4hex (dec_t *dec)	636	decode_4hex (dec_t *dec)
626	{	637	{
627	signed char d1, d2, d3, d4;	638	signed char d1, d2, d3, d4;
628	unsigned char *cur = (unsigned char *)dec->cur;	639	unsigned char *cur = (unsigned char *)dec->cur;
629		640
630	d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("four hexadecimal digits expected");	641	d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("exactly four hexadecimal digits expected");
631	d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("four hexadecimal digits expected");	642	d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("exactly four hexadecimal digits expected");
632	d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("four hexadecimal digits expected");	643	d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("exactly four hexadecimal digits expected");
633	d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("four hexadecimal digits expected");	644	d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("exactly four hexadecimal digits expected");
634		645
635	dec->cur += 4;	646	dec->cur += 4;
636		647
637	return ((UV)d1) << 12	648	return ((UV)d1) << 12
638	| ((UV)d2) << 8	649	| ((UV)d2) << 8
…		…
646	static SV *	657	static SV *
647	decode_str (dec_t *dec)	658	decode_str (dec_t *dec)
648	{	659	{
649	SV *sv = 0;	660	SV *sv = 0;
650	int utf8 = 0;	661	int utf8 = 0;
		662	char *dec_cur = dec->cur;
651		663
652	do	664	do
653	{	665	{
654	char buf [SHORT_STRING_LEN + UTF8_MAXBYTES];	666	char buf [SHORT_STRING_LEN + UTF8_MAXBYTES];
655	char *cur = buf;	667	char *cur = buf;
656		668
657	do	669	do
658	{	670	{
659	unsigned char ch = *(unsigned char *)dec->cur++;	671	unsigned char ch = *(unsigned char *)dec_cur++;
660		672
661	if (expect_false (ch == '"'))	673	if (expect_false (ch == '"'))
662	{	674	{
663	--dec->cur;	675	--dec_cur;
664	break;	676	break;
665	}	677	}
666	else if (expect_false (ch == '\\'))	678	else if (expect_false (ch == '\\'))
667	{	679	{
668	switch (*dec->cur)	680	switch (*dec_cur)
669	{	681	{
670	case '\\':	682	case '\\':
671	case '/':	683	case '/':
672	case '"': *cur++ = *dec->cur++; break;	684	case '"': *cur++ = *dec_cur++; break;
673		685
674	case 'b': ++dec->cur; *cur++ = '\010'; break;	686	case 'b': ++dec_cur; *cur++ = '\010'; break;
675	case 't': ++dec->cur; *cur++ = '\011'; break;	687	case 't': ++dec_cur; *cur++ = '\011'; break;
676	case 'n': ++dec->cur; *cur++ = '\012'; break;	688	case 'n': ++dec_cur; *cur++ = '\012'; break;
677	case 'f': ++dec->cur; *cur++ = '\014'; break;	689	case 'f': ++dec_cur; *cur++ = '\014'; break;
678	case 'r': ++dec->cur; *cur++ = '\015'; break;	690	case 'r': ++dec_cur; *cur++ = '\015'; break;
679		691
680	case 'u':	692	case 'u':
681	{	693	{
682	UV lo, hi;	694	UV lo, hi;
683	++dec->cur;	695	++dec_cur;
684		696
		697	dec->cur = dec_cur;
685	hi = decode_4hex (dec);	698	hi = decode_4hex (dec);
		699	dec_cur = dec->cur;
686	if (hi == (UV)-1)	700	if (hi == (UV)-1)
687	goto fail;	701	goto fail;
688		702
689	// possibly a surrogate pair	703	// possibly a surrogate pair
690	if (hi >= 0xd800)	704	if (hi >= 0xd800)
691	if (hi < 0xdc00)	705	if (hi < 0xdc00)
692	{	706	{
693	if (dec->cur [0] != '\\' || dec->cur [1] != 'u')	707	if (dec_cur [0] != '\\' || dec_cur [1] != 'u')
694	ERR ("missing low surrogate character in surrogate pair");	708	ERR ("missing low surrogate character in surrogate pair");
695		709
696	dec->cur += 2;	710	dec_cur += 2;
697		711
		712	dec->cur = dec_cur;
698	lo = decode_4hex (dec);	713	lo = decode_4hex (dec);
		714	dec_cur = dec->cur;
699	if (lo == (UV)-1)	715	if (lo == (UV)-1)
700	goto fail;	716	goto fail;
701		717
702	if (lo < 0xdc00 || lo >= 0xe000)	718	if (lo < 0xdc00 || lo >= 0xe000)
703	ERR ("surrogate pair expected");	719	ERR ("surrogate pair expected");
…		…
717	*cur++ = hi;	733	*cur++ = hi;
718	}	734	}
719	break;	735	break;
720		736
721	default:	737	default:
722	--dec->cur;	738	--dec_cur;
723	ERR ("illegal backslash escape sequence in string");	739	ERR ("illegal backslash escape sequence in string");
724	}	740	}
725	}	741	}
726	else if (expect_true (ch >= 0x20 && ch <= 0x7f))	742	else if (expect_true (ch >= 0x20 && ch <= 0x7f))
727	*cur++ = ch;	743	*cur++ = ch;
728	else if (ch >= 0x80)	744	else if (ch >= 0x80)
729	{	745	{
730	STRLEN clen;	746	STRLEN clen;
731	UV uch;	747	UV uch;
732		748
733	--dec->cur;	749	--dec_cur;
734		750
735	uch = decode_utf8 (dec->cur, dec->end - dec->cur, &clen);	751	uch = decode_utf8 (dec_cur, dec->end - dec_cur, &clen);
736	if (clen == (STRLEN)-1)	752	if (clen == (STRLEN)-1)
737	ERR ("malformed UTF-8 character in JSON string");	753	ERR ("malformed UTF-8 character in JSON string");
738		754
739	do	755	do
740	*cur++ = *dec->cur++;	756	*cur++ = *dec_cur++;
741	while (--clen);	757	while (--clen);
742		758
743	utf8 = 1;	759	utf8 = 1;
744	}	760	}
745	else	761	else
746	{	762	{
747	--dec->cur;	763	--dec_cur;
748		764
749	if (!ch)	765	if (!ch)
750	ERR ("unexpected end of string while parsing JSON string");	766	ERR ("unexpected end of string while parsing JSON string");
751	else	767	else
752	ERR ("invalid character encountered while parsing JSON string");	768	ERR ("invalid character encountered while parsing JSON string");
…		…
765	}	781	}
766	else	782	else
767	sv = newSVpvn (buf, len);	783	sv = newSVpvn (buf, len);
768	}	784	}
769	}	785	}
770	while (*dec->cur != '"');	786	while (*dec_cur != '"');
771		787
772	++dec->cur;	788	++dec_cur;
773		789
774	if (sv)	790	if (sv)
775	{	791	{
776	SvPOK_only (sv);	792	SvPOK_only (sv);
777	*SvEND (sv) = 0;	793	*SvEND (sv) = 0;
…		…
780	SvUTF8_on (sv);	796	SvUTF8_on (sv);
781	}	797	}
782	else	798	else
783	sv = newSVpvn ("", 0);	799	sv = newSVpvn ("", 0);
784		800
		801	dec->cur = dec_cur;
785	return sv;	802	return sv;
786		803
787	fail:	804	fail:
		805	dec->cur = dec_cur;
788	return 0;	806	return 0;
789	}	807	}
790		808
791	static SV *	809	static SV *
792	decode_num (dec_t *dec)	810	decode_num (dec_t *dec)
…		…
854	{	872	{
855	// special case the rather common 1..4-digit-int case, assumes 32 bit ints or so	873	// special case the rather common 1..4-digit-int case, assumes 32 bit ints or so
856	if (*start == '-')	874	if (*start == '-')
857	switch (dec->cur - start)	875	switch (dec->cur - start)
858	{	876	{
859	case 2: return newSViv (-( start [1] - '0' ));	877	case 2: return newSViv (-( start [1] - '0' * 1));
860	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));	878	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));
861	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));	879	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
862	case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));	880	case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
863	}	881	}
864	else	882	else
865	switch (dec->cur - start)	883	switch (dec->cur - start)
866	{	884	{
867	case 1: return newSViv ( start [0] - '0' );	885	case 1: return newSViv ( start [0] - '0' * 1);
868	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);	886	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
869	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);	887	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
870	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);	888	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
871	}	889	}
872		890
…		…
879	if (uv < (UV)IV_MIN)	897	if (uv < (UV)IV_MIN)
880	return newSViv (-(IV)uv);	898	return newSViv (-(IV)uv);
881	}	899	}
882	else	900	else
883	return newSVuv (uv);	901	return newSVuv (uv);
		902
		903	// here would likely be the place for bigint support
884	}	904	}
885	}	905	}
886		906
		907	// if we ever support bigint or bigfloat, this is the place for bigfloat
887	return newSVnv (Atof (start));	908	return newSVnv (Atof (start));
888		909
889	fail:	910	fail:
890	return 0;	911	return 0;
891	}	912	}
…		…
992		1013
993	static SV *	1014	static SV *
994	decode_sv (dec_t *dec)	1015	decode_sv (dec_t *dec)
995	{	1016	{
996	decode_ws (dec);	1017	decode_ws (dec);
		1018
		1019	// the beauty of JSON: you need exactly one character lookahead
		1020	// to parse anything.
997	switch (*dec->cur)	1021	switch (*dec->cur)
998	{	1022	{
999	case '"': ++dec->cur; return decode_str (dec);	1023	case '"': ++dec->cur; return decode_str (dec);
1000	case '[': ++dec->cur; return decode_av (dec);	1024	case '[': ++dec->cur; return decode_av (dec);
1001	case '{': ++dec->cur; return decode_hv (dec);	1025	case '{': ++dec->cur; return decode_hv (dec);
…		…
1130	MODULE = JSON::XS PACKAGE = JSON::XS	1154	MODULE = JSON::XS PACKAGE = JSON::XS
1131		1155
1132	BOOT:	1156	BOOT:
1133	{	1157	{
1134	int i;	1158	int i;
1135
1136	memset (decode_hexdigit, 0xff, 256);
1137		1159
1138	for (i = 0; i < 256; ++i)	1160	for (i = 0; i < 256; ++i)
1139	decode_hexdigit [i] =	1161	decode_hexdigit [i] =
1140	i >= '0' && i <= '9' ? i - '0'	1162	i >= '0' && i <= '9' ? i - '0'
1141	: i >= 'a' && i <= 'f' ? i - 'a' + 10	1163	: i >= 'a' && i <= 'f' ? i - 'a' + 10

Diff Legend

–	Removed lines
+	Added lines
<	Changed lines
>	Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.37 by root, Wed Jun 6 17:49:01 2007 UTC vs. Revision 1.41 by root, Sat Jun 23 22:53:16 2007 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.37 by root, Wed Jun 6 17:49:01 2007 UTC vs.
Revision 1.41 by root, Sat Jun 23 22:53:16 2007 UTC