[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.67 by root, Wed Nov 28 14:01:01 2007 UTC vs.
Revision 1.81 by root, Wed Mar 26 01:40:43 2008 UTC

…		…
4		4
5	#include <assert.h>	5	#include <assert.h>
6	#include <string.h>	6	#include <string.h>
7	#include <stdlib.h>	7	#include <stdlib.h>
8	#include <stdio.h>	8	#include <stdio.h>
		9	#include <limits.h>
9	#include <float.h>	10	#include <float.h>
10		11
11	#if defined(__BORLANDC__) || defined(_MSC_VER)	12	#if defined(__BORLANDC__) || defined(_MSC_VER)
12	# define snprintf _snprintf // C compilers have this in stdio.h	13	# define snprintf _snprintf // C compilers have this in stdio.h
13	#endif	14	#endif
…		…
15	// some old perls do not have this, try to make it work, no	16	// some old perls do not have this, try to make it work, no
16	// guarentees, though. if it breaks, you get to keep the pieces.	17	// guarentees, though. if it breaks, you get to keep the pieces.
17	#ifndef UTF8_MAXBYTES	18	#ifndef UTF8_MAXBYTES
18	# define UTF8_MAXBYTES 13	19	# define UTF8_MAXBYTES 13
19	#endif	20	#endif
		21
		22	#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2)
20		23
21	#define F_ASCII 0x00000001UL	24	#define F_ASCII 0x00000001UL
22	#define F_LATIN1 0x00000002UL	25	#define F_LATIN1 0x00000002UL
23	#define F_UTF8 0x00000004UL	26	#define F_UTF8 0x00000004UL
24	#define F_INDENT 0x00000008UL	27	#define F_INDENT 0x00000008UL
…		…
50		53
51	#define SB do {	54	#define SB do {
52	#define SE } while (0)	55	#define SE } while (0)
53		56
54	#if __GNUC__ >= 3	57	#if __GNUC__ >= 3
55	# define expect(expr,value) __builtin_expect ((expr),(value))	58	# define expect(expr,value) __builtin_expect ((expr), (value))
56	# define inline inline	59	# define INLINE static inline
57	#else	60	#else
58	# define expect(expr,value) (expr)	61	# define expect(expr,value) (expr)
59	# define inline static	62	# define INLINE static
60	#endif	63	#endif
61		64
62	#define expect_false(expr) expect ((expr) != 0, 0)	65	#define expect_false(expr) expect ((expr) != 0, 0)
63	#define expect_true(expr) expect ((expr) != 0, 1)	66	#define expect_true(expr) expect ((expr) != 0, 1)
		67
		68	#define IN_RANGE_INC(type,val,beg,end) \
		69	((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
		70	<= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
64		71
65	#ifdef USE_ITHREADS	72	#ifdef USE_ITHREADS
66	# define JSON_SLOW 1	73	# define JSON_SLOW 1
67	# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))	74	# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
68	#else	75	#else
…		…
71	#endif	78	#endif
72		79
73	static HV *json_stash, *json_boolean_stash; // JSON::XS::	80	static HV *json_stash, *json_boolean_stash; // JSON::XS::
74	static SV *json_true, *json_false;	81	static SV *json_true, *json_false;
75		82
		83	enum {
		84	INCR_M_WS = 0, // initial whitespace skipping, must be 0
		85	INCR_M_STR, // inside string
		86	INCR_M_BS, // inside backslash
		87	INCR_M_JSON // outside anything, count nesting
		88	};
		89
		90	#define INCR_DONE(json) (!(json)->incr_nest && (json)->incr_mode == INCR_M_JSON)
		91
76	typedef struct {	92	typedef struct {
77	U32 flags;	93	U32 flags;
78	SV *cb_object;	94	SV *cb_object;
79	HV *cb_sk_object;	95	HV *cb_sk_object;
		96
		97	// for the incremental parser
		98	SV *incr_text; // the source text so far
		99	STRLEN incr_pos; // the current offset into the text
		100	int incr_nest; // {[]}-nesting level
		101	int incr_mode;
80	} JSON;	102	} JSON;
81		103
82	/////////////////////////////////////////////////////////////////////////////	104	/////////////////////////////////////////////////////////////////////////////
83	// utility functions	105	// utility functions
84		106
85	inline void	107	INLINE void
86	shrink (SV *sv)	108	shrink (SV *sv)
87	{	109	{
88	sv_utf8_downgrade (sv, 1);	110	sv_utf8_downgrade (sv, 1);
89	if (SvLEN (sv) > SvCUR (sv) + 1)	111	if (SvLEN (sv) > SvCUR (sv) + 1)
90	{	112	{
…		…
99	// decode an utf-8 character and return it, or (UV)-1 in	121	// decode an utf-8 character and return it, or (UV)-1 in
100	// case of an error.	122	// case of an error.
101	// we special-case "safe" characters from U+80 .. U+7FF,	123	// we special-case "safe" characters from U+80 .. U+7FF,
102	// but use the very good perl function to parse anything else.	124	// but use the very good perl function to parse anything else.
103	// note that we never call this function for a ascii codepoints	125	// note that we never call this function for a ascii codepoints
104	inline UV	126	INLINE UV
105	decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)	127	decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)
106	{	128	{
107	if (expect_false (s[0] > 0xdf || s[0] < 0xc2))	129	if (expect_true (len >= 2
108	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);	130	&& IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
109	else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)	131	&& IN_RANGE_INC (char, s[1], 0x80, 0xbf)))
110	{	132	{
111	*clen = 2;	133	*clen = 2;
112	return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);	134	return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
113	}	135	}
114	else	136	else
115	{	137	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
116	*clen = (STRLEN)-1;	138	}
117	return (UV)-1;	139
118	}	140	// likewise for encoding, also never called for ascii codepoints
		141	// this function takes advantage of this fact, although current gccs
		142	// seem to optimise the check for >= 0x80 away anyways
		143	INLINE unsigned char *
		144	encode_utf8 (unsigned char *s, UV ch)
		145	{
		146	if (expect_false (ch < 0x000080))
		147	*s++ = ch;
		148	else if (expect_true (ch < 0x000800))
		149	*s++ = 0xc0 | ( ch >> 6),
		150	*s++ = 0x80 | ( ch & 0x3f);
		151	else if ( ch < 0x010000)
		152	*s++ = 0xe0 | ( ch >> 12),
		153	*s++ = 0x80 | ((ch >> 6) & 0x3f),
		154	*s++ = 0x80 | ( ch & 0x3f);
		155	else if ( ch < 0x110000)
		156	*s++ = 0xf0 | ( ch >> 18),
		157	*s++ = 0x80 | ((ch >> 12) & 0x3f),
		158	*s++ = 0x80 | ((ch >> 6) & 0x3f),
		159	*s++ = 0x80 | ( ch & 0x3f);
		160
		161	return s;
119	}	162	}
120		163
121	/////////////////////////////////////////////////////////////////////////////	164	/////////////////////////////////////////////////////////////////////////////
122	// encoder	165	// encoder
123		166
…		…
128	char *end; // SvEND (sv)	171	char *end; // SvEND (sv)
129	SV *sv; // result scalar	172	SV *sv; // result scalar
130	JSON json;	173	JSON json;
131	U32 indent; // indentation level	174	U32 indent; // indentation level
132	U32 maxdepth; // max. indentation/recursion level	175	U32 maxdepth; // max. indentation/recursion level
		176	UV limit; // escape character values >= this value when encoding
133	} enc_t;	177	} enc_t;
134		178
135	inline void	179	INLINE void
136	need (enc_t *enc, STRLEN len)	180	need (enc_t *enc, STRLEN len)
137	{	181	{
138	if (expect_false (enc->cur + len >= enc->end))	182	if (expect_false (enc->cur + len >= enc->end))
139	{	183	{
140	STRLEN cur = enc->cur - SvPVX (enc->sv);	184	STRLEN cur = enc->cur - SvPVX (enc->sv);
…		…
142	enc->cur = SvPVX (enc->sv) + cur;	186	enc->cur = SvPVX (enc->sv) + cur;
143	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;	187	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
144	}	188	}
145	}	189	}
146		190
147	inline void	191	INLINE void
148	encode_ch (enc_t *enc, char ch)	192	encode_ch (enc_t *enc, char ch)
149	{	193	{
150	need (enc, 1);	194	need (enc, 1);
151	*enc->cur++ = ch;	195	*enc->cur++ = ch;
152	}	196	}
…		…
206	{	250	{
207	uch = ch;	251	uch = ch;
208	clen = 1;	252	clen = 1;
209	}	253	}
210		254
211	if (uch > 0x10FFFFUL)	255	if (uch < 0x80/*0x20*/ || uch >= enc->limit)
212	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
213
214	if (uch < 0x80 || enc->json.flags & F_ASCII || (enc->json.flags & F_LATIN1 && uch > 0xFF))
215	{	256	{
216	if (uch > 0xFFFFUL)	257	if (uch >= 0x10000UL)
217	{	258	{
		259	if (uch >= 0x110000UL)
		260	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
		261
218	need (enc, len += 11);	262	need (enc, len += 11);
219	sprintf (enc->cur, "\\u%04x\\u%04x",	263	sprintf (enc->cur, "\\u%04x\\u%04x",
220	(int)((uch - 0x10000) / 0x400 + 0xD800),	264	(int)((uch - 0x10000) / 0x400 + 0xD800),
221	(int)((uch - 0x10000) % 0x400 + 0xDC00));	265	(int)((uch - 0x10000) % 0x400 + 0xDC00));
222	enc->cur += 12;	266	enc->cur += 12;
…		…
250	while (--clen);	294	while (--clen);
251	}	295	}
252	else	296	else
253	{	297	{
254	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed	298	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
255	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);	299	enc->cur = encode_utf8 (enc->cur, uch);
256	++str;	300	++str;
257	}	301	}
258	}	302	}
259	}	303	}
260	}	304	}
261		305
262	--len;	306	--len;
263	}	307	}
264	}	308	}
265		309
266	inline void	310	INLINE void
267	encode_indent (enc_t *enc)	311	encode_indent (enc_t *enc)
268	{	312	{
269	if (enc->json.flags & F_INDENT)	313	if (enc->json.flags & F_INDENT)
270	{	314	{
271	int spaces = enc->indent * INDENT_STEP;	315	int spaces = enc->indent * INDENT_STEP;
…		…
274	memset (enc->cur, ' ', spaces);	318	memset (enc->cur, ' ', spaces);
275	enc->cur += spaces;	319	enc->cur += spaces;
276	}	320	}
277	}	321	}
278		322
279	inline void	323	INLINE void
280	encode_space (enc_t *enc)	324	encode_space (enc_t *enc)
281	{	325	{
282	need (enc, 1);	326	need (enc, 1);
283	encode_ch (enc, ' ');	327	encode_ch (enc, ' ');
284	}	328	}
285		329
286	inline void	330	INLINE void
287	encode_nl (enc_t *enc)	331	encode_nl (enc_t *enc)
288	{	332	{
289	if (enc->json.flags & F_INDENT)	333	if (enc->json.flags & F_INDENT)
290	{	334	{
291	need (enc, 1);	335	need (enc, 1);
292	encode_ch (enc, '\n');	336	encode_ch (enc, '\n');
293	}	337	}
294	}	338	}
295		339
296	inline void	340	INLINE void
297	encode_comma (enc_t *enc)	341	encode_comma (enc_t *enc)
298	{	342	{
299	encode_ch (enc, ',');	343	encode_ch (enc, ',');
300		344
301	if (enc->json.flags & F_INDENT)	345	if (enc->json.flags & F_INDENT)
…		…
394		438
395	static void	439	static void
396	encode_hv (enc_t *enc, HV *hv)	440	encode_hv (enc_t *enc, HV *hv)
397	{	441	{
398	HE *he;	442	HE *he;
399	int count;
400		443
401	if (enc->indent >= enc->maxdepth)	444	if (enc->indent >= enc->maxdepth)
402	croak ("data structure too deep (hit recursion limit)");	445	croak ("data structure too deep (hit recursion limit)");
403		446
404	encode_ch (enc, '{');	447	encode_ch (enc, '{');
…		…
619	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);	662	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
620	enc->cur += strlen (enc->cur);	663	enc->cur += strlen (enc->cur);
621	}	664	}
622	else if (SvIOKp (sv))	665	else if (SvIOKp (sv))
623	{	666	{
624	// we assume we can always read an IV as a UV	667	// we assume we can always read an IV as a UV and vice versa
625	if (SvUV (sv) & ~(UV)0x7fff)	668	// we assume two's complement
626	{	669	// we assume no aliasing issues in the union
627	// large integer, use the (rather slow) snprintf way.	670	if (SvIsUV (sv) ? SvUVX (sv) <= 59000
628	need (enc, sizeof (UV) * 3);	671	: SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000)
629	enc->cur +=
630	SvIsUV(sv)
631	? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
632	: snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
633	}
634	else
635	{	672	{
636	// optimise the "small number case"	673	// optimise the "small number case"
637	// code will likely be branchless and use only a single multiplication	674	// code will likely be branchless and use only a single multiplication
		675	// works for numbers up to 59074
638	I32 i = SvIV (sv);	676	I32 i = SvIVX (sv);
639	U32 u;	677	U32 u;
640	char digit, nz = 0;	678	char digit, nz = 0;
641		679
642	need (enc, 6);	680	need (enc, 6);
643		681
…		…
649		687
650	// now output digit by digit, each time masking out the integer part	688	// now output digit by digit, each time masking out the integer part
651	// and multiplying by 5 while moving the decimal point one to the right,	689	// and multiplying by 5 while moving the decimal point one to the right,
652	// resulting in a net multiplication by 10.	690	// resulting in a net multiplication by 10.
653	// we always write the digit to memory but conditionally increment	691	// we always write the digit to memory but conditionally increment
654	// the pointer, to ease the usage of conditional move instructions.	692	// the pointer, to enable the use of conditional move instructions.
655	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5;	693	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5;
656	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5;	694	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5;
657	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5;	695	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5;
658	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5;	696	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5;
659	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'	697	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
		698	}
		699	else
		700	{
		701	// large integer, use the (rather slow) snprintf way.
		702	need (enc, IVUV_MAXCHARS);
		703	enc->cur +=
		704	SvIsUV(sv)
		705	? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv))
		706	: snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv));
660	}	707	}
661	}	708	}
662	else if (SvROK (sv))	709	else if (SvROK (sv))
663	encode_rv (enc, SvRV (sv));	710	encode_rv (enc, SvRV (sv));
664	else if (!SvOK (sv))	711	else if (!SvOK (sv))
…		…
680	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));	727	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
681	enc.cur = SvPVX (enc.sv);	728	enc.cur = SvPVX (enc.sv);
682	enc.end = SvEND (enc.sv);	729	enc.end = SvEND (enc.sv);
683	enc.indent = 0;	730	enc.indent = 0;
684	enc.maxdepth = DEC_DEPTH (enc.json.flags);	731	enc.maxdepth = DEC_DEPTH (enc.json.flags);
		732	enc.limit = enc.json.flags & F_ASCII ? 0x000080UL
		733	: enc.json.flags & F_LATIN1 ? 0x000100UL
		734	: 0x110000UL;
685		735
686	SvPOK_only (enc.sv);	736	SvPOK_only (enc.sv);
687	encode_sv (&enc, scalar);	737	encode_sv (&enc, scalar);
688		738
689	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));	739	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
…		…
710	JSON json;	760	JSON json;
711	U32 depth; // recursion depth	761	U32 depth; // recursion depth
712	U32 maxdepth; // recursion depth limit	762	U32 maxdepth; // recursion depth limit
713	} dec_t;	763	} dec_t;
714		764
715	inline void	765	INLINE void
716	decode_comment (dec_t *dec)	766	decode_comment (dec_t *dec)
717	{	767	{
718	// only '#'-style comments allowed a.t.m.	768	// only '#'-style comments allowed a.t.m.
719		769
720	while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d)	770	while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d)
721	++dec->cur;	771	++dec->cur;
722	}	772	}
723		773
724	inline void	774	INLINE void
725	decode_ws (dec_t *dec)	775	decode_ws (dec_t *dec)
726	{	776	{
727	for (;;)	777	for (;;)
728	{	778	{
729	char ch = *dec->cur;	779	char ch = *dec->cur;
…		…
855		905
856	if (hi >= 0x80)	906	if (hi >= 0x80)
857	{	907	{
858	utf8 = 1;	908	utf8 = 1;
859		909
860	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);	910	cur = encode_utf8 (cur, hi);
861	}	911	}
862	else	912	else
863	*cur++ = hi;	913	*cur++ = hi;
864	}	914	}
865	break;	915	break;
…		…
867	default:	917	default:
868	--dec_cur;	918	--dec_cur;
869	ERR ("illegal backslash escape sequence in string");	919	ERR ("illegal backslash escape sequence in string");
870	}	920	}
871	}	921	}
872	else if (expect_true (ch >= 0x20 && ch <= 0x7f))	922	else if (expect_true (ch >= 0x20 && ch < 0x80))
873	*cur++ = ch;	923	*cur++ = ch;
874	else if (ch >= 0x80)	924	else if (ch >= 0x80)
875	{	925	{
876	STRLEN clen;	926	STRLEN clen;
877	UV uch;	927	UV uch;
…		…
1000		1050
1001	if (!is_nv)	1051	if (!is_nv)
1002	{	1052	{
1003	int len = dec->cur - start;	1053	int len = dec->cur - start;
1004		1054
1005	// special case the rather common 1..4-digit-int case, assumes 32 bit ints or so	1055	// special case the rather common 1..5-digit-int case
1006	if (*start == '-')	1056	if (*start == '-')
1007	switch (len)	1057	switch (len)
1008	{	1058	{
1009	case 2: return newSViv (-( start [1] - '0' * 1));	1059	case 2: return newSViv (-( start [1] - '0' * 1));
1010	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));	1060	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));
1011	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));	1061	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
1012	case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));	1062	case 5: return newSViv (-( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
		1063	case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111));
1013	}	1064	}
1014	else	1065	else
1015	switch (len)	1066	switch (len)
1016	{	1067	{
1017	case 1: return newSViv ( start [0] - '0' * 1);	1068	case 1: return newSViv ( start [0] - '0' * 1);
1018	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);	1069	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
1019	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);	1070	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
1020	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);	1071	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
		1072	case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111);
1021	}	1073	}
1022		1074
1023	{	1075	{
1024	UV uv;	1076	UV uv;
1025	int numtype = grok_number (start, len, &uv);	1077	int numtype = grok_number (start, len, &uv);
…		…
1133	char *p = dec->cur;	1185	char *p = dec->cur;
1134	char *e = p + 24; // only try up to 24 bytes	1186	char *e = p + 24; // only try up to 24 bytes
1135		1187
1136	for (;;)	1188	for (;;)
1137	{	1189	{
1138	// the >= 0x80 is true on most architectures	1190	// the >= 0x80 is false on most architectures
1139	if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\')	1191	if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\')
1140	{	1192	{
1141	// slow path, back up and use decode_str	1193	// slow path, back up and use decode_str
1142	SV *key = decode_str (dec);	1194	SV *key = decode_str (dec);
1143	if (!key)	1195	if (!key)
…		…
1274		1326
1275	static SV *	1327	static SV *
1276	decode_sv (dec_t *dec)	1328	decode_sv (dec_t *dec)
1277	{	1329	{
1278	// the beauty of JSON: you need exactly one character lookahead	1330	// the beauty of JSON: you need exactly one character lookahead
1279	// to parse anything.	1331	// to parse everything.
1280	switch (*dec->cur)	1332	switch (*dec->cur)
1281	{	1333	{
1282	case '"': ++dec->cur; return decode_str (dec);	1334	case '"': ++dec->cur; return decode_str (dec);
1283	case '[': ++dec->cur; return decode_av (dec);	1335	case '[': ++dec->cur; return decode_av (dec);
1284	case '{': ++dec->cur; return decode_hv (dec);	1336	case '{': ++dec->cur; return decode_hv (dec);
1285		1337
1286	case '-':	1338	case '-':
1287	case '0': case '1': case '2': case '3': case '4':	1339	case '0': case '1': case '2': case '3': case '4':
1288	case '5': case '6': case '7': case '8': case '9':	1340	case '5': case '6': case '7': case '8': case '9':
1289	return decode_num (dec);	1341	return decode_num (dec);
…		…
1335	fail:	1387	fail:
1336	return 0;	1388	return 0;
1337	}	1389	}
1338		1390
1339	static SV *	1391	static SV *
1340	decode_json (SV *string, JSON *json, UV *offset_return)	1392	decode_json (SV *string, JSON *json, STRLEN *offset_return)
1341	{	1393	{
1342	dec_t dec;	1394	dec_t dec;
1343	UV offset;	1395	STRLEN offset;
1344	SV *sv;	1396	SV *sv;
1345		1397
1346	SvGETMAGIC (string);	1398	SvGETMAGIC (string);
1347	SvUPGRADE (string, SVt_PV);	1399	SvUPGRADE (string, SVt_PV);
1348		1400
…		…
1421		1473
1422	return sv;	1474	return sv;
1423	}	1475	}
1424		1476
1425	/////////////////////////////////////////////////////////////////////////////	1477	/////////////////////////////////////////////////////////////////////////////
		1478	// incremental parser
		1479
		1480	static void
		1481	incr_parse (JSON *self)
		1482	{
		1483	const char *p = SvPVX (self->incr_text) + self->incr_pos;
		1484
		1485	for (;;)
		1486	{
		1487	//printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D
		1488	switch (self->incr_mode)
		1489	{
		1490	// only used for intiial whitespace skipping
		1491	case INCR_M_WS:
		1492	for (;;)
		1493	{
		1494	if (*p > 0x20)
		1495	{
		1496	self->incr_mode = INCR_M_JSON;
		1497	goto incr_m_json;
		1498	}
		1499	else if (!*p)
		1500	goto interrupt;
		1501
		1502	++p;
		1503	}
		1504
		1505	// skip a single char inside a string (for \\-processing)
		1506	case INCR_M_BS:
		1507	if (!*p)
		1508	goto interrupt;
		1509
		1510	++p;
		1511	self->incr_mode = INCR_M_STR;
		1512	goto incr_m_str;
		1513
		1514	// inside a string
		1515	case INCR_M_STR:
		1516	incr_m_str:
		1517	for (;;)
		1518	{
		1519	if (*p == '"')
		1520	{
		1521	++p;
		1522	self->incr_mode = INCR_M_JSON;
		1523
		1524	if (!self->incr_nest)
		1525	goto interrupt;
		1526
		1527	goto incr_m_json;
		1528	}
		1529	else if (*p == '\\')
		1530	{
		1531	++p; // "virtually" consumes character after \
		1532
		1533	if (!*p) // if at end of string we have to switch modes
		1534	{
		1535	self->incr_mode = INCR_M_BS;
		1536	goto interrupt;
		1537	}
		1538	}
		1539	else if (!*p)
		1540	goto interrupt;
		1541
		1542	++p;
		1543	}
		1544
		1545	// after initial ws, outside string
		1546	case INCR_M_JSON:
		1547	incr_m_json:
		1548	for (;;)
		1549	{
		1550	switch (*p++)
		1551	{
		1552	case 0:
		1553	--p;
		1554	goto interrupt;
		1555
		1556	case 0x09:
		1557	case 0x0a:
		1558	case 0x0d:
		1559	case 0x20:
		1560	if (!self->incr_nest)
		1561	{
		1562	--p; // do not eat the whitespace, let the next round do it
		1563	goto interrupt;
		1564	}
		1565	break;
		1566
		1567	case '"':
		1568	self->incr_mode = INCR_M_STR;
		1569	goto incr_m_str;
		1570
		1571	case '[':
		1572	case '{':
		1573	++self->incr_nest;
		1574	break;
		1575
		1576	case ']':
		1577	case '}':
		1578	if (!--self->incr_nest)
		1579	goto interrupt;
		1580	}
		1581	}
		1582	}
		1583
		1584	modechange:
		1585	;
		1586	}
		1587
		1588	interrupt:
		1589	self->incr_pos = p - SvPVX (self->incr_text);
		1590	//printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D
		1591	}
		1592
		1593	/////////////////////////////////////////////////////////////////////////////
1426	// XS interface functions	1594	// XS interface functions
1427		1595
1428	MODULE = JSON::XS PACKAGE = JSON::XS	1596	MODULE = JSON::XS PACKAGE = JSON::XS
1429		1597
1430	BOOT:	1598	BOOT:
…		…
1457	{	1625	{
1458	SV *pv = NEWSV (0, sizeof (JSON));	1626	SV *pv = NEWSV (0, sizeof (JSON));
1459	SvPOK_only (pv);	1627	SvPOK_only (pv);
1460	Zero (SvPVX (pv), 1, JSON);	1628	Zero (SvPVX (pv), 1, JSON);
1461	((JSON *)SvPVX (pv))->flags = F_DEFAULT;	1629	((JSON *)SvPVX (pv))->flags = F_DEFAULT;
1462	XPUSHs (sv_2mortal (sv_bless (newRV_noinc (pv), JSON_STASH)));	1630	XPUSHs (sv_2mortal (sv_bless (
		1631	newRV_noinc (pv),
		1632	strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1)
		1633	)));
1463	}	1634	}
1464		1635
1465	void ascii (JSON *self, int enable = 1)	1636	void ascii (JSON *self, int enable = 1)
1466	ALIAS:	1637	ALIAS:
1467	ascii = F_ASCII	1638	ascii = F_ASCII
…		…
1494	get_utf8 = F_UTF8	1665	get_utf8 = F_UTF8
1495	get_indent = F_INDENT	1666	get_indent = F_INDENT
1496	get_canonical = F_CANONICAL	1667	get_canonical = F_CANONICAL
1497	get_space_before = F_SPACE_BEFORE	1668	get_space_before = F_SPACE_BEFORE
1498	get_space_after = F_SPACE_AFTER	1669	get_space_after = F_SPACE_AFTER
1499	get_pretty = F_PRETTY
1500	get_allow_nonref = F_ALLOW_NONREF	1670	get_allow_nonref = F_ALLOW_NONREF
1501	get_shrink = F_SHRINK	1671	get_shrink = F_SHRINK
1502	get_allow_blessed = F_ALLOW_BLESSED	1672	get_allow_blessed = F_ALLOW_BLESSED
1503	get_convert_blessed = F_CONV_BLESSED	1673	get_convert_blessed = F_CONV_BLESSED
1504	get_relaxed = F_RELAXED	1674	get_relaxed = F_RELAXED
…		…
1588	XPUSHs (decode_json (jsonstr, self, 0));	1758	XPUSHs (decode_json (jsonstr, self, 0));
1589		1759
1590	void decode_prefix (JSON *self, SV *jsonstr)	1760	void decode_prefix (JSON *self, SV *jsonstr)
1591	PPCODE:	1761	PPCODE:
1592	{	1762	{
1593	UV offset;	1763	STRLEN offset;
1594	EXTEND (SP, 2);	1764	EXTEND (SP, 2);
1595	PUSHs (decode_json (jsonstr, self, &offset));	1765	PUSHs (decode_json (jsonstr, self, &offset));
1596	PUSHs (sv_2mortal (newSVuv (offset)));	1766	PUSHs (sv_2mortal (newSVuv (offset)));
		1767	}
		1768
		1769	void incr_parse (JSON *self, SV *jsonstr = 0)
		1770	PPCODE:
		1771	{
		1772	if (!self->incr_text)
		1773	self->incr_text = newSVpvn ("", 0);
		1774
		1775	// append data, if any
		1776	if (jsonstr)
		1777	{
		1778	if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text))
		1779	{
		1780	/* utf-8-ness differs, need to upgrade */
		1781	sv_utf8_upgrade (self->incr_text);
		1782
		1783	if (self->incr_pos)
		1784	self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
		1785	- (U8 *)SvPVX (self->incr_text);
		1786	}
		1787
		1788	{
		1789	STRLEN len;
		1790	const char *str = SvPV (jsonstr, len);
		1791	SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1);
		1792	Move (str, SvEND (self->incr_text), len, char);
		1793	SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len);
		1794	*SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there
		1795	}
		1796	}
		1797
		1798	if (GIMME_V != G_VOID)
		1799	do
		1800	{
		1801	STRLEN offset;
		1802
		1803	incr_parse (self);
		1804
		1805	if (!INCR_DONE (self))
		1806	break;
		1807
		1808	XPUSHs (decode_json (self->incr_text, self, &offset));
		1809
		1810	sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset);
		1811	self->incr_pos -= offset;
		1812	self->incr_nest = 0;
		1813	self->incr_mode = 0;
		1814	}
		1815	while (GIMME_V == G_ARRAY);
		1816	}
		1817
		1818	SV *incr_text (JSON *self)
		1819	ATTRS: lvalue
		1820	CODE:
		1821	{
		1822	if (self->incr_pos)
		1823	croak ("incr_text can only be called after a successful incr_parse call in scalar context");
		1824
		1825	RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef;
		1826	}
		1827	OUTPUT:
		1828	RETVAL
		1829
		1830	void incr_skip (JSON *self)
		1831	CODE:
		1832	{
		1833	if (self->incr_pos)
		1834	{
		1835	sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos);
		1836	self->incr_pos = 0;
		1837	self->incr_nest = 0;
		1838	self->incr_mode = 0;
		1839	}
1597	}	1840	}
1598		1841
1599	void DESTROY (JSON *self)	1842	void DESTROY (JSON *self)
1600	CODE:	1843	CODE:
1601	SvREFCNT_dec (self->cb_sk_object);	1844	SvREFCNT_dec (self->cb_sk_object);
1602	SvREFCNT_dec (self->cb_object);	1845	SvREFCNT_dec (self->cb_object);
		1846	SvREFCNT_dec (self->incr_text);
1603		1847
1604	PROTOTYPES: ENABLE	1848	PROTOTYPES: ENABLE
1605		1849
1606	void to_json (SV *scalar)	1850	void encode_json (SV *scalar)
		1851	ALIAS:
		1852	to_json_ = 0
		1853	encode_json = F_UTF8
1607	PPCODE:	1854	PPCODE:
1608	{	1855	{
1609	JSON json = { F_DEFAULT | F_UTF8 };	1856	JSON json = { F_DEFAULT | ix };
1610	XPUSHs (encode_json (scalar, &json));	1857	XPUSHs (encode_json (scalar, &json));
1611	}	1858	}
1612		1859
1613	void from_json (SV *jsonstr)	1860	void decode_json (SV *jsonstr)
		1861	ALIAS:
		1862	from_json_ = 0
		1863	decode_json = F_UTF8
1614	PPCODE:	1864	PPCODE:
1615	{	1865	{
1616	JSON json = { F_DEFAULT | F_UTF8 };	1866	JSON json = { F_DEFAULT | ix };
1617	XPUSHs (decode_json (jsonstr, &json, 0));	1867	XPUSHs (decode_json (jsonstr, &json, 0));
1618	}	1868	}
1619		1869
		1870

Diff Legend

– Removed lines
+ Added lines
< Changed lines
> Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.67 by root, Wed Nov 28 14:01:01 2007 UTC vs. Revision 1.81 by root, Wed Mar 26 01:40:43 2008 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.67 by root, Wed Nov 28 14:01:01 2007 UTC vs.
Revision 1.81 by root, Wed Mar 26 01:40:43 2008 UTC