[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.63 by root, Mon Aug 27 01:49:01 2007 UTC vs.
Revision 1.83 by root, Wed Mar 26 22:54:38 2008 UTC

…		…
4		4
5	#include <assert.h>	5	#include <assert.h>
6	#include <string.h>	6	#include <string.h>
7	#include <stdlib.h>	7	#include <stdlib.h>
8	#include <stdio.h>	8	#include <stdio.h>
		9	#include <limits.h>
9	#include <float.h>	10	#include <float.h>
10		11
11	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)	12	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)
12	# define snprintf _snprintf // C compilers have this in stdio.h	13	# define snprintf _snprintf // C compilers have this in stdio.h
13	#endif	14	#endif
…		…
15	// some old perls do not have this, try to make it work, no	16	// some old perls do not have this, try to make it work, no
16	// guarentees, though. if it breaks, you get to keep the pieces.	17	// guarentees, though. if it breaks, you get to keep the pieces.
17	#ifndef UTF8_MAXBYTES	18	#ifndef UTF8_MAXBYTES
18	# define UTF8_MAXBYTES 13	19	# define UTF8_MAXBYTES 13
19	#endif	20	#endif
		21
		22	#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2)
20		23
21	#define F_ASCII 0x00000001UL	24	#define F_ASCII 0x00000001UL
22	#define F_LATIN1 0x00000002UL	25	#define F_LATIN1 0x00000002UL
23	#define F_UTF8 0x00000004UL	26	#define F_UTF8 0x00000004UL
24	#define F_INDENT 0x00000008UL	27	#define F_INDENT 0x00000008UL
…		…
50		53
51	#define SB do {	54	#define SB do {
52	#define SE } while (0)	55	#define SE } while (0)
53		56
54	#if __GNUC__ >= 3	57	#if __GNUC__ >= 3
55	# define expect(expr,value) __builtin_expect ((expr),(value))	58	# define expect(expr,value) __builtin_expect ((expr), (value))
56	# define inline inline	59	# define INLINE static inline
57	#else	60	#else
58	# define expect(expr,value) (expr)	61	# define expect(expr,value) (expr)
59	# define inline static	62	# define INLINE static
60	#endif	63	#endif
61		64
62	#define expect_false(expr) expect ((expr) != 0, 0)	65	#define expect_false(expr) expect ((expr) != 0, 0)
63	#define expect_true(expr) expect ((expr) != 0, 1)	66	#define expect_true(expr) expect ((expr) != 0, 1)
		67
		68	#define IN_RANGE_INC(type,val,beg,end) \
		69	((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
		70	<= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
64		71
65	#ifdef USE_ITHREADS	72	#ifdef USE_ITHREADS
66	# define JSON_SLOW 1	73	# define JSON_SLOW 1
67	# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))	74	# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
68	#else	75	#else
…		…
71	#endif	78	#endif
72		79
73	static HV *json_stash, *json_boolean_stash; // JSON::XS::	80	static HV *json_stash, *json_boolean_stash; // JSON::XS::
74	static SV *json_true, *json_false;	81	static SV *json_true, *json_false;
75		82
		83	enum {
		84	INCR_M_WS = 0, // initial whitespace skipping, must be 0
		85	INCR_M_STR, // inside string
		86	INCR_M_BS, // inside backslash
		87	INCR_M_JSON // outside anything, count nesting
		88	};
		89
		90	#define INCR_DONE(json) (!(json)->incr_nest && (json)->incr_mode == INCR_M_JSON)
		91
76	typedef struct {	92	typedef struct {
77	U32 flags;	93	U32 flags;
78	SV *cb_object;	94	SV *cb_object;
79	HV *cb_sk_object;	95	HV *cb_sk_object;
		96
		97	// for the incremental parser
		98	SV *incr_text; // the source text so far
		99	STRLEN incr_pos; // the current offset into the text
		100	int incr_nest; // {[]}-nesting level
		101	int incr_mode;
80	} JSON;	102	} JSON;
81		103
82	/////////////////////////////////////////////////////////////////////////////	104	/////////////////////////////////////////////////////////////////////////////
83	// utility functions	105	// utility functions
84		106
85	inline void	107	INLINE SV *
		108	get_bool (const char *name)
		109	{
		110	SV *sv = get_sv (name, 1);
		111
		112	SvREADONLY_on (sv);
		113	SvREADONLY_on (SvRV (sv));
		114
		115	return sv;
		116	}
		117
		118	INLINE void
86	shrink (SV *sv)	119	shrink (SV *sv)
87	{	120	{
88	sv_utf8_downgrade (sv, 1);	121	sv_utf8_downgrade (sv, 1);
89	if (SvLEN (sv) > SvCUR (sv) + 1)	122	if (SvLEN (sv) > SvCUR (sv) + 1)
90	{	123	{
…		…
99	// decode an utf-8 character and return it, or (UV)-1 in	132	// decode an utf-8 character and return it, or (UV)-1 in
100	// case of an error.	133	// case of an error.
101	// we special-case "safe" characters from U+80 .. U+7FF,	134	// we special-case "safe" characters from U+80 .. U+7FF,
102	// but use the very good perl function to parse anything else.	135	// but use the very good perl function to parse anything else.
103	// note that we never call this function for a ascii codepoints	136	// note that we never call this function for a ascii codepoints
104	inline UV	137	INLINE UV
105	decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)	138	decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)
106	{	139	{
107	if (expect_false (s[0] > 0xdf \|\| s[0] < 0xc2))	140	if (expect_true (len >= 2
108	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);	141	&& IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
109	else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)	142	&& IN_RANGE_INC (char, s[1], 0x80, 0xbf)))
110	{	143	{
111	*clen = 2;	144	*clen = 2;
112	return ((s[0] & 0x1f) << 6) \| (s[1] & 0x3f);	145	return ((s[0] & 0x1f) << 6) \| (s[1] & 0x3f);
113	}	146	}
114	else	147	else
115	{	148	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
116	*clen = (STRLEN)-1;	149	}
117	return (UV)-1;	150
118	}	151	// likewise for encoding, also never called for ascii codepoints
		152	// this function takes advantage of this fact, although current gccs
		153	// seem to optimise the check for >= 0x80 away anyways
		154	INLINE unsigned char *
		155	encode_utf8 (unsigned char *s, UV ch)
		156	{
		157	if (expect_false (ch < 0x000080))
		158	*s++ = ch;
		159	else if (expect_true (ch < 0x000800))
		160	*s++ = 0xc0 \| ( ch >> 6),
		161	*s++ = 0x80 \| ( ch & 0x3f);
		162	else if ( ch < 0x010000)
		163	*s++ = 0xe0 \| ( ch >> 12),
		164	*s++ = 0x80 \| ((ch >> 6) & 0x3f),
		165	*s++ = 0x80 \| ( ch & 0x3f);
		166	else if ( ch < 0x110000)
		167	*s++ = 0xf0 \| ( ch >> 18),
		168	*s++ = 0x80 \| ((ch >> 12) & 0x3f),
		169	*s++ = 0x80 \| ((ch >> 6) & 0x3f),
		170	*s++ = 0x80 \| ( ch & 0x3f);
		171
		172	return s;
119	}	173	}
120		174
121	/////////////////////////////////////////////////////////////////////////////	175	/////////////////////////////////////////////////////////////////////////////
122	// encoder	176	// encoder
123		177
…		…
128	char *end; // SvEND (sv)	182	char *end; // SvEND (sv)
129	SV *sv; // result scalar	183	SV *sv; // result scalar
130	JSON json;	184	JSON json;
131	U32 indent; // indentation level	185	U32 indent; // indentation level
132	U32 maxdepth; // max. indentation/recursion level	186	U32 maxdepth; // max. indentation/recursion level
		187	UV limit; // escape character values >= this value when encoding
133	} enc_t;	188	} enc_t;
134		189
135	inline void	190	INLINE void
136	need (enc_t *enc, STRLEN len)	191	need (enc_t *enc, STRLEN len)
137	{	192	{
138	if (expect_false (enc->cur + len >= enc->end))	193	if (expect_false (enc->cur + len >= enc->end))
139	{	194	{
140	STRLEN cur = enc->cur - SvPVX (enc->sv);	195	STRLEN cur = enc->cur - SvPVX (enc->sv);
…		…
142	enc->cur = SvPVX (enc->sv) + cur;	197	enc->cur = SvPVX (enc->sv) + cur;
143	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;	198	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
144	}	199	}
145	}	200	}
146		201
147	inline void	202	INLINE void
148	encode_ch (enc_t *enc, char ch)	203	encode_ch (enc_t *enc, char ch)
149	{	204	{
150	need (enc, 1);	205	need (enc, 1);
151	*enc->cur++ = ch;	206	*enc->cur++ = ch;
152	}	207	}
…		…
206	{	261	{
207	uch = ch;	262	uch = ch;
208	clen = 1;	263	clen = 1;
209	}	264	}
210		265
211	if (uch > 0x10FFFFUL)	266	if (uch < 0x80/*0x20*/ \|\| uch >= enc->limit)
212	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
213
214	if (uch < 0x80 \|\| enc->json.flags & F_ASCII \|\| (enc->json.flags & F_LATIN1 && uch > 0xFF))
215	{	267	{
216	if (uch > 0xFFFFUL)	268	if (uch >= 0x10000UL)
217	{	269	{
		270	if (uch >= 0x110000UL)
		271	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
		272
218	need (enc, len += 11);	273	need (enc, len += 11);
219	sprintf (enc->cur, "\\u%04x\\u%04x",	274	sprintf (enc->cur, "\\u%04x\\u%04x",
220	(int)((uch - 0x10000) / 0x400 + 0xD800),	275	(int)((uch - 0x10000) / 0x400 + 0xD800),
221	(int)((uch - 0x10000) % 0x400 + 0xDC00));	276	(int)((uch - 0x10000) % 0x400 + 0xDC00));
222	enc->cur += 12;	277	enc->cur += 12;
…		…
250	while (--clen);	305	while (--clen);
251	}	306	}
252	else	307	else
253	{	308	{
254	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed	309	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
255	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);	310	enc->cur = encode_utf8 (enc->cur, uch);
256	++str;	311	++str;
257	}	312	}
258	}	313	}
259	}	314	}
260	}	315	}
261		316
262	--len;	317	--len;
263	}	318	}
264	}	319	}
265		320
266	inline void	321	INLINE void
267	encode_indent (enc_t *enc)	322	encode_indent (enc_t *enc)
268	{	323	{
269	if (enc->json.flags & F_INDENT)	324	if (enc->json.flags & F_INDENT)
270	{	325	{
271	int spaces = enc->indent * INDENT_STEP;	326	int spaces = enc->indent * INDENT_STEP;
…		…
274	memset (enc->cur, ' ', spaces);	329	memset (enc->cur, ' ', spaces);
275	enc->cur += spaces;	330	enc->cur += spaces;
276	}	331	}
277	}	332	}
278		333
279	inline void	334	INLINE void
280	encode_space (enc_t *enc)	335	encode_space (enc_t *enc)
281	{	336	{
282	need (enc, 1);	337	need (enc, 1);
283	encode_ch (enc, ' ');	338	encode_ch (enc, ' ');
284	}	339	}
285		340
286	inline void	341	INLINE void
287	encode_nl (enc_t *enc)	342	encode_nl (enc_t *enc)
288	{	343	{
289	if (enc->json.flags & F_INDENT)	344	if (enc->json.flags & F_INDENT)
290	{	345	{
291	need (enc, 1);	346	need (enc, 1);
292	encode_ch (enc, '\n');	347	encode_ch (enc, '\n');
293	}	348	}
294	}	349	}
295		350
296	inline void	351	INLINE void
297	encode_comma (enc_t *enc)	352	encode_comma (enc_t *enc)
298	{	353	{
299	encode_ch (enc, ',');	354	encode_ch (enc, ',');
300		355
301	if (enc->json.flags & F_INDENT)	356	if (enc->json.flags & F_INDENT)
…		…
312	int i, len = av_len (av);	367	int i, len = av_len (av);
313		368
314	if (enc->indent >= enc->maxdepth)	369	if (enc->indent >= enc->maxdepth)
315	croak ("data structure too deep (hit recursion limit)");	370	croak ("data structure too deep (hit recursion limit)");
316		371
317	encode_ch (enc, '['); encode_nl (enc);	372	encode_ch (enc, '[');
318	++enc->indent;	373
		374	if (len >= 0)
		375	{
		376	encode_nl (enc); ++enc->indent;
319		377
320	for (i = 0; i <= len; ++i)	378	for (i = 0; i <= len; ++i)
321	{	379	{
322	SV **svp = av_fetch (av, i, 0);	380	SV **svp = av_fetch (av, i, 0);
323		381
324	encode_indent (enc);	382	encode_indent (enc);
325		383
326	if (svp)	384	if (svp)
327	encode_sv (enc, *svp);	385	encode_sv (enc, *svp);
328	else	386	else
329	encode_str (enc, "null", 4, 0);	387	encode_str (enc, "null", 4, 0);
330		388
331	if (i < len)	389	if (i < len)
332	encode_comma (enc);	390	encode_comma (enc);
333	}	391	}
334		392
		393	encode_nl (enc); --enc->indent; encode_indent (enc);
		394	}
		395
335	encode_nl (enc);	396	encode_ch (enc, ']');
336
337	--enc->indent;
338	encode_indent (enc); encode_ch (enc, ']');
339	}	397	}
340		398
341	static void	399	static void
342	encode_hk (enc_t *enc, HE *he)	400	encode_hk (enc_t *enc, HE *he)
343	{	401	{
…		…
391		449
392	static void	450	static void
393	encode_hv (enc_t *enc, HV *hv)	451	encode_hv (enc_t *enc, HV *hv)
394	{	452	{
395	HE *he;	453	HE *he;
396	int count;
397		454
398	if (enc->indent >= enc->maxdepth)	455	if (enc->indent >= enc->maxdepth)
399	croak ("data structure too deep (hit recursion limit)");	456	croak ("data structure too deep (hit recursion limit)");
400		457
401	encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;	458	encode_ch (enc, '{');
402		459
403	// for canonical output we have to sort by keys first	460	// for canonical output we have to sort by keys first
404	// actually, this is mostly due to the stupid so-called	461	// actually, this is mostly due to the stupid so-called
405	// security workaround added somewhere in 5.8.x.	462	// security workaround added somewhere in 5.8.x.
406	// that randomises hash orderings	463	// that randomises hash orderings
…		…
459		516
460	FREETMPS;	517	FREETMPS;
461	LEAVE;	518	LEAVE;
462	}	519	}
463		520
		521	encode_nl (enc); ++enc->indent;
		522
464	while (count--)	523	while (count--)
465	{	524	{
466	encode_indent (enc);	525	encode_indent (enc);
467	he = hes [count];	526	he = hes [count];
468	encode_hk (enc, he);	527	encode_hk (enc, he);
469	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));	528	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
470		529
471	if (count)	530	if (count)
472	encode_comma (enc);	531	encode_comma (enc);
473	}	532	}
		533
		534	encode_nl (enc); --enc->indent; encode_indent (enc);
474	}	535	}
475	}	536	}
476	else	537	else
477	{	538	{
478	if (hv_iterinit (hv) \|\| SvMAGICAL (hv))	539	if (hv_iterinit (hv) \|\| SvMAGICAL (hv))
479	if ((he = hv_iternext (hv)))	540	if ((he = hv_iternext (hv)))
		541	{
		542	encode_nl (enc); ++enc->indent;
		543
480	for (;;)	544	for (;;)
481	{	545	{
482	encode_indent (enc);	546	encode_indent (enc);
483	encode_hk (enc, he);	547	encode_hk (enc, he);
484	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));	548	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
485		549
486	if (!(he = hv_iternext (hv)))	550	if (!(he = hv_iternext (hv)))
487	break;	551	break;
488		552
489	encode_comma (enc);	553	encode_comma (enc);
490	}	554	}
491	}
492		555
		556	encode_nl (enc); --enc->indent; encode_indent (enc);
		557	}
		558	}
		559
493	encode_nl (enc);	560	encode_ch (enc, '}');
494
495	--enc->indent; encode_indent (enc); encode_ch (enc, '}');
496	}	561	}
497		562
498	// encode objects, arrays and special \0=false and \1=true values.	563	// encode objects, arrays and special \0=false and \1=true values.
499	static void	564	static void
500	encode_rv (enc_t *enc, SV *sv)	565	encode_rv (enc_t *enc, SV *sv)
…		…
608	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);	673	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
609	enc->cur += strlen (enc->cur);	674	enc->cur += strlen (enc->cur);
610	}	675	}
611	else if (SvIOKp (sv))	676	else if (SvIOKp (sv))
612	{	677	{
613	// we assume we can always read an IV as a UV	678	// we assume we can always read an IV as a UV and vice versa
614	if (SvUV (sv) & ~(UV)0x7fff)	679	// we assume two's complement
615	{	680	// we assume no aliasing issues in the union
616	// large integer, use the (rather slow) snprintf way.	681	if (SvIsUV (sv) ? SvUVX (sv) <= 59000
617	need (enc, sizeof (UV) * 3);	682	: SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000)
618	enc->cur +=
619	SvIsUV(sv)
620	? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
621	: snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
622	}
623	else
624	{	683	{
625	// optimise the "small number case"	684	// optimise the "small number case"
626	// code will likely be branchless and use only a single multiplication	685	// code will likely be branchless and use only a single multiplication
		686	// works for numbers up to 59074
627	I32 i = SvIV (sv);	687	I32 i = SvIVX (sv);
628	U32 u;	688	U32 u;
629	char digit, nz = 0;	689	char digit, nz = 0;
630		690
631	need (enc, 6);	691	need (enc, 6);
632		692
…		…
638		698
639	// now output digit by digit, each time masking out the integer part	699	// now output digit by digit, each time masking out the integer part
640	// and multiplying by 5 while moving the decimal point one to the right,	700	// and multiplying by 5 while moving the decimal point one to the right,
641	// resulting in a net multiplication by 10.	701	// resulting in a net multiplication by 10.
642	// we always write the digit to memory but conditionally increment	702	// we always write the digit to memory but conditionally increment
643	// the pointer, to ease the usage of conditional move instructions.	703	// the pointer, to enable the use of conditional move instructions.
644	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0xfffffff) * 5;	704	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0xfffffffUL) * 5;
645	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x7ffffff) * 5;	705	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x7ffffffUL) * 5;
646	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x3ffffff) * 5;	706	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x3ffffffUL) * 5;
647	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x1ffffff) * 5;	707	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x1ffffffUL) * 5;
648	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'	708	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
		709	}
		710	else
		711	{
		712	// large integer, use the (rather slow) snprintf way.
		713	need (enc, IVUV_MAXCHARS);
		714	enc->cur +=
		715	SvIsUV(sv)
		716	? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv))
		717	: snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv));
649	}	718	}
650	}	719	}
651	else if (SvROK (sv))	720	else if (SvROK (sv))
652	encode_rv (enc, SvRV (sv));	721	encode_rv (enc, SvRV (sv));
653	else if (!SvOK (sv))	722	else if (!SvOK (sv))
…		…
669	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));	738	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
670	enc.cur = SvPVX (enc.sv);	739	enc.cur = SvPVX (enc.sv);
671	enc.end = SvEND (enc.sv);	740	enc.end = SvEND (enc.sv);
672	enc.indent = 0;	741	enc.indent = 0;
673	enc.maxdepth = DEC_DEPTH (enc.json.flags);	742	enc.maxdepth = DEC_DEPTH (enc.json.flags);
		743	enc.limit = enc.json.flags & F_ASCII ? 0x000080UL
		744	: enc.json.flags & F_LATIN1 ? 0x000100UL
		745	: 0x110000UL;
674		746
675	SvPOK_only (enc.sv);	747	SvPOK_only (enc.sv);
676	encode_sv (&enc, scalar);	748	encode_sv (&enc, scalar);
677		749
678	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));	750	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
…		…
699	JSON json;	771	JSON json;
700	U32 depth; // recursion depth	772	U32 depth; // recursion depth
701	U32 maxdepth; // recursion depth limit	773	U32 maxdepth; // recursion depth limit
702	} dec_t;	774	} dec_t;
703		775
704	inline void	776	INLINE void
		777	decode_comment (dec_t *dec)
		778	{
		779	// only '#'-style comments allowed a.t.m.
		780
		781	while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d)
		782	++dec->cur;
		783	}
		784
		785	INLINE void
705	decode_ws (dec_t *dec)	786	decode_ws (dec_t *dec)
706	{	787	{
707	for (;;)	788	for (;;)
708	{	789	{
709	char ch = *dec->cur;	790	char ch = *dec->cur;
710		791
711	if (ch > 0x20	792	if (ch > 0x20)
		793	{
		794	if (expect_false (ch == '#'))
		795	{
		796	if (dec->json.flags & F_RELAXED)
		797	decode_comment (dec);
		798	else
		799	break;
		800	}
		801	else
		802	break;
		803	}
712	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))	804	else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)
713	break;	805	break; // parse error, but let higher level handle it, gives better error messages
714
715	if (ch == '#' && dec->json.flags & F_RELAXED)
716	++dec->cur;
717		806
718	++dec->cur;	807	++dec->cur;
719	}	808	}
720	}	809	}
721		810
…		…
827		916
828	if (hi >= 0x80)	917	if (hi >= 0x80)
829	{	918	{
830	utf8 = 1;	919	utf8 = 1;
831		920
832	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);	921	cur = encode_utf8 (cur, hi);
833	}	922	}
834	else	923	else
835	*cur++ = hi;	924	*cur++ = hi;
836	}	925	}
837	break;	926	break;
…		…
839	default:	928	default:
840	--dec_cur;	929	--dec_cur;
841	ERR ("illegal backslash escape sequence in string");	930	ERR ("illegal backslash escape sequence in string");
842	}	931	}
843	}	932	}
844	else if (expect_true (ch >= 0x20 && ch <= 0x7f))	933	else if (expect_true (ch >= 0x20 && ch < 0x80))
845	*cur++ = ch;	934	*cur++ = ch;
846	else if (ch >= 0x80)	935	else if (ch >= 0x80)
847	{	936	{
848	STRLEN clen;	937	STRLEN clen;
849	UV uch;	938	UV uch;
…		…
972		1061
973	if (!is_nv)	1062	if (!is_nv)
974	{	1063	{
975	int len = dec->cur - start;	1064	int len = dec->cur - start;
976		1065
977	// special case the rather common 1..4-digit-int case, assumes 32 bit ints or so	1066	// special case the rather common 1..5-digit-int case
978	if (*start == '-')	1067	if (*start == '-')
979	switch (len)	1068	switch (len)
980	{	1069	{
981	case 2: return newSViv (-( start [1] - '0' * 1));	1070	case 2: return newSViv (-( start [1] - '0' * 1));
982	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));	1071	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));
983	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));	1072	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
984	case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));	1073	case 5: return newSViv (-( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
		1074	case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111));
985	}	1075	}
986	else	1076	else
987	switch (len)	1077	switch (len)
988	{	1078	{
989	case 1: return newSViv ( start [0] - '0' * 1);	1079	case 1: return newSViv ( start [0] - '0' * 1);
990	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);	1080	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
991	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);	1081	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
992	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);	1082	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
		1083	case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111);
993	}	1084	}
994		1085
995	{	1086	{
996	UV uv;	1087	UV uv;
997	int numtype = grok_number (start, len, &uv);	1088	int numtype = grok_number (start, len, &uv);
…		…
1105	char *p = dec->cur;	1196	char *p = dec->cur;
1106	char *e = p + 24; // only try up to 24 bytes	1197	char *e = p + 24; // only try up to 24 bytes
1107		1198
1108	for (;;)	1199	for (;;)
1109	{	1200	{
1110	// the >= 0x80 is true on most architectures	1201	// the >= 0x80 is false on most architectures
1111	if (p == e \|\| *p < 0x20 \|\| *p >= 0x80 \|\| *p == '\\')	1202	if (p == e \|\| *p < 0x20 \|\| *p >= 0x80 \|\| *p == '\\')
1112	{	1203	{
1113	// slow path, back up and use decode_str	1204	// slow path, back up and use decode_str
1114	SV *key = decode_str (dec);	1205	SV *key = decode_str (dec);
1115	if (!key)	1206	if (!key)
…		…
1246		1337
1247	static SV *	1338	static SV *
1248	decode_sv (dec_t *dec)	1339	decode_sv (dec_t *dec)
1249	{	1340	{
1250	// the beauty of JSON: you need exactly one character lookahead	1341	// the beauty of JSON: you need exactly one character lookahead
1251	// to parse anything.	1342	// to parse everything.
1252	switch (*dec->cur)	1343	switch (*dec->cur)
1253	{	1344	{
1254	case '"': ++dec->cur; return decode_str (dec);	1345	case '"': ++dec->cur; return decode_str (dec);
1255	case '[': ++dec->cur; return decode_av (dec);	1346	case '[': ++dec->cur; return decode_av (dec);
1256	case '{': ++dec->cur; return decode_hv (dec);	1347	case '{': ++dec->cur; return decode_hv (dec);
1257		1348
1258	case '-':	1349	case '-':
1259	case '0': case '1': case '2': case '3': case '4':	1350	case '0': case '1': case '2': case '3': case '4':
1260	case '5': case '6': case '7': case '8': case '9':	1351	case '5': case '6': case '7': case '8': case '9':
1261	return decode_num (dec);	1352	return decode_num (dec);
…		…
1263	case 't':	1354	case 't':
1264	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))	1355	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
1265	{	1356	{
1266	dec->cur += 4;	1357	dec->cur += 4;
1267	#if JSON_SLOW	1358	#if JSON_SLOW
1268	json_true = get_sv ("JSON::XS::true", 1); SvREADONLY_on (json_true);	1359	json_true = get_bool ("JSON::XS::true");
1269	#endif	1360	#endif
1270	return SvREFCNT_inc (json_true);	1361	return newSVsv (json_true);
1271	}	1362	}
1272	else	1363	else
1273	ERR ("'true' expected");	1364	ERR ("'true' expected");
1274		1365
1275	break;	1366	break;
…		…
1277	case 'f':	1368	case 'f':
1278	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))	1369	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
1279	{	1370	{
1280	dec->cur += 5;	1371	dec->cur += 5;
1281	#if JSON_SLOW	1372	#if JSON_SLOW
1282	json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false);	1373	json_false = get_bool ("JSON::XS::false");
1283	#endif	1374	#endif
1284	return SvREFCNT_inc (json_false);	1375	return newSVsv (json_false);
1285	}	1376	}
1286	else	1377	else
1287	ERR ("'false' expected");	1378	ERR ("'false' expected");
1288		1379
1289	break;	1380	break;
…		…
1307	fail:	1398	fail:
1308	return 0;	1399	return 0;
1309	}	1400	}
1310		1401
1311	static SV *	1402	static SV *
1312	decode_json (SV *string, JSON *json, UV *offset_return)	1403	decode_json (SV *string, JSON *json, STRLEN *offset_return)
1313	{	1404	{
1314	dec_t dec;	1405	dec_t dec;
1315	UV offset;	1406	STRLEN offset;
1316	SV *sv;	1407	SV *sv;
1317		1408
1318	SvGETMAGIC (string);	1409	SvGETMAGIC (string);
1319	SvUPGRADE (string, SVt_PV);	1410	SvUPGRADE (string, SVt_PV);
1320		1411
…		…
1393		1484
1394	return sv;	1485	return sv;
1395	}	1486	}
1396		1487
1397	/////////////////////////////////////////////////////////////////////////////	1488	/////////////////////////////////////////////////////////////////////////////
		1489	// incremental parser
		1490
		1491	static void
		1492	incr_parse (JSON *self)
		1493	{
		1494	const char *p = SvPVX (self->incr_text) + self->incr_pos;
		1495
		1496	for (;;)
		1497	{
		1498	//printf ("loop pod %d p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), p, p, self->incr_mode, self->incr_nest);//D
		1499	switch (self->incr_mode)
		1500	{
		1501	// only used for intiial whitespace skipping
		1502	case INCR_M_WS:
		1503	for (;;)
		1504	{
		1505	if (*p > 0x20)
		1506	{
		1507	self->incr_mode = INCR_M_JSON;
		1508	goto incr_m_json;
		1509	}
		1510	else if (!*p)
		1511	goto interrupt;
		1512
		1513	++p;
		1514	}
		1515
		1516	// skip a single char inside a string (for \\-processing)
		1517	case INCR_M_BS:
		1518	if (!*p)
		1519	goto interrupt;
		1520
		1521	++p;
		1522	self->incr_mode = INCR_M_STR;
		1523	goto incr_m_str;
		1524
		1525	// inside a string
		1526	case INCR_M_STR:
		1527	incr_m_str:
		1528	for (;;)
		1529	{
		1530	if (*p == '"')
		1531	{
		1532	++p;
		1533	self->incr_mode = INCR_M_JSON;
		1534
		1535	if (!self->incr_nest)
		1536	goto interrupt;
		1537
		1538	goto incr_m_json;
		1539	}
		1540	else if (*p == '\\')
		1541	{
		1542	++p; // "virtually" consumes character after \
		1543
		1544	if (!*p) // if at end of string we have to switch modes
		1545	{
		1546	self->incr_mode = INCR_M_BS;
		1547	goto interrupt;
		1548	}
		1549	}
		1550	else if (!*p)
		1551	goto interrupt;
		1552
		1553	++p;
		1554	}
		1555
		1556	// after initial ws, outside string
		1557	case INCR_M_JSON:
		1558	incr_m_json:
		1559	for (;;)
		1560	{
		1561	switch (*p++)
		1562	{
		1563	case 0:
		1564	--p;
		1565	goto interrupt;
		1566
		1567	case 0x09:
		1568	case 0x0a:
		1569	case 0x0d:
		1570	case 0x20:
		1571	if (!self->incr_nest)
		1572	{
		1573	--p; // do not eat the whitespace, let the next round do it
		1574	goto interrupt;
		1575	}
		1576	break;
		1577
		1578	case '"':
		1579	self->incr_mode = INCR_M_STR;
		1580	goto incr_m_str;
		1581
		1582	case '[':
		1583	case '{':
		1584	++self->incr_nest;
		1585	break;
		1586
		1587	case ']':
		1588	case '}':
		1589	if (!--self->incr_nest)
		1590	goto interrupt;
		1591	}
		1592	}
		1593	}
		1594
		1595	modechange:
		1596	;
		1597	}
		1598
		1599	interrupt:
		1600	self->incr_pos = p - SvPVX (self->incr_text);
		1601	//printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D
		1602	}
		1603
		1604	/////////////////////////////////////////////////////////////////////////////
1398	// XS interface functions	1605	// XS interface functions
1399		1606
1400	MODULE = JSON::XS PACKAGE = JSON::XS	1607	MODULE = JSON::XS PACKAGE = JSON::XS
1401		1608
1402	BOOT:	1609	BOOT:
…		…
1411	: -1;	1618	: -1;
1412		1619
1413	json_stash = gv_stashpv ("JSON::XS" , 1);	1620	json_stash = gv_stashpv ("JSON::XS" , 1);
1414	json_boolean_stash = gv_stashpv ("JSON::XS::Boolean", 1);	1621	json_boolean_stash = gv_stashpv ("JSON::XS::Boolean", 1);
1415		1622
1416	json_true = get_sv ("JSON::XS::true" , 1); SvREADONLY_on (json_true );	1623	json_true = get_bool ("JSON::XS::true");
1417	json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false);	1624	json_false = get_bool ("JSON::XS::false");
1418	}	1625	}
1419		1626
1420	PROTOTYPES: DISABLE	1627	PROTOTYPES: DISABLE
1421		1628
1422	void CLONE (...)	1629	void CLONE (...)
…		…
1429	{	1636	{
1430	SV *pv = NEWSV (0, sizeof (JSON));	1637	SV *pv = NEWSV (0, sizeof (JSON));
1431	SvPOK_only (pv);	1638	SvPOK_only (pv);
1432	Zero (SvPVX (pv), 1, JSON);	1639	Zero (SvPVX (pv), 1, JSON);
1433	((JSON *)SvPVX (pv))->flags = F_DEFAULT;	1640	((JSON *)SvPVX (pv))->flags = F_DEFAULT;
1434	XPUSHs (sv_2mortal (sv_bless (newRV_noinc (pv), JSON_STASH)));	1641	XPUSHs (sv_2mortal (sv_bless (
		1642	newRV_noinc (pv),
		1643	strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1)
		1644	)));
1435	}	1645	}
1436		1646
1437	void ascii (JSON *self, int enable = 1)	1647	void ascii (JSON *self, int enable = 1)
1438	ALIAS:	1648	ALIAS:
1439	ascii = F_ASCII	1649	ascii = F_ASCII
…		…
1457	self->flags &= ~ix;	1667	self->flags &= ~ix;
1458		1668
1459	XPUSHs (ST (0));	1669	XPUSHs (ST (0));
1460	}	1670	}
1461		1671
		1672	void get_ascii (JSON *self)
		1673	ALIAS:
		1674	get_ascii = F_ASCII
		1675	get_latin1 = F_LATIN1
		1676	get_utf8 = F_UTF8
		1677	get_indent = F_INDENT
		1678	get_canonical = F_CANONICAL
		1679	get_space_before = F_SPACE_BEFORE
		1680	get_space_after = F_SPACE_AFTER
		1681	get_allow_nonref = F_ALLOW_NONREF
		1682	get_shrink = F_SHRINK
		1683	get_allow_blessed = F_ALLOW_BLESSED
		1684	get_convert_blessed = F_CONV_BLESSED
		1685	get_relaxed = F_RELAXED
		1686	PPCODE:
		1687	XPUSHs (boolSV (self->flags & ix));
		1688
1462	void max_depth (JSON *self, UV max_depth = 0x80000000UL)	1689	void max_depth (JSON *self, UV max_depth = 0x80000000UL)
1463	PPCODE:	1690	PPCODE:
1464	{	1691	{
1465	UV log2 = 0;	1692	UV log2 = 0;
1466		1693
…		…
1472	self->flags = self->flags & ~F_MAXDEPTH \| (log2 << S_MAXDEPTH);	1699	self->flags = self->flags & ~F_MAXDEPTH \| (log2 << S_MAXDEPTH);
1473		1700
1474	XPUSHs (ST (0));	1701	XPUSHs (ST (0));
1475	}	1702	}
1476		1703
		1704	U32 get_max_depth (JSON *self)
		1705	CODE:
		1706	RETVAL = DEC_DEPTH (self->flags);
		1707	OUTPUT:
		1708	RETVAL
		1709
1477	void max_size (JSON *self, UV max_size = 0)	1710	void max_size (JSON *self, UV max_size = 0)
1478	PPCODE:	1711	PPCODE:
1479	{	1712	{
1480	UV log2 = 0;	1713	UV log2 = 0;
1481		1714
…		…
1487		1720
1488	self->flags = self->flags & ~F_MAXSIZE \| (log2 << S_MAXSIZE);	1721	self->flags = self->flags & ~F_MAXSIZE \| (log2 << S_MAXSIZE);
1489		1722
1490	XPUSHs (ST (0));	1723	XPUSHs (ST (0));
1491	}	1724	}
		1725
		1726	int get_max_size (JSON *self)
		1727	CODE:
		1728	RETVAL = DEC_SIZE (self->flags);
		1729	OUTPUT:
		1730	RETVAL
1492		1731
1493	void filter_json_object (JSON self, SV cb = &PL_sv_undef)	1732	void filter_json_object (JSON self, SV cb = &PL_sv_undef)
1494	PPCODE:	1733	PPCODE:
1495	{	1734	{
1496	SvREFCNT_dec (self->cb_object);	1735	SvREFCNT_dec (self->cb_object);
…		…
1530	XPUSHs (decode_json (jsonstr, self, 0));	1769	XPUSHs (decode_json (jsonstr, self, 0));
1531		1770
1532	void decode_prefix (JSON self, SV jsonstr)	1771	void decode_prefix (JSON self, SV jsonstr)
1533	PPCODE:	1772	PPCODE:
1534	{	1773	{
1535	UV offset;	1774	STRLEN offset;
1536	EXTEND (SP, 2);	1775	EXTEND (SP, 2);
1537	PUSHs (decode_json (jsonstr, self, &offset));	1776	PUSHs (decode_json (jsonstr, self, &offset));
1538	PUSHs (sv_2mortal (newSVuv (offset)));	1777	PUSHs (sv_2mortal (newSVuv (offset)));
		1778	}
		1779
		1780	void incr_parse (JSON self, SV jsonstr = 0)
		1781	PPCODE:
		1782	{
		1783	if (!self->incr_text)
		1784	self->incr_text = newSVpvn ("", 0);
		1785
		1786	// append data, if any
		1787	if (jsonstr)
		1788	{
		1789	if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text))
		1790	{
		1791	/* utf-8-ness differs, need to upgrade */
		1792	sv_utf8_upgrade (self->incr_text);
		1793
		1794	if (self->incr_pos)
		1795	self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
		1796	- (U8 *)SvPVX (self->incr_text);
		1797	}
		1798
		1799	{
		1800	STRLEN len;
		1801	const char *str = SvPV (jsonstr, len);
		1802	SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1);
		1803	Move (str, SvEND (self->incr_text), len, char);
		1804	SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len);
		1805	*SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there
		1806	}
		1807	}
		1808
		1809	if (GIMME_V != G_VOID)
		1810	do
		1811	{
		1812	STRLEN offset;
		1813
		1814	if (!INCR_DONE (self))
		1815	{
		1816	incr_parse (self);
		1817	if (!INCR_DONE (self))
		1818	break;
		1819	}
		1820
		1821	XPUSHs (decode_json (self->incr_text, self, &offset));
		1822
		1823	sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset);
		1824	self->incr_pos -= offset;
		1825	self->incr_nest = 0;
		1826	self->incr_mode = 0;
		1827	}
		1828	while (GIMME_V == G_ARRAY);
		1829	}
		1830
		1831	SV incr_text (JSON self)
		1832	ATTRS: lvalue
		1833	CODE:
		1834	{
		1835	if (self->incr_pos)
		1836	croak ("incr_text can not be called when the incremental parser already started parsing");
		1837
		1838	RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef;
		1839	}
		1840	OUTPUT:
		1841	RETVAL
		1842
		1843	void incr_skip (JSON *self)
		1844	CODE:
		1845	{
		1846	if (self->incr_pos)
		1847	{
		1848	sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos);
		1849	self->incr_pos = 0;
		1850	self->incr_nest = 0;
		1851	self->incr_mode = 0;
		1852	}
1539	}	1853	}
1540		1854
1541	void DESTROY (JSON *self)	1855	void DESTROY (JSON *self)
1542	CODE:	1856	CODE:
1543	SvREFCNT_dec (self->cb_sk_object);	1857	SvREFCNT_dec (self->cb_sk_object);
1544	SvREFCNT_dec (self->cb_object);	1858	SvREFCNT_dec (self->cb_object);
		1859	SvREFCNT_dec (self->incr_text);
1545		1860
1546	PROTOTYPES: ENABLE	1861	PROTOTYPES: ENABLE
1547		1862
1548	void to_json (SV *scalar)	1863	void encode_json (SV *scalar)
		1864	ALIAS:
		1865	to_json_ = 0
		1866	encode_json = F_UTF8
1549	PPCODE:	1867	PPCODE:
1550	{	1868	{
1551	JSON json = { F_DEFAULT \| F_UTF8 };	1869	JSON json = { F_DEFAULT \| ix };
1552	XPUSHs (encode_json (scalar, &json));	1870	XPUSHs (encode_json (scalar, &json));
1553	}	1871	}
1554		1872
1555	void from_json (SV *jsonstr)	1873	void decode_json (SV *jsonstr)
		1874	ALIAS:
		1875	from_json_ = 0
		1876	decode_json = F_UTF8
1556	PPCODE:	1877	PPCODE:
1557	{	1878	{
1558	JSON json = { F_DEFAULT \| F_UTF8 };	1879	JSON json = { F_DEFAULT \| ix };
1559	XPUSHs (decode_json (jsonstr, &json, 0));	1880	XPUSHs (decode_json (jsonstr, &json, 0));
1560	}	1881	}
1561		1882
		1883

Diff Legend

– Removed lines
+ Added lines
< Changed lines
> Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.63 by root, Mon Aug 27 01:49:01 2007 UTC vs. Revision 1.83 by root, Wed Mar 26 22:54:38 2008 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.63 by root, Mon Aug 27 01:49:01 2007 UTC vs.
Revision 1.83 by root, Wed Mar 26 22:54:38 2008 UTC