[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.62 by root, Sun Aug 26 22:27:32 2007 UTC vs.
Revision 1.105 by root, Tue Jan 19 01:07:27 2010 UTC

…		…
4		4
5	#include <assert.h>	5	#include <assert.h>
6	#include <string.h>	6	#include <string.h>
7	#include <stdlib.h>	7	#include <stdlib.h>
8	#include <stdio.h>	8	#include <stdio.h>
		9	#include <limits.h>
9	#include <float.h>	10	#include <float.h>
10		11
11	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)	12	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)
12	# define snprintf _snprintf // C compilers have this in stdio.h	13	# define snprintf _snprintf // C compilers have this in stdio.h
13	#endif	14	#endif
14		15
15	// some old perls do not have this, try to make it work, no	16	// some old perls do not have this, try to make it work, no
16	// guarentees, though. if it breaks, you get to keep the pieces.	17	// guarantees, though. if it breaks, you get to keep the pieces.
17	#ifndef UTF8_MAXBYTES	18	#ifndef UTF8_MAXBYTES
18	# define UTF8_MAXBYTES 13	19	# define UTF8_MAXBYTES 13
19	#endif	20	#endif
		21
		22	// three extra for rounding, sign, and end of string
		23	#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 3)
20		24
21	#define F_ASCII 0x00000001UL	25	#define F_ASCII 0x00000001UL
22	#define F_LATIN1 0x00000002UL	26	#define F_LATIN1 0x00000002UL
23	#define F_UTF8 0x00000004UL	27	#define F_UTF8 0x00000004UL
24	#define F_INDENT 0x00000008UL	28	#define F_INDENT 0x00000008UL
…		…
27	#define F_SPACE_AFTER 0x00000040UL	31	#define F_SPACE_AFTER 0x00000040UL
28	#define F_ALLOW_NONREF 0x00000100UL	32	#define F_ALLOW_NONREF 0x00000100UL
29	#define F_SHRINK 0x00000200UL	33	#define F_SHRINK 0x00000200UL
30	#define F_ALLOW_BLESSED 0x00000400UL	34	#define F_ALLOW_BLESSED 0x00000400UL
31	#define F_CONV_BLESSED 0x00000800UL	35	#define F_CONV_BLESSED 0x00000800UL
32	#define F_MAXDEPTH 0xf8000000UL
33	#define S_MAXDEPTH 27
34	#define F_MAXSIZE 0x01f00000UL	36	#define F_RELAXED 0x00001000UL
35	#define S_MAXSIZE 20	37	#define F_ALLOW_UNKNOWN 0x00002000UL
36	#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing	38	#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing
37		39
38	#define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH))
39	#define DEC_SIZE(flags) (1UL << ((flags & F_MAXSIZE ) >> S_MAXSIZE ))
40
41	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER	40	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER
42	#define F_DEFAULT (9UL << S_MAXDEPTH)
43		41
44	#define INIT_SIZE 32 // initial scalar size to be allocated	42	#define INIT_SIZE 32 // initial scalar size to be allocated
45	#define INDENT_STEP 3 // spaces per indentation level	43	#define INDENT_STEP 3 // spaces per indentation level
46		44
47	#define SHORT_STRING_LEN 16384 // special-case strings of up to this size	45	#define SHORT_STRING_LEN 16384 // special-case strings of up to this size
48		46
49	#define SB do {	47	#define SB do {
50	#define SE } while (0)	48	#define SE } while (0)
51		49
52	#if __GNUC__ >= 3	50	#if __GNUC__ >= 3
53	# define expect(expr,value) __builtin_expect ((expr),(value))	51	# define expect(expr,value) __builtin_expect ((expr), (value))
54	# define inline inline	52	# define INLINE static inline
55	#else	53	#else
56	# define expect(expr,value) (expr)	54	# define expect(expr,value) (expr)
57	# define inline static	55	# define INLINE static
58	#endif	56	#endif
59		57
60	#define expect_false(expr) expect ((expr) != 0, 0)	58	#define expect_false(expr) expect ((expr) != 0, 0)
61	#define expect_true(expr) expect ((expr) != 0, 1)	59	#define expect_true(expr) expect ((expr) != 0, 1)
		60
		61	#define IN_RANGE_INC(type,val,beg,end) \
		62	((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
		63	<= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
		64
		65	#define ERR_NESTING_EXCEEDED "json text or perl structure exceeds maximum nesting level (max_depth set too low?)"
62		66
63	#ifdef USE_ITHREADS	67	#ifdef USE_ITHREADS
64	# define JSON_SLOW 1	68	# define JSON_SLOW 1
65	# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))	69	# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
66	#else	70	#else
…		…
69	#endif	73	#endif
70		74
71	static HV *json_stash, *json_boolean_stash; // JSON::XS::	75	static HV *json_stash, *json_boolean_stash; // JSON::XS::
72	static SV *json_true, *json_false;	76	static SV *json_true, *json_false;
73		77
		78	enum {
		79	INCR_M_WS = 0, // initial whitespace skipping, must be 0
		80	INCR_M_STR, // inside string
		81	INCR_M_BS, // inside backslash
		82	INCR_M_C0, // inside comment in initial whitespace sequence
		83	INCR_M_C1, // inside comment in other places
		84	INCR_M_JSON // outside anything, count nesting
		85	};
		86
		87	#define INCR_DONE(json) ((json)->incr_nest <= 0 && (json)->incr_mode == INCR_M_JSON)
		88
74	typedef struct {	89	typedef struct {
75	U32 flags;	90	U32 flags;
		91	U32 max_depth;
		92	STRLEN max_size;
		93
76	SV *cb_object;	94	SV *cb_object;
77	HV *cb_sk_object;	95	HV *cb_sk_object;
		96
		97	// for the incremental parser
		98	SV *incr_text; // the source text so far
		99	STRLEN incr_pos; // the current offset into the text
		100	int incr_nest; // {[]}-nesting level
		101	unsigned char incr_mode;
78	} JSON;	102	} JSON;
		103
		104	INLINE void
		105	json_init (JSON *json)
		106	{
		107	Zero (json, 1, JSON);
		108	json->max_depth = 512;
		109	}
79		110
80	/////////////////////////////////////////////////////////////////////////////	111	/////////////////////////////////////////////////////////////////////////////
81	// utility functions	112	// utility functions
82		113
83	inline void	114	INLINE SV *
		115	get_bool (const char *name)
		116	{
		117	SV *sv = get_sv (name, 1);
		118
		119	SvREADONLY_on (sv);
		120	SvREADONLY_on (SvRV (sv));
		121
		122	return sv;
		123	}
		124
		125	INLINE void
84	shrink (SV *sv)	126	shrink (SV *sv)
85	{	127	{
86	sv_utf8_downgrade (sv, 1);	128	sv_utf8_downgrade (sv, 1);
		129
87	if (SvLEN (sv) > SvCUR (sv) + 1)	130	if (SvLEN (sv) > SvCUR (sv) + 1)
88	{	131	{
89	#ifdef SvPV_shrink_to_cur	132	#ifdef SvPV_shrink_to_cur
90	SvPV_shrink_to_cur (sv);	133	SvPV_shrink_to_cur (sv);
91	#elif defined (SvPV_renew)	134	#elif defined (SvPV_renew)
…		…
97	// decode an utf-8 character and return it, or (UV)-1 in	140	// decode an utf-8 character and return it, or (UV)-1 in
98	// case of an error.	141	// case of an error.
99	// we special-case "safe" characters from U+80 .. U+7FF,	142	// we special-case "safe" characters from U+80 .. U+7FF,
100	// but use the very good perl function to parse anything else.	143	// but use the very good perl function to parse anything else.
101	// note that we never call this function for a ascii codepoints	144	// note that we never call this function for a ascii codepoints
102	inline UV	145	INLINE UV
103	decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)	146	decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)
104	{	147	{
105	if (expect_false (s[0] > 0xdf \|\| s[0] < 0xc2))	148	if (expect_true (len >= 2
106	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);	149	&& IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
107	else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)	150	&& IN_RANGE_INC (char, s[1], 0x80, 0xbf)))
108	{	151	{
109	*clen = 2;	152	*clen = 2;
110	return ((s[0] & 0x1f) << 6) \| (s[1] & 0x3f);	153	return ((s[0] & 0x1f) << 6) \| (s[1] & 0x3f);
111	}	154	}
112	else	155	else
113	{	156	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
114	*clen = (STRLEN)-1;	157	}
115	return (UV)-1;	158
		159	// likewise for encoding, also never called for ascii codepoints
		160	// this function takes advantage of this fact, although current gccs
		161	// seem to optimise the check for >= 0x80 away anyways
		162	INLINE unsigned char *
		163	encode_utf8 (unsigned char *s, UV ch)
		164	{
		165	if (expect_false (ch < 0x000080))
		166	*s++ = ch;
		167	else if (expect_true (ch < 0x000800))
		168	*s++ = 0xc0 \| ( ch >> 6),
		169	*s++ = 0x80 \| ( ch & 0x3f);
		170	else if ( ch < 0x010000)
		171	*s++ = 0xe0 \| ( ch >> 12),
		172	*s++ = 0x80 \| ((ch >> 6) & 0x3f),
		173	*s++ = 0x80 \| ( ch & 0x3f);
		174	else if ( ch < 0x110000)
		175	*s++ = 0xf0 \| ( ch >> 18),
		176	*s++ = 0x80 \| ((ch >> 12) & 0x3f),
		177	*s++ = 0x80 \| ((ch >> 6) & 0x3f),
		178	*s++ = 0x80 \| ( ch & 0x3f);
		179
		180	return s;
		181	}
		182
		183	// convert offset pointer to character index, sv must be string
		184	static STRLEN
		185	ptr_to_index (SV *sv, char *offset)
		186	{
		187	return SvUTF8 (sv)
		188	? utf8_distance (offset, SvPVX (sv))
		189	: offset - SvPVX (sv);
		190	}
		191
		192	/////////////////////////////////////////////////////////////////////////////
		193	// fp hell
		194
		195	// scan a group of digits, and a trailing exponent
		196	static void
		197	json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp)
		198	{
		199	UV uaccum = 0;
		200	int eaccum = 0;
		201
		202	for (;;)
116	}	203	{
117	}	204	U8 dig = (U8)*s - '0';
118		205
		206	if (expect_false (dig >= 10))
		207	{
		208	if (dig == (U8)((U8)'.' - (U8)'0'))
		209	{
		210	++s;
		211	json_atof_scan1 (s, accum, expo, 1);
		212	}
		213	else if ((dig \| ' ') == 'e' - '0')
		214	{
		215	int exp2 = 0;
		216	int neg = 0;
		217
		218	++s;
		219
		220	if (*s == '-')
		221	{
		222	++s;
		223	neg = 1;
		224	}
		225	else if (*s == '+')
		226	++s;
		227
		228	while ((dig = (U8)*s - '0') < 10)
		229	exp2 = exp2 * 10 + *s++ - '0';
		230
		231	*expo += neg ? -exp2 : exp2;
		232	}
		233
		234	break;
		235	}
		236
		237	++s;
		238
		239	uaccum = uaccum * 10 + dig;
		240	++eaccum;
		241
		242	// if we have too many digits, then recurse for more
		243	// we actually do this for rather few digits
		244	if (uaccum >= (UV_MAX - 9) / 10)
		245	{
		246	if (postdp) *expo -= eaccum;
		247	json_atof_scan1 (s, accum, expo, postdp);
		248	if (postdp) *expo += eaccum;
		249
		250	break;
		251	}
		252	}
		253
		254	// this relies greatly on the quality of the pow ()
		255	// implementation of the platform, but a good
		256	// implementation is hard to beat.
		257	if (postdp) *expo -= eaccum;
		258	*accum += uaccum * Perl_pow (10., *expo);
		259	*expo += eaccum;
		260	}
		261
		262	static NV
		263	json_atof (const char *s)
		264	{
		265	NV accum = 0.;
		266	int expo = 0;
		267	int neg = 0;
		268
		269	if (*s == '-')
		270	{
		271	++s;
		272	neg = 1;
		273	}
		274
		275	json_atof_scan1 (s, &accum, &expo, 0);
		276
		277	return neg ? -accum : accum;
		278	}
119	/////////////////////////////////////////////////////////////////////////////	279	/////////////////////////////////////////////////////////////////////////////
120	// encoder	280	// encoder
121		281
122	// structure used for encoding JSON	282	// structure used for encoding JSON
123	typedef struct	283	typedef struct
…		…
125	char *cur; // SvPVX (sv) + current output position	285	char *cur; // SvPVX (sv) + current output position
126	char *end; // SvEND (sv)	286	char *end; // SvEND (sv)
127	SV *sv; // result scalar	287	SV *sv; // result scalar
128	JSON json;	288	JSON json;
129	U32 indent; // indentation level	289	U32 indent; // indentation level
130	U32 maxdepth; // max. indentation/recursion level	290	UV limit; // escape character values >= this value when encoding
131	} enc_t;	291	} enc_t;
132		292
133	inline void	293	INLINE void
134	need (enc_t *enc, STRLEN len)	294	need (enc_t *enc, STRLEN len)
135	{	295	{
136	if (expect_false (enc->cur + len >= enc->end))	296	if (expect_false (enc->cur + len >= enc->end))
137	{	297	{
138	STRLEN cur = enc->cur - SvPVX (enc->sv);	298	STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
139	SvGROW (enc->sv, cur + len + 1);	299	SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
140	enc->cur = SvPVX (enc->sv) + cur;	300	enc->cur = SvPVX (enc->sv) + cur;
141	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;	301	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
142	}	302	}
143	}	303	}
144		304
145	inline void	305	INLINE void
146	encode_ch (enc_t *enc, char ch)	306	encode_ch (enc_t *enc, char ch)
147	{	307	{
148	need (enc, 1);	308	need (enc, 1);
149	*enc->cur++ = ch;	309	*enc->cur++ = ch;
150	}	310	}
…		…
204	{	364	{
205	uch = ch;	365	uch = ch;
206	clen = 1;	366	clen = 1;
207	}	367	}
208		368
209	if (uch > 0x10FFFFUL)	369	if (uch < 0x80/*0x20*/ \|\| uch >= enc->limit)
210	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
211
212	if (uch < 0x80 \|\| enc->json.flags & F_ASCII \|\| (enc->json.flags & F_LATIN1 && uch > 0xFF))
213	{	370	{
214	if (uch > 0xFFFFUL)	371	if (uch >= 0x10000UL)
215	{	372	{
		373	if (uch >= 0x110000UL)
		374	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
		375
216	need (enc, len += 11);	376	need (enc, len += 11);
217	sprintf (enc->cur, "\\u%04x\\u%04x",	377	sprintf (enc->cur, "\\u%04x\\u%04x",
218	(int)((uch - 0x10000) / 0x400 + 0xD800),	378	(int)((uch - 0x10000) / 0x400 + 0xD800),
219	(int)((uch - 0x10000) % 0x400 + 0xDC00));	379	(int)((uch - 0x10000) % 0x400 + 0xDC00));
220	enc->cur += 12;	380	enc->cur += 12;
221	}	381	}
222	else	382	else
223	{	383	{
224	static char hexdigit [16] = "0123456789abcdef";
225	need (enc, len += 5);	384	need (enc, len += 5);
226	*enc->cur++ = '\\';	385	*enc->cur++ = '\\';
227	*enc->cur++ = 'u';	386	*enc->cur++ = 'u';
228	*enc->cur++ = hexdigit [ uch >> 12 ];	387	*enc->cur++ = PL_hexdigit [ uch >> 12 ];
229	*enc->cur++ = hexdigit [(uch >> 8) & 15];	388	*enc->cur++ = PL_hexdigit [(uch >> 8) & 15];
230	*enc->cur++ = hexdigit [(uch >> 4) & 15];	389	*enc->cur++ = PL_hexdigit [(uch >> 4) & 15];
231	*enc->cur++ = hexdigit [(uch >> 0) & 15];	390	*enc->cur++ = PL_hexdigit [(uch >> 0) & 15];
232	}	391	}
233		392
234	str += clen;	393	str += clen;
235	}	394	}
236	else if (enc->json.flags & F_LATIN1)	395	else if (enc->json.flags & F_LATIN1)
…		…
248	while (--clen);	407	while (--clen);
249	}	408	}
250	else	409	else
251	{	410	{
252	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed	411	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
253	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);	412	enc->cur = encode_utf8 (enc->cur, uch);
254	++str;	413	++str;
255	}	414	}
256	}	415	}
257	}	416	}
258	}	417	}
259		418
260	--len;	419	--len;
261	}	420	}
262	}	421	}
263		422
264	inline void	423	INLINE void
265	encode_indent (enc_t *enc)	424	encode_indent (enc_t *enc)
266	{	425	{
267	if (enc->json.flags & F_INDENT)	426	if (enc->json.flags & F_INDENT)
268	{	427	{
269	int spaces = enc->indent * INDENT_STEP;	428	int spaces = enc->indent * INDENT_STEP;
…		…
272	memset (enc->cur, ' ', spaces);	431	memset (enc->cur, ' ', spaces);
273	enc->cur += spaces;	432	enc->cur += spaces;
274	}	433	}
275	}	434	}
276		435
277	inline void	436	INLINE void
278	encode_space (enc_t *enc)	437	encode_space (enc_t *enc)
279	{	438	{
280	need (enc, 1);	439	need (enc, 1);
281	encode_ch (enc, ' ');	440	encode_ch (enc, ' ');
282	}	441	}
283		442
284	inline void	443	INLINE void
285	encode_nl (enc_t *enc)	444	encode_nl (enc_t *enc)
286	{	445	{
287	if (enc->json.flags & F_INDENT)	446	if (enc->json.flags & F_INDENT)
288	{	447	{
289	need (enc, 1);	448	need (enc, 1);
290	encode_ch (enc, '\n');	449	encode_ch (enc, '\n');
291	}	450	}
292	}	451	}
293		452
294	inline void	453	INLINE void
295	encode_comma (enc_t *enc)	454	encode_comma (enc_t *enc)
296	{	455	{
297	encode_ch (enc, ',');	456	encode_ch (enc, ',');
298		457
299	if (enc->json.flags & F_INDENT)	458	if (enc->json.flags & F_INDENT)
…		…
307	static void	466	static void
308	encode_av (enc_t *enc, AV *av)	467	encode_av (enc_t *enc, AV *av)
309	{	468	{
310	int i, len = av_len (av);	469	int i, len = av_len (av);
311		470
312	if (enc->indent >= enc->maxdepth)	471	if (enc->indent >= enc->json.max_depth)
313	croak ("data structure too deep (hit recursion limit)");	472	croak (ERR_NESTING_EXCEEDED);
314		473
315	encode_ch (enc, '['); encode_nl (enc);	474	encode_ch (enc, '[');
316	++enc->indent;	475
		476	if (len >= 0)
		477	{
		478	encode_nl (enc); ++enc->indent;
317		479
318	for (i = 0; i <= len; ++i)	480	for (i = 0; i <= len; ++i)
319	{	481	{
320	SV **svp = av_fetch (av, i, 0);	482	SV **svp = av_fetch (av, i, 0);
321		483
322	encode_indent (enc);	484	encode_indent (enc);
323		485
324	if (svp)	486	if (svp)
325	encode_sv (enc, *svp);	487	encode_sv (enc, *svp);
326	else	488	else
327	encode_str (enc, "null", 4, 0);	489	encode_str (enc, "null", 4, 0);
328		490
329	if (i < len)	491	if (i < len)
330	encode_comma (enc);	492	encode_comma (enc);
331	}	493	}
332		494
		495	encode_nl (enc); --enc->indent; encode_indent (enc);
		496	}
		497
333	encode_nl (enc);	498	encode_ch (enc, ']');
334
335	--enc->indent;
336	encode_indent (enc); encode_ch (enc, ']');
337	}	499	}
338		500
339	static void	501	static void
340	encode_hk (enc_t *enc, HE *he)	502	encode_hk (enc_t *enc, HE *he)
341	{	503	{
…		…
389		551
390	static void	552	static void
391	encode_hv (enc_t *enc, HV *hv)	553	encode_hv (enc_t *enc, HV *hv)
392	{	554	{
393	HE *he;	555	HE *he;
394	int count;
395		556
396	if (enc->indent >= enc->maxdepth)	557	if (enc->indent >= enc->json.max_depth)
397	croak ("data structure too deep (hit recursion limit)");	558	croak (ERR_NESTING_EXCEEDED);
398		559
399	encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;	560	encode_ch (enc, '{');
400		561
401	// for canonical output we have to sort by keys first	562	// for canonical output we have to sort by keys first
402	// actually, this is mostly due to the stupid so-called	563	// actually, this is mostly due to the stupid so-called
403	// security workaround added somewhere in 5.8.x.	564	// security workaround added somewhere in 5.8.x
404	// that randomises hash orderings	565	// that randomises hash orderings
405	if (enc->json.flags & F_CANONICAL)	566	if (enc->json.flags & F_CANONICAL && !SvRMAGICAL (hv))
406	{	567	{
407	int count = hv_iterinit (hv);	568	int count = hv_iterinit (hv);
408		569
409	if (SvMAGICAL (hv))	570	if (SvMAGICAL (hv))
410	{	571	{
…		…
457		618
458	FREETMPS;	619	FREETMPS;
459	LEAVE;	620	LEAVE;
460	}	621	}
461		622
		623	encode_nl (enc); ++enc->indent;
		624
462	while (count--)	625	while (count--)
463	{	626	{
464	encode_indent (enc);	627	encode_indent (enc);
465	he = hes [count];	628	he = hes [count];
466	encode_hk (enc, he);	629	encode_hk (enc, he);
467	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));	630	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
468		631
469	if (count)	632	if (count)
470	encode_comma (enc);	633	encode_comma (enc);
471	}	634	}
		635
		636	encode_nl (enc); --enc->indent; encode_indent (enc);
472	}	637	}
473	}	638	}
474	else	639	else
475	{	640	{
476	if (hv_iterinit (hv) \|\| SvMAGICAL (hv))	641	if (hv_iterinit (hv) \|\| SvMAGICAL (hv))
477	if ((he = hv_iternext (hv)))	642	if ((he = hv_iternext (hv)))
		643	{
		644	encode_nl (enc); ++enc->indent;
		645
478	for (;;)	646	for (;;)
479	{	647	{
480	encode_indent (enc);	648	encode_indent (enc);
481	encode_hk (enc, he);	649	encode_hk (enc, he);
482	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));	650	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
483		651
484	if (!(he = hv_iternext (hv)))	652	if (!(he = hv_iternext (hv)))
485	break;	653	break;
486		654
487	encode_comma (enc);	655	encode_comma (enc);
488	}	656	}
489	}
490		657
		658	encode_nl (enc); --enc->indent; encode_indent (enc);
		659	}
		660	}
		661
491	encode_nl (enc);	662	encode_ch (enc, '}');
492
493	--enc->indent; encode_indent (enc); encode_ch (enc, '}');
494	}	663	}
495		664
496	// encode objects, arrays and special \0=false and \1=true values.	665	// encode objects, arrays and special \0=false and \1=true values.
497	static void	666	static void
498	encode_rv (enc_t *enc, SV *sv)	667	encode_rv (enc_t *enc, SV *sv)
…		…
575		744
576	if (len == 1 && *pv == '1')	745	if (len == 1 && *pv == '1')
577	encode_str (enc, "true", 4, 0);	746	encode_str (enc, "true", 4, 0);
578	else if (len == 1 && *pv == '0')	747	else if (len == 1 && *pv == '0')
579	encode_str (enc, "false", 5, 0);	748	encode_str (enc, "false", 5, 0);
		749	else if (enc->json.flags & F_ALLOW_UNKNOWN)
		750	encode_str (enc, "null", 4, 0);
580	else	751	else
581	croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",	752	croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",
582	SvPV_nolen (sv_2mortal (newRV_inc (sv))));	753	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
583	}	754	}
		755	else if (enc->json.flags & F_ALLOW_UNKNOWN)
		756	encode_str (enc, "null", 4, 0);
584	else	757	else
585	croak ("encountered %s, but JSON can only represent references to arrays or hashes",	758	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
586	SvPV_nolen (sv_2mortal (newRV_inc (sv))));	759	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
587	}	760	}
588		761
…		…
606	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);	779	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
607	enc->cur += strlen (enc->cur);	780	enc->cur += strlen (enc->cur);
608	}	781	}
609	else if (SvIOKp (sv))	782	else if (SvIOKp (sv))
610	{	783	{
611	// we assume we can always read an IV as a UV	784	// we assume we can always read an IV as a UV and vice versa
612	if (SvUV (sv) & ~(UV)0x7fff)	785	// we assume two's complement
613	{	786	// we assume no aliasing issues in the union
614	// large integer, use the (rather slow) snprintf way.	787	if (SvIsUV (sv) ? SvUVX (sv) <= 59000
615	need (enc, sizeof (UV) * 3);	788	: SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000)
616	enc->cur +=
617	SvIsUV(sv)
618	? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
619	: snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
620	}
621	else
622	{	789	{
623	// optimise the "small number case"	790	// optimise the "small number case"
624	// code will likely be branchless and use only a single multiplication	791	// code will likely be branchless and use only a single multiplication
		792	// works for numbers up to 59074
625	I32 i = SvIV (sv);	793	I32 i = SvIVX (sv);
626	U32 u;	794	U32 u;
627	char digit, nz = 0;	795	char digit, nz = 0;
628		796
629	need (enc, 6);	797	need (enc, 6);
630		798
…		…
636		804
637	// now output digit by digit, each time masking out the integer part	805	// now output digit by digit, each time masking out the integer part
638	// and multiplying by 5 while moving the decimal point one to the right,	806	// and multiplying by 5 while moving the decimal point one to the right,
639	// resulting in a net multiplication by 10.	807	// resulting in a net multiplication by 10.
640	// we always write the digit to memory but conditionally increment	808	// we always write the digit to memory but conditionally increment
641	// the pointer, to ease the usage of conditional move instructions.	809	// the pointer, to enable the use of conditional move instructions.
642	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0xfffffff) * 5;	810	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0xfffffffUL) * 5;
643	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x7ffffff) * 5;	811	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x7ffffffUL) * 5;
644	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x3ffffff) * 5;	812	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x3ffffffUL) * 5;
645	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x1ffffff) * 5;	813	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x1ffffffUL) * 5;
646	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'	814	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
647	}	815	}
		816	else
		817	{
		818	// large integer, use the (rather slow) snprintf way.
		819	need (enc, IVUV_MAXCHARS);
		820	enc->cur +=
		821	SvIsUV(sv)
		822	? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv))
		823	: snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv));
		824	}
648	}	825	}
649	else if (SvROK (sv))	826	else if (SvROK (sv))
650	encode_rv (enc, SvRV (sv));	827	encode_rv (enc, SvRV (sv));
651	else if (!SvOK (sv))	828	else if (!SvOK (sv) \|\| enc->json.flags & F_ALLOW_UNKNOWN)
652	encode_str (enc, "null", 4, 0);	829	encode_str (enc, "null", 4, 0);
653	else	830	else
654	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",	831	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
655	SvPV_nolen (sv), SvFLAGS (sv));	832	SvPV_nolen (sv), SvFLAGS (sv));
656	}	833	}
…		…
666	enc.json = *json;	843	enc.json = *json;
667	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));	844	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
668	enc.cur = SvPVX (enc.sv);	845	enc.cur = SvPVX (enc.sv);
669	enc.end = SvEND (enc.sv);	846	enc.end = SvEND (enc.sv);
670	enc.indent = 0;	847	enc.indent = 0;
671	enc.maxdepth = DEC_DEPTH (enc.json.flags);	848	enc.limit = enc.json.flags & F_ASCII ? 0x000080UL
		849	: enc.json.flags & F_LATIN1 ? 0x000100UL
		850	: 0x110000UL;
672		851
673	SvPOK_only (enc.sv);	852	SvPOK_only (enc.sv);
674	encode_sv (&enc, scalar);	853	encode_sv (&enc, scalar);
		854	encode_nl (&enc);
675		855
676	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));	856	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
677	*SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings	857	*SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
678		858
679	if (!(enc.json.flags & (F_ASCII \| F_LATIN1 \| F_UTF8)))	859	if (!(enc.json.flags & (F_ASCII \| F_LATIN1 \| F_UTF8)))
…		…
697	JSON json;	877	JSON json;
698	U32 depth; // recursion depth	878	U32 depth; // recursion depth
699	U32 maxdepth; // recursion depth limit	879	U32 maxdepth; // recursion depth limit
700	} dec_t;	880	} dec_t;
701		881
702	inline void	882	INLINE void
		883	decode_comment (dec_t *dec)
		884	{
		885	// only '#'-style comments allowed a.t.m.
		886
		887	while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d)
		888	++dec->cur;
		889	}
		890
		891	INLINE void
703	decode_ws (dec_t *dec)	892	decode_ws (dec_t *dec)
704	{	893	{
705	for (;;)	894	for (;;)
706	{	895	{
707	char ch = *dec->cur;	896	char ch = *dec->cur;
708		897
709	if (ch > 0x20	898	if (ch > 0x20)
		899	{
		900	if (expect_false (ch == '#'))
		901	{
		902	if (dec->json.flags & F_RELAXED)
		903	decode_comment (dec);
		904	else
		905	break;
		906	}
		907	else
		908	break;
		909	}
710	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))	910	else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)
711	break;	911	break; // parse error, but let higher level handle it, gives better error messages
712		912
713	++dec->cur;	913	++dec->cur;
714	}	914	}
715	}	915	}
716		916
…		…
720	if (*dec->cur != ch) \	920	if (*dec->cur != ch) \
721	ERR (# ch " expected"); \	921	ERR (# ch " expected"); \
722	++dec->cur; \	922	++dec->cur; \
723	SE	923	SE
724		924
725	#define DEC_INC_DEPTH if (++dec->depth > dec->maxdepth) ERR ("json datastructure exceeds maximum nesting level (set a higher max_depth)")	925	#define DEC_INC_DEPTH if (++dec->depth > dec->json.max_depth) ERR (ERR_NESTING_EXCEEDED)
726	#define DEC_DEC_DEPTH --dec->depth	926	#define DEC_DEC_DEPTH --dec->depth
727		927
728	static SV *decode_sv (dec_t *dec);	928	static SV *decode_sv (dec_t *dec);
729		929
730	static signed char decode_hexdigit[256];	930	static signed char decode_hexdigit[256];
…		…
822		1022
823	if (hi >= 0x80)	1023	if (hi >= 0x80)
824	{	1024	{
825	utf8 = 1;	1025	utf8 = 1;
826		1026
827	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);	1027	cur = encode_utf8 (cur, hi);
828	}	1028	}
829	else	1029	else
830	*cur++ = hi;	1030	*cur++ = hi;
831	}	1031	}
832	break;	1032	break;
…		…
834	default:	1034	default:
835	--dec_cur;	1035	--dec_cur;
836	ERR ("illegal backslash escape sequence in string");	1036	ERR ("illegal backslash escape sequence in string");
837	}	1037	}
838	}	1038	}
839	else if (expect_true (ch >= 0x20 && ch <= 0x7f))	1039	else if (expect_true (ch >= 0x20 && ch < 0x80))
840	*cur++ = ch;	1040	*cur++ = ch;
841	else if (ch >= 0x80)	1041	else if (ch >= 0x80)
842	{	1042	{
843	STRLEN clen;	1043	STRLEN clen;
844	UV uch;
845		1044
846	--dec_cur;	1045	--dec_cur;
847		1046
848	uch = decode_utf8 (dec_cur, dec->end - dec_cur, &clen);	1047	decode_utf8 (dec_cur, dec->end - dec_cur, &clen);
849	if (clen == (STRLEN)-1)	1048	if (clen == (STRLEN)-1)
850	ERR ("malformed UTF-8 character in JSON string");	1049	ERR ("malformed UTF-8 character in JSON string");
851		1050
852	do	1051	do
853	*cur++ = *dec_cur++;	1052	*cur++ = *dec_cur++;
…		…
870	{	1069	{
871	STRLEN len = cur - buf;	1070	STRLEN len = cur - buf;
872		1071
873	if (sv)	1072	if (sv)
874	{	1073	{
875	SvGROW (sv, SvCUR (sv) + len + 1);	1074	STRLEN cur = SvCUR (sv);
		1075
		1076	if (SvLEN (sv) <= cur + len)
		1077	SvGROW (sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
		1078
876	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);	1079	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
877	SvCUR_set (sv, SvCUR (sv) + len);	1080	SvCUR_set (sv, SvCUR (sv) + len);
878	}	1081	}
879	else	1082	else
880	sv = newSVpvn (buf, len);	1083	sv = newSVpvn (buf, len);
…		…
967		1170
968	if (!is_nv)	1171	if (!is_nv)
969	{	1172	{
970	int len = dec->cur - start;	1173	int len = dec->cur - start;
971		1174
972	// special case the rather common 1..4-digit-int case, assumes 32 bit ints or so	1175	// special case the rather common 1..5-digit-int case
973	if (*start == '-')	1176	if (*start == '-')
974	switch (len)	1177	switch (len)
975	{	1178	{
976	case 2: return newSViv (-( start [1] - '0' * 1));	1179	case 2: return newSViv (-(IV)( start [1] - '0' * 1));
977	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));	1180	case 3: return newSViv (-(IV)( start [1] * 10 + start [2] - '0' * 11));
978	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));	1181	case 4: return newSViv (-(IV)( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
979	case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));	1182	case 5: return newSViv (-(IV)( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
		1183	case 6: return newSViv (-(IV)(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111));
980	}	1184	}
981	else	1185	else
982	switch (len)	1186	switch (len)
983	{	1187	{
984	case 1: return newSViv ( start [0] - '0' * 1);	1188	case 1: return newSViv ( start [0] - '0' * 1);
985	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);	1189	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
986	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);	1190	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
987	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);	1191	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
		1192	case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111);
988	}	1193	}
989		1194
990	{	1195	{
991	UV uv;	1196	UV uv;
992	int numtype = grok_number (start, len, &uv);	1197	int numtype = grok_number (start, len, &uv);
…		…
1001	}	1206	}
1002		1207
1003	len -= *start == '-' ? 1 : 0;	1208	len -= *start == '-' ? 1 : 0;
1004		1209
1005	// does not fit into IV or UV, try NV	1210	// does not fit into IV or UV, try NV
1006	if ((sizeof (NV) == sizeof (double) && DBL_DIG >= len)	1211	if (len <= NV_DIG)
1007	#if defined (LDBL_DIG)
1008	\|\| (sizeof (NV) == sizeof (long double) && LDBL_DIG >= len)
1009	#endif
1010	)
1011	// fits into NV without loss of precision	1212	// fits into NV without loss of precision
1012	return newSVnv (Atof (start));	1213	return newSVnv (json_atof (start));
1013		1214
1014	// everything else fails, convert it to a string	1215	// everything else fails, convert it to a string
1015	return newSVpvn (start, dec->cur - start);	1216	return newSVpvn (start, dec->cur - start);
1016	}	1217	}
1017		1218
1018	// loss of precision here	1219	// loss of precision here
1019	return newSVnv (Atof (start));	1220	return newSVnv (json_atof (start));
1020		1221
1021	fail:	1222	fail:
1022	return 0;	1223	return 0;
1023	}	1224	}
1024		1225
…		…
1053		1254
1054	if (*dec->cur != ',')	1255	if (*dec->cur != ',')
1055	ERR (", or ] expected while parsing array");	1256	ERR (", or ] expected while parsing array");
1056		1257
1057	++dec->cur;	1258	++dec->cur;
		1259
		1260	decode_ws (dec);
		1261
		1262	if (*dec->cur == ']' && dec->json.flags & F_RELAXED)
		1263	{
		1264	++dec->cur;
		1265	break;
		1266	}
1058	}	1267	}
1059		1268
1060	DEC_DEC_DEPTH;	1269	DEC_DEC_DEPTH;
1061	return newRV_noinc ((SV *)av);	1270	return newRV_noinc ((SV *)av);
1062		1271
…		…
1078	if (*dec->cur == '}')	1287	if (*dec->cur == '}')
1079	++dec->cur;	1288	++dec->cur;
1080	else	1289	else
1081	for (;;)	1290	for (;;)
1082	{	1291	{
1083	decode_ws (dec); EXPECT_CH ('"');	1292	EXPECT_CH ('"');
1084		1293
1085	// heuristic: assume that	1294	// heuristic: assume that
1086	// a) decode_str + hv_store_ent are abysmally slow.	1295	// a) decode_str + hv_store_ent are abysmally slow.
1087	// b) most hash keys are short, simple ascii text.	1296	// b) most hash keys are short, simple ascii text.
1088	// => try to "fast-match" such strings to avoid	1297	// => try to "fast-match" such strings to avoid
…		…
1092	char *p = dec->cur;	1301	char *p = dec->cur;
1093	char *e = p + 24; // only try up to 24 bytes	1302	char *e = p + 24; // only try up to 24 bytes
1094		1303
1095	for (;;)	1304	for (;;)
1096	{	1305	{
1097	// the >= 0x80 is true on most architectures	1306	// the >= 0x80 is false on most architectures
1098	if (p == e \|\| *p < 0x20 \|\| *p >= 0x80 \|\| *p == '\\')	1307	if (p == e \|\| *p < 0x20 \|\| *p >= 0x80 \|\| *p == '\\')
1099	{	1308	{
1100	// slow path, back up and use decode_str	1309	// slow path, back up and use decode_str
1101	SV *key = decode_str (dec);	1310	SV *key = decode_str (dec);
1102	if (!key)	1311	if (!key)
1103	goto fail;	1312	goto fail;
1104		1313
1105	decode_ws (dec); EXPECT_CH (':');	1314	decode_ws (dec); EXPECT_CH (':');
1106		1315
		1316	decode_ws (dec);
1107	value = decode_sv (dec);	1317	value = decode_sv (dec);
1108	if (!value)	1318	if (!value)
1109	{	1319	{
1110	SvREFCNT_dec (key);	1320	SvREFCNT_dec (key);
1111	goto fail;	1321	goto fail;
…		…
1123	int len = p - key;	1333	int len = p - key;
1124	dec->cur = p + 1;	1334	dec->cur = p + 1;
1125		1335
1126	decode_ws (dec); EXPECT_CH (':');	1336	decode_ws (dec); EXPECT_CH (':');
1127		1337
		1338	decode_ws (dec);
1128	value = decode_sv (dec);	1339	value = decode_sv (dec);
1129	if (!value)	1340	if (!value)
1130	goto fail;	1341	goto fail;
1131		1342
1132	hv_store (hv, key, len, value, 0);	1343	hv_store (hv, key, len, value, 0);
…		…
1148		1359
1149	if (*dec->cur != ',')	1360	if (*dec->cur != ',')
1150	ERR (", or } expected while parsing object/hash");	1361	ERR (", or } expected while parsing object/hash");
1151		1362
1152	++dec->cur;	1363	++dec->cur;
		1364
		1365	decode_ws (dec);
		1366
		1367	if (*dec->cur == '}' && dec->json.flags & F_RELAXED)
		1368	{
		1369	++dec->cur;
		1370	break;
		1371	}
1153	}	1372	}
1154		1373
1155	DEC_DEC_DEPTH;	1374	DEC_DEC_DEPTH;
1156	sv = newRV_noinc ((SV *)hv);	1375	sv = newRV_noinc ((SV *)hv);
1157		1376
…		…
1222	}	1441	}
1223		1442
1224	static SV *	1443	static SV *
1225	decode_sv (dec_t *dec)	1444	decode_sv (dec_t *dec)
1226	{	1445	{
1227	decode_ws (dec);
1228
1229	// the beauty of JSON: you need exactly one character lookahead	1446	// the beauty of JSON: you need exactly one character lookahead
1230	// to parse anything.	1447	// to parse everything.
1231	switch (*dec->cur)	1448	switch (*dec->cur)
1232	{	1449	{
1233	case '"': ++dec->cur; return decode_str (dec);	1450	case '"': ++dec->cur; return decode_str (dec);
1234	case '[': ++dec->cur; return decode_av (dec);	1451	case '[': ++dec->cur; return decode_av (dec);
1235	case '{': ++dec->cur; return decode_hv (dec);	1452	case '{': ++dec->cur; return decode_hv (dec);
1236		1453
1237	case '-':	1454	case '-':
1238	case '0': case '1': case '2': case '3': case '4':	1455	case '0': case '1': case '2': case '3': case '4':
1239	case '5': case '6': case '7': case '8': case '9':	1456	case '5': case '6': case '7': case '8': case '9':
1240	return decode_num (dec);	1457	return decode_num (dec);
…		…
1242	case 't':	1459	case 't':
1243	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))	1460	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
1244	{	1461	{
1245	dec->cur += 4;	1462	dec->cur += 4;
1246	#if JSON_SLOW	1463	#if JSON_SLOW
1247	json_true = get_sv ("JSON::XS::true", 1); SvREADONLY_on (json_true);	1464	json_true = get_bool ("JSON::XS::true");
1248	#endif	1465	#endif
1249	return SvREFCNT_inc (json_true);	1466	return newSVsv (json_true);
1250	}	1467	}
1251	else	1468	else
1252	ERR ("'true' expected");	1469	ERR ("'true' expected");
1253		1470
1254	break;	1471	break;
…		…
1256	case 'f':	1473	case 'f':
1257	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))	1474	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
1258	{	1475	{
1259	dec->cur += 5;	1476	dec->cur += 5;
1260	#if JSON_SLOW	1477	#if JSON_SLOW
1261	json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false);	1478	json_false = get_bool ("JSON::XS::false");
1262	#endif	1479	#endif
1263	return SvREFCNT_inc (json_false);	1480	return newSVsv (json_false);
1264	}	1481	}
1265	else	1482	else
1266	ERR ("'false' expected");	1483	ERR ("'false' expected");
1267		1484
1268	break;	1485	break;
…		…
1286	fail:	1503	fail:
1287	return 0;	1504	return 0;
1288	}	1505	}
1289		1506
1290	static SV *	1507	static SV *
1291	decode_json (SV *string, JSON *json, UV *offset_return)	1508	decode_json (SV *string, JSON *json, char **offset_return)
1292	{	1509	{
1293	dec_t dec;	1510	dec_t dec;
1294	UV offset;
1295	SV *sv;	1511	SV *sv;
1296		1512
		1513	/* work around bugs in 5.10 where manipulating magic values
		1514	* will perl ignore the magic in subsequent accesses
		1515	*/
1297	SvGETMAGIC (string);	1516	/*SvGETMAGIC (string);*/
		1517	if (SvMAGICAL (string))
		1518	string = sv_2mortal (newSVsv (string));
		1519
1298	SvUPGRADE (string, SVt_PV);	1520	SvUPGRADE (string, SVt_PV);
1299		1521
1300	if (json->flags & F_MAXSIZE && SvCUR (string) > DEC_SIZE (json->flags))	1522	/* work around a bug in perl 5.10, which causes SvCUR to fail an
		1523	* assertion with -DDEBUGGING, although SvCUR is documented to
		1524	* return the xpv_cur field which certainly exists after upgrading.
		1525	* according to nicholas clark, calling SvPOK fixes this.
		1526	* But it doesn't fix it, so try another workaround, call SvPV_nolen
		1527	* and hope for the best.
		1528	* Damnit, SvPV_nolen still trips over yet another assertion. This
		1529	* assertion business is seriously broken, try yet another workaround
		1530	* for the broken -DDEBUGGING.
		1531	*/
		1532	{
		1533	#ifdef DEBUGGING
		1534	STRLEN offset = SvOK (string) ? sv_len (string) : 0;
		1535	#else
		1536	STRLEN offset = SvCUR (string);
		1537	#endif
		1538
		1539	if (offset > json->max_size && json->max_size)
1301	croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",	1540	croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
1302	(unsigned long)SvCUR (string), (unsigned long)DEC_SIZE (json->flags));	1541	(unsigned long)SvCUR (string), (unsigned long)json->max_size);
		1542	}
1303		1543
1304	if (json->flags & F_UTF8)	1544	if (json->flags & F_UTF8)
1305	sv_utf8_downgrade (string, 0);	1545	sv_utf8_downgrade (string, 0);
1306	else	1546	else
1307	sv_utf8_upgrade (string);	1547	sv_utf8_upgrade (string);
1308		1548
1309	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP	1549	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
1310		1550
1311	dec.json = *json;	1551	dec.json = *json;
1312	dec.cur = SvPVX (string);	1552	dec.cur = SvPVX (string);
1313	dec.end = SvEND (string);	1553	dec.end = SvEND (string);
1314	dec.err = 0;	1554	dec.err = 0;
1315	dec.depth = 0;	1555	dec.depth = 0;
1316	dec.maxdepth = DEC_DEPTH (dec.json.flags);
1317		1556
1318	if (dec.json.cb_object \|\| dec.json.cb_sk_object)	1557	if (dec.json.cb_object \|\| dec.json.cb_sk_object)
1319	dec.json.flags \|= F_HOOK;	1558	dec.json.flags \|= F_HOOK;
1320		1559
1321	*dec.end = 0; // this should basically be a nop, too, but make sure it's there	1560	*dec.end = 0; // this should basically be a nop, too, but make sure it's there
		1561
		1562	decode_ws (&dec);
1322	sv = decode_sv (&dec);	1563	sv = decode_sv (&dec);
		1564
		1565	if (offset_return)
		1566	*offset_return = dec.cur;
1323		1567
1324	if (!(offset_return \|\| !sv))	1568	if (!(offset_return \|\| !sv))
1325	{	1569	{
1326	// check for trailing garbage	1570	// check for trailing garbage
1327	decode_ws (&dec);	1571	decode_ws (&dec);
…		…
1330	{	1574	{
1331	dec.err = "garbage after JSON object";	1575	dec.err = "garbage after JSON object";
1332	SvREFCNT_dec (sv);	1576	SvREFCNT_dec (sv);
1333	sv = 0;	1577	sv = 0;
1334	}	1578	}
1335	}
1336
1337	if (offset_return \|\| !sv)
1338	{
1339	offset = dec.json.flags & F_UTF8
1340	? dec.cur - SvPVX (string)
1341	: utf8_distance (dec.cur, SvPVX (string));
1342
1343	if (offset_return)
1344	*offset_return = offset;
1345	}	1579	}
1346		1580
1347	if (!sv)	1581	if (!sv)
1348	{	1582	{
1349	SV *uni = sv_newmortal ();	1583	SV *uni = sv_newmortal ();
…		…
1355	SAVEVPTR (PL_curcop);	1589	SAVEVPTR (PL_curcop);
1356	PL_curcop = &cop;	1590	PL_curcop = &cop;
1357	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);	1591	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
1358	LEAVE;	1592	LEAVE;
1359		1593
1360	croak ("%s, at character offset %d [\"%s\"]",	1594	croak ("%s, at character offset %d (before \"%s\")",
1361	dec.err,	1595	dec.err,
1362	(int)offset,	1596	ptr_to_index (string, dec.cur),
1363	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");	1597	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
1364	}	1598	}
1365		1599
1366	sv = sv_2mortal (sv);	1600	sv = sv_2mortal (sv);
1367		1601
1368	if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv))	1602	if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv))
1369	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");	1603	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
1370		1604
1371	return sv;	1605	return sv;
		1606	}
		1607
		1608	/////////////////////////////////////////////////////////////////////////////
		1609	// incremental parser
		1610
		1611	static void
		1612	incr_parse (JSON *self)
		1613	{
		1614	const char *p = SvPVX (self->incr_text) + self->incr_pos;
		1615
		1616	// the state machine here is a bit convoluted and could be simplified a lot
		1617	// but this would make it slower, so...
		1618
		1619	for (;;)
		1620	{
		1621	//printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D
		1622	switch (self->incr_mode)
		1623	{
		1624	// only used for initial whitespace skipping
		1625	case INCR_M_WS:
		1626	for (;;)
		1627	{
		1628	if (*p > 0x20)
		1629	{
		1630	if (*p == '#')
		1631	{
		1632	self->incr_mode = INCR_M_C0;
		1633	goto incr_m_c;
		1634	}
		1635	else
		1636	{
		1637	self->incr_mode = INCR_M_JSON;
		1638	goto incr_m_json;
		1639	}
		1640	}
		1641	else if (!*p)
		1642	goto interrupt;
		1643
		1644	++p;
		1645	}
		1646
		1647	// skip a single char inside a string (for \\-processing)
		1648	case INCR_M_BS:
		1649	if (!*p)
		1650	goto interrupt;
		1651
		1652	++p;
		1653	self->incr_mode = INCR_M_STR;
		1654	goto incr_m_str;
		1655
		1656	// inside #-style comments
		1657	case INCR_M_C0:
		1658	case INCR_M_C1:
		1659	incr_m_c:
		1660	for (;;)
		1661	{
		1662	if (*p == '\n')
		1663	{
		1664	self->incr_mode = self->incr_mode == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON;
		1665	break;
		1666	}
		1667	else if (!*p)
		1668	goto interrupt;
		1669
		1670	++p;
		1671	}
		1672
		1673	break;
		1674
		1675	// inside a string
		1676	case INCR_M_STR:
		1677	incr_m_str:
		1678	for (;;)
		1679	{
		1680	if (*p == '"')
		1681	{
		1682	++p;
		1683	self->incr_mode = INCR_M_JSON;
		1684
		1685	if (!self->incr_nest)
		1686	goto interrupt;
		1687
		1688	goto incr_m_json;
		1689	}
		1690	else if (*p == '\\')
		1691	{
		1692	++p; // "virtually" consumes character after \
		1693
		1694	if (!*p) // if at end of string we have to switch modes
		1695	{
		1696	self->incr_mode = INCR_M_BS;
		1697	goto interrupt;
		1698	}
		1699	}
		1700	else if (!*p)
		1701	goto interrupt;
		1702
		1703	++p;
		1704	}
		1705
		1706	// after initial ws, outside string
		1707	case INCR_M_JSON:
		1708	incr_m_json:
		1709	for (;;)
		1710	{
		1711	switch (*p++)
		1712	{
		1713	case 0:
		1714	--p;
		1715	goto interrupt;
		1716
		1717	case 0x09:
		1718	case 0x0a:
		1719	case 0x0d:
		1720	case 0x20:
		1721	if (!self->incr_nest)
		1722	{
		1723	--p; // do not eat the whitespace, let the next round do it
		1724	goto interrupt;
		1725	}
		1726	break;
		1727
		1728	case '"':
		1729	self->incr_mode = INCR_M_STR;
		1730	goto incr_m_str;
		1731
		1732	case '[':
		1733	case '{':
		1734	if (++self->incr_nest > self->max_depth)
		1735	croak (ERR_NESTING_EXCEEDED);
		1736	break;
		1737
		1738	case ']':
		1739	case '}':
		1740	if (--self->incr_nest <= 0)
		1741	goto interrupt;
		1742	break;
		1743
		1744	case '#':
		1745	self->incr_mode = INCR_M_C1;
		1746	goto incr_m_c;
		1747	}
		1748	}
		1749	}
		1750
		1751	modechange:
		1752	;
		1753	}
		1754
		1755	interrupt:
		1756	self->incr_pos = p - SvPVX (self->incr_text);
		1757	//printf ("interrupt<%.*s>\n", self->incr_pos, SvPVX(self->incr_text));//D
		1758	//printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D
1372	}	1759	}
1373		1760
1374	/////////////////////////////////////////////////////////////////////////////	1761	/////////////////////////////////////////////////////////////////////////////
1375	// XS interface functions	1762	// XS interface functions
1376		1763
…		…
1388	: -1;	1775	: -1;
1389		1776
1390	json_stash = gv_stashpv ("JSON::XS" , 1);	1777	json_stash = gv_stashpv ("JSON::XS" , 1);
1391	json_boolean_stash = gv_stashpv ("JSON::XS::Boolean", 1);	1778	json_boolean_stash = gv_stashpv ("JSON::XS::Boolean", 1);
1392		1779
1393	json_true = get_sv ("JSON::XS::true" , 1); SvREADONLY_on (json_true );	1780	json_true = get_bool ("JSON::XS::true");
1394	json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false);	1781	json_false = get_bool ("JSON::XS::false");
		1782
		1783	CvNODEBUG_on (get_cv ("JSON::XS::incr_text", 0)); /* the debugger completely breaks lvalue subs */
1395	}	1784	}
1396		1785
1397	PROTOTYPES: DISABLE	1786	PROTOTYPES: DISABLE
1398		1787
1399	void CLONE (...)	1788	void CLONE (...)
…		…
1404	void new (char *klass)	1793	void new (char *klass)
1405	PPCODE:	1794	PPCODE:
1406	{	1795	{
1407	SV *pv = NEWSV (0, sizeof (JSON));	1796	SV *pv = NEWSV (0, sizeof (JSON));
1408	SvPOK_only (pv);	1797	SvPOK_only (pv);
1409	Zero (SvPVX (pv), 1, JSON);	1798	json_init ((JSON *)SvPVX (pv));
1410	((JSON *)SvPVX (pv))->flags = F_DEFAULT;	1799	XPUSHs (sv_2mortal (sv_bless (
1411	XPUSHs (sv_2mortal (sv_bless (newRV_noinc (pv), JSON_STASH)));	1800	newRV_noinc (pv),
		1801	strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1)
		1802	)));
1412	}	1803	}
1413		1804
1414	void ascii (JSON *self, int enable = 1)	1805	void ascii (JSON *self, int enable = 1)
1415	ALIAS:	1806	ALIAS:
1416	ascii = F_ASCII	1807	ascii = F_ASCII
…		…
1423	pretty = F_PRETTY	1814	pretty = F_PRETTY
1424	allow_nonref = F_ALLOW_NONREF	1815	allow_nonref = F_ALLOW_NONREF
1425	shrink = F_SHRINK	1816	shrink = F_SHRINK
1426	allow_blessed = F_ALLOW_BLESSED	1817	allow_blessed = F_ALLOW_BLESSED
1427	convert_blessed = F_CONV_BLESSED	1818	convert_blessed = F_CONV_BLESSED
		1819	relaxed = F_RELAXED
		1820	allow_unknown = F_ALLOW_UNKNOWN
1428	PPCODE:	1821	PPCODE:
1429	{	1822	{
1430	if (enable)	1823	if (enable)
1431	self->flags \|= ix;	1824	self->flags \|= ix;
1432	else	1825	else
1433	self->flags &= ~ix;	1826	self->flags &= ~ix;
1434		1827
1435	XPUSHs (ST (0));	1828	XPUSHs (ST (0));
1436	}	1829	}
1437		1830
1438	void max_depth (JSON *self, UV max_depth = 0x80000000UL)	1831	void get_ascii (JSON *self)
		1832	ALIAS:
		1833	get_ascii = F_ASCII
		1834	get_latin1 = F_LATIN1
		1835	get_utf8 = F_UTF8
		1836	get_indent = F_INDENT
		1837	get_canonical = F_CANONICAL
		1838	get_space_before = F_SPACE_BEFORE
		1839	get_space_after = F_SPACE_AFTER
		1840	get_allow_nonref = F_ALLOW_NONREF
		1841	get_shrink = F_SHRINK
		1842	get_allow_blessed = F_ALLOW_BLESSED
		1843	get_convert_blessed = F_CONV_BLESSED
		1844	get_relaxed = F_RELAXED
		1845	get_allow_unknown = F_ALLOW_UNKNOWN
1439	PPCODE:	1846	PPCODE:
1440	{	1847	XPUSHs (boolSV (self->flags & ix));
1441	UV log2 = 0;
1442		1848
1443	if (max_depth > 0x80000000UL) max_depth = 0x80000000UL;	1849	void max_depth (JSON *self, U32 max_depth = 0x80000000UL)
1444		1850	PPCODE:
1445	while ((1UL << log2) < max_depth)	1851	self->max_depth = max_depth;
1446	++log2;
1447
1448	self->flags = self->flags & ~F_MAXDEPTH \| (log2 << S_MAXDEPTH);
1449
1450	XPUSHs (ST (0));	1852	XPUSHs (ST (0));
1451	}
1452		1853
		1854	U32 get_max_depth (JSON *self)
		1855	CODE:
		1856	RETVAL = self->max_depth;
		1857	OUTPUT:
		1858	RETVAL
		1859
1453	void max_size (JSON *self, UV max_size = 0)	1860	void max_size (JSON *self, U32 max_size = 0)
1454	PPCODE:	1861	PPCODE:
1455	{	1862	self->max_size = max_size;
1456	UV log2 = 0;
1457
1458	if (max_size > 0x80000000UL) max_size = 0x80000000UL;
1459	if (max_size == 1) max_size = 2;
1460
1461	while ((1UL << log2) < max_size)
1462	++log2;
1463
1464	self->flags = self->flags & ~F_MAXSIZE \| (log2 << S_MAXSIZE);
1465
1466	XPUSHs (ST (0));	1863	XPUSHs (ST (0));
1467	}	1864
		1865	int get_max_size (JSON *self)
		1866	CODE:
		1867	RETVAL = self->max_size;
		1868	OUTPUT:
		1869	RETVAL
1468		1870
1469	void filter_json_object (JSON *self, SV *cb = &PL_sv_undef)	1871	void filter_json_object (JSON *self, SV *cb = &PL_sv_undef)
1470	PPCODE:	1872	PPCODE:
1471	{	1873	{
1472	SvREFCNT_dec (self->cb_object);	1874	SvREFCNT_dec (self->cb_object);
…		…
1506	XPUSHs (decode_json (jsonstr, self, 0));	1908	XPUSHs (decode_json (jsonstr, self, 0));
1507		1909
1508	void decode_prefix (JSON *self, SV *jsonstr)	1910	void decode_prefix (JSON *self, SV *jsonstr)
1509	PPCODE:	1911	PPCODE:
1510	{	1912	{
1511	UV offset;	1913	char *offset;
1512	EXTEND (SP, 2);	1914	EXTEND (SP, 2);
1513	PUSHs (decode_json (jsonstr, self, &offset));	1915	PUSHs (decode_json (jsonstr, self, &offset));
1514	PUSHs (sv_2mortal (newSVuv (offset)));	1916	PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset))));
		1917	}
		1918
		1919	void incr_parse (JSON *self, SV *jsonstr = 0)
		1920	PPCODE:
		1921	{
		1922	if (!self->incr_text)
		1923	self->incr_text = newSVpvn ("", 0);
		1924
		1925	// append data, if any
		1926	if (jsonstr)
		1927	{
		1928	if (SvUTF8 (jsonstr))
		1929	{
		1930	if (!SvUTF8 (self->incr_text))
		1931	{
		1932	/* utf-8-ness differs, need to upgrade */
		1933	sv_utf8_upgrade (self->incr_text);
		1934
		1935	if (self->incr_pos)
		1936	self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
		1937	- (U8 *)SvPVX (self->incr_text);
		1938	}
		1939	}
		1940	else if (SvUTF8 (self->incr_text))
		1941	sv_utf8_upgrade (jsonstr);
		1942
		1943	{
		1944	STRLEN len;
		1945	const char *str = SvPV (jsonstr, len);
		1946	STRLEN cur = SvCUR (self->incr_text);
		1947
		1948	if (SvLEN (self->incr_text) <= cur + len)
		1949	SvGROW (self->incr_text, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
		1950
		1951	Move (str, SvEND (self->incr_text), len, char);
		1952	SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len);
		1953	*SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there
		1954	}
		1955	}
		1956
		1957	if (GIMME_V != G_VOID)
		1958	do
		1959	{
		1960	char *offset;
		1961
		1962	if (!INCR_DONE (self))
		1963	{
		1964	incr_parse (self);
		1965
		1966	if (self->incr_pos > self->max_size && self->max_size)
		1967	croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
		1968	(unsigned long)self->incr_pos, (unsigned long)self->max_size);
		1969
		1970	if (!INCR_DONE (self))
		1971	break;
		1972	}
		1973
		1974	XPUSHs (decode_json (self->incr_text, self, &offset));
		1975
		1976	self->incr_pos -= offset - SvPVX (self->incr_text);
		1977	self->incr_nest = 0;
		1978	self->incr_mode = 0;
		1979
		1980	sv_chop (self->incr_text, offset);
		1981	}
		1982	while (GIMME_V == G_ARRAY);
		1983	}
		1984
		1985	SV *incr_text (JSON *self)
		1986	ATTRS: lvalue
		1987	CODE:
		1988	{
		1989	if (self->incr_pos)
		1990	croak ("incr_text can not be called when the incremental parser already started parsing");
		1991
		1992	RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef;
		1993	}
		1994	OUTPUT:
		1995	RETVAL
		1996
		1997	void incr_skip (JSON *self)
		1998	CODE:
		1999	{
		2000	if (self->incr_pos)
		2001	{
		2002	sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos);
		2003	self->incr_pos = 0;
		2004	self->incr_nest = 0;
		2005	self->incr_mode = 0;
		2006	}
		2007	}
		2008
		2009	void incr_reset (JSON *self)
		2010	CODE:
		2011	{
		2012	SvREFCNT_dec (self->incr_text);
		2013	self->incr_text = 0;
		2014	self->incr_pos = 0;
		2015	self->incr_nest = 0;
		2016	self->incr_mode = 0;
1515	}	2017	}
1516		2018
1517	void DESTROY (JSON *self)	2019	void DESTROY (JSON *self)
1518	CODE:	2020	CODE:
1519	SvREFCNT_dec (self->cb_sk_object);	2021	SvREFCNT_dec (self->cb_sk_object);
1520	SvREFCNT_dec (self->cb_object);	2022	SvREFCNT_dec (self->cb_object);
		2023	SvREFCNT_dec (self->incr_text);
1521		2024
1522	PROTOTYPES: ENABLE	2025	PROTOTYPES: ENABLE
1523		2026
1524	void to_json (SV *scalar)	2027	void encode_json (SV *scalar)
		2028	ALIAS:
		2029	to_json_ = 0
		2030	encode_json = F_UTF8
1525	PPCODE:	2031	PPCODE:
1526	{	2032	{
1527	JSON json = { F_DEFAULT \| F_UTF8 };	2033	JSON json;
		2034	json_init (&json);
		2035	json.flags \|= ix;
1528	XPUSHs (encode_json (scalar, &json));	2036	XPUSHs (encode_json (scalar, &json));
1529	}	2037	}
1530		2038
1531	void from_json (SV *jsonstr)	2039	void decode_json (SV *jsonstr)
		2040	ALIAS:
		2041	from_json_ = 0
		2042	decode_json = F_UTF8
1532	PPCODE:	2043	PPCODE:
1533	{	2044	{
1534	JSON json = { F_DEFAULT \| F_UTF8 };	2045	JSON json;
		2046	json_init (&json);
		2047	json.flags \|= ix;
1535	XPUSHs (decode_json (jsonstr, &json, 0));	2048	XPUSHs (decode_json (jsonstr, &json, 0));
1536	}	2049	}
1537		2050

Diff Legend

– Removed lines
+ Added lines
< Changed lines
> Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.62 by root, Sun Aug 26 22:27:32 2007 UTC vs. Revision 1.105 by root, Tue Jan 19 01:07:27 2010 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.62 by root, Sun Aug 26 22:27:32 2007 UTC vs.
Revision 1.105 by root, Tue Jan 19 01:07:27 2010 UTC