[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.7 by root, Fri Mar 23 15:57:18 2007 UTC vs.
Revision 1.48 by root, Sun Jul 1 22:20:00 2007 UTC

…		…
3	#include "XSUB.h"	3	#include "XSUB.h"
4		4
5	#include "assert.h"	5	#include "assert.h"
6	#include "string.h"	6	#include "string.h"
7	#include "stdlib.h"	7	#include "stdlib.h"
		8	#include "stdio.h"
8		9
		10	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)
		11	# define snprintf _snprintf // C compilers have this in stdio.h
		12	#endif
		13
		14	// some old perls do not have this, try to make it work, no
		15	// guarentees, though. if it breaks, you get to keep the pieces.
		16	#ifndef UTF8_MAXBYTES
		17	# define UTF8_MAXBYTES 13
		18	#endif
		19
9	#define F_ASCII 0x00000001	20	#define F_ASCII 0x00000001UL
		21	#define F_LATIN1 0x00000002UL
10	#define F_UTF8 0x00000002	22	#define F_UTF8 0x00000004UL
11	#define F_INDENT 0x00000004	23	#define F_INDENT 0x00000008UL
12	#define F_CANONICAL 0x00000008	24	#define F_CANONICAL 0x00000010UL
13	#define F_SPACE_BEFORE 0x00000010	25	#define F_SPACE_BEFORE 0x00000020UL
14	#define F_SPACE_AFTER 0x00000020	26	#define F_SPACE_AFTER 0x00000040UL
15	#define F_JSON_RPC 0x00000040
16	#define F_ALLOW_NONREF 0x00000080	27	#define F_ALLOW_NONREF 0x00000100UL
17	#define F_SHRINK 0x00000100	28	#define F_SHRINK 0x00000200UL
		29	#define F_ALLOW_BLESSED 0x00000400UL
		30	#define F_CONV_BLESSED 0x00000800UL // NYI
		31	#define F_MAXDEPTH 0xf8000000UL
		32	#define S_MAXDEPTH 27
		33	#define F_MAXSIZE 0x01f00000UL
		34	#define S_MAXSIZE 20
		35
		36	#define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH))
		37	#define DEC_SIZE(flags) (1UL << ((flags & F_MAXSIZE ) >> S_MAXSIZE ))
18		38
19	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER	39	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER
20	#define F_DEFAULT 0	40	#define F_DEFAULT (9UL << S_MAXDEPTH)
21		41
22	#define INIT_SIZE 32 // initial scalar size to be allocated	42	#define INIT_SIZE 32 // initial scalar size to be allocated
		43	#define INDENT_STEP 3 // spaces per indentation level
		44
		45	#define SHORT_STRING_LEN 16384 // special-case strings of up to this size
23		46
24	#define SB do {	47	#define SB do {
25	#define SE } while (0)	48	#define SE } while (0)
26		49
27	static HV *json_stash;	50	#if __GNUC__ >= 3
		51	# define expect(expr,value) __builtin_expect ((expr),(value))
		52	# define inline inline
		53	#else
		54	# define expect(expr,value) (expr)
		55	# define inline static
		56	#endif
		57
		58	#define expect_false(expr) expect ((expr) != 0, 0)
		59	#define expect_true(expr) expect ((expr) != 0, 1)
		60
		61	static HV json_stash, json_boolean_stash; // JSON::XS::
		62	static SV json_true, json_false;
		63
		64	typedef struct {
		65	U32 flags;
		66	} JSON;
		67
		68	/////////////////////////////////////////////////////////////////////////////
		69	// utility functions
		70
		71	inline void
		72	shrink (SV *sv)
		73	{
		74	sv_utf8_downgrade (sv, 1);
		75	if (SvLEN (sv) > SvCUR (sv) + 1)
		76	{
		77	#ifdef SvPV_shrink_to_cur
		78	SvPV_shrink_to_cur (sv);
		79	#elif defined (SvPV_renew)
		80	SvPV_renew (sv, SvCUR (sv) + 1);
		81	#endif
		82	}
		83	}
		84
		85	// decode an utf-8 character and return it, or (UV)-1 in
		86	// case of an error.
		87	// we special-case "safe" characters from U+80 .. U+7FF,
		88	// but use the very good perl function to parse anything else.
		89	// note that we never call this function for a ascii codepoints
		90	inline UV
		91	decode_utf8 (unsigned char s, STRLEN len, STRLEN clen)
		92	{
		93	if (expect_false (s[0] > 0xdf \|\| s[0] < 0xc2))
		94	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
		95	else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)
		96	{
		97	*clen = 2;
		98	return ((s[0] & 0x1f) << 6) \| (s[1] & 0x3f);
		99	}
		100	else
		101	{
		102	*clen = (STRLEN)-1;
		103	return (UV)-1;
		104	}
		105	}
		106
		107	/////////////////////////////////////////////////////////////////////////////
		108	// encoder
28		109
29	// structure used for encoding JSON	110	// structure used for encoding JSON
30	typedef struct	111	typedef struct
31	{	112	{
32	char *cur;	113	char *cur; // SvPVX (sv) + current output position
33	STRLEN len; // SvLEN (sv)
34	char *end; // SvEND (sv)	114	char *end; // SvEND (sv)
35	SV *sv;	115	SV *sv; // result scalar
36	UV flags;	116	JSON json;
37	int max_recurse;	117	U32 indent; // indentation level
38	int indent;	118	U32 maxdepth; // max. indentation/recursion level
39	} enc_t;	119	} enc_t;
40		120
41	// structure used for decoding JSON	121	inline void
42	typedef struct
43	{
44	char *cur;
45	char *end;
46	const char *err;
47	UV flags;
48	} dec_t;
49
50	static UV *
51	SvJSON (SV *sv)
52	{
53	if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
54	croak ("object is not of type JSON::XS");
55
56	return &SvUVX (SvRV (sv));
57	}
58
59	static void
60	shrink (SV *sv)
61	{
62	sv_utf8_downgrade (sv, 1);
63	#ifdef SvPV_shrink_to_cur
64	SvPV_shrink_to_cur (sv);
65	#endif
66	}
67
68	/////////////////////////////////////////////////////////////////////////////
69
70	static void
71	need (enc_t *enc, STRLEN len)	122	need (enc_t *enc, STRLEN len)
72	{	123	{
73	if (enc->cur + len >= enc->end)	124	if (expect_false (enc->cur + len >= enc->end))
74	{	125	{
75	STRLEN cur = enc->cur - SvPVX (enc->sv);	126	STRLEN cur = enc->cur - SvPVX (enc->sv);
76	SvGROW (enc->sv, cur + len + 1);	127	SvGROW (enc->sv, cur + len + 1);
77	enc->cur = SvPVX (enc->sv) + cur;	128	enc->cur = SvPVX (enc->sv) + cur;
78	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);	129	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
79	}	130	}
80	}	131	}
81		132
82	static void	133	inline void
83	encode_ch (enc_t *enc, char ch)	134	encode_ch (enc_t *enc, char ch)
84	{	135	{
85	need (enc, 1);	136	need (enc, 1);
86	*enc->cur++ = ch;	137	*enc->cur++ = ch;
87	}	138	}
…		…
95		146
96	while (str < end)	147	while (str < end)
97	{	148	{
98	unsigned char ch = (unsigned char )str;	149	unsigned char ch = (unsigned char )str;
99		150
100	if (ch >= 0x20 && ch < 0x80) // most common case	151	if (expect_true (ch >= 0x20 && ch < 0x80)) // most common case
101	{	152	{
102	if (ch == '"') // but with slow exceptions	153	if (expect_false (ch == '"')) // but with slow exceptions
103	{	154	{
104	need (enc, len += 1);	155	need (enc, len += 1);
105	*enc->cur++ = '\\';	156	*enc->cur++ = '\\';
106	*enc->cur++ = '"';	157	*enc->cur++ = '"';
107	}	158	}
108	else if (ch == '\\')	159	else if (expect_false (ch == '\\'))
109	{	160	{
110	need (enc, len += 1);	161	need (enc, len += 1);
111	*enc->cur++ = '\\';	162	*enc->cur++ = '\\';
112	*enc->cur++ = '\\';	163	*enc->cur++ = '\\';
113	}	164	}
…		…
131	STRLEN clen;	182	STRLEN clen;
132	UV uch;	183	UV uch;
133		184
134	if (is_utf8)	185	if (is_utf8)
135	{	186	{
136	uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);	187	uch = decode_utf8 (str, end - str, &clen);
137	if (clen == (STRLEN)-1)	188	if (clen == (STRLEN)-1)
138	croak ("malformed UTF-8 character in string, cannot convert to JSON");	189	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
139	}	190	}
140	else	191	else
141	{	192	{
142	uch = ch;	193	uch = ch;
143	clen = 1;	194	clen = 1;
144	}	195	}
145		196
146	if (uch < 0x80 \|\| enc->flags & F_ASCII)	197	if (uch > 0x10FFFFUL)
		198	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
		199
		200	if (uch < 0x80 \|\| enc->json.flags & F_ASCII \|\| (enc->json.flags & F_LATIN1 && uch > 0xFF))
147	{	201	{
148	if (uch > 0xFFFFUL)	202	if (uch > 0xFFFFUL)
149	{	203	{
150	need (enc, len += 11);	204	need (enc, len += 11);
151	sprintf (enc->cur, "\\u%04x\\u%04x",	205	sprintf (enc->cur, "\\u%04x\\u%04x",
152	(uch - 0x10000) / 0x400 + 0xD800,	206	(int)((uch - 0x10000) / 0x400 + 0xD800),
153	(uch - 0x10000) % 0x400 + 0xDC00);	207	(int)((uch - 0x10000) % 0x400 + 0xDC00));
154	enc->cur += 12;	208	enc->cur += 12;
155	}	209	}
156	else	210	else
157	{	211	{
158	static char hexdigit [16] = "0123456789abcdef";	212	static char hexdigit [16] = "0123456789abcdef";
…		…
165	*enc->cur++ = hexdigit [(uch >> 0) & 15];	219	*enc->cur++ = hexdigit [(uch >> 0) & 15];
166	}	220	}
167		221
168	str += clen;	222	str += clen;
169	}	223	}
		224	else if (enc->json.flags & F_LATIN1)
		225	{
		226	*enc->cur++ = uch;
		227	str += clen;
		228	}
170	else if (is_utf8)	229	else if (is_utf8)
171	{	230	{
172	need (enc, len += clen);	231	need (enc, len += clen);
173	do	232	do
174	{	233	{
…		…
176	}	235	}
177	while (--clen);	236	while (--clen);
178	}	237	}
179	else	238	else
180	{	239	{
181	need (enc, len += 10); // never more than 11 bytes needed	240	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
182	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);	241	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
183	++str;	242	++str;
184	}	243	}
185	}	244	}
186	}	245	}
…		…
188		247
189	--len;	248	--len;
190	}	249	}
191	}	250	}
192		251
193	#define INDENT SB \	252	inline void
		253	encode_indent (enc_t *enc)
		254	{
194	if (enc->flags & F_INDENT) \	255	if (enc->json.flags & F_INDENT)
195	{ \	256	{
196	int i_; \	257	int spaces = enc->indent * INDENT_STEP;
197	need (enc, enc->indent); \	258
198	for (i_ = enc->indent * 3; i_--; )\	259	need (enc, spaces);
		260	memset (enc->cur, ' ', spaces);
		261	enc->cur += spaces;
		262	}
		263	}
		264
		265	inline void
		266	encode_space (enc_t *enc)
		267	{
		268	need (enc, 1);
		269	encode_ch (enc, ' ');
		270	}
		271
		272	inline void
		273	encode_nl (enc_t *enc)
		274	{
		275	if (enc->json.flags & F_INDENT)
		276	{
		277	need (enc, 1);
199	encode_ch (enc, ' '); \	278	encode_ch (enc, '\n');
200	} \	279	}
201	SE	280	}
202		281
203	#define SPACE SB need (enc, 1); encode_ch (enc, ' '); SE	282	inline void
204	#define NL SB if (enc->flags & F_INDENT) { need (enc, 1); encode_ch (enc, '\n'); } SE	283	encode_comma (enc_t *enc)
205	#define COMMA SB \	284	{
206	encode_ch (enc, ','); \	285	encode_ch (enc, ',');
		286
207	if (enc->flags & F_INDENT) \	287	if (enc->json.flags & F_INDENT)
208	NL; \	288	encode_nl (enc);
209	else if (enc->flags & F_SPACE_AFTER) \	289	else if (enc->json.flags & F_SPACE_AFTER)
210	SPACE; \	290	encode_space (enc);
211	SE	291	}
212		292
213	static void encode_sv (enc_t enc, SV sv);	293	static void encode_sv (enc_t enc, SV sv);
214		294
215	static void	295	static void
216	encode_av (enc_t enc, AV av)	296	encode_av (enc_t enc, AV av)
217	{	297	{
218	int i, len = av_len (av);	298	int i, len = av_len (av);
219		299
220	encode_ch (enc, '['); NL;	300	if (enc->indent >= enc->maxdepth)
		301	croak ("data structure too deep (hit recursion limit)");
		302
		303	encode_ch (enc, '['); encode_nl (enc);
221	++enc->indent;	304	++enc->indent;
222		305
223	for (i = 0; i <= len; ++i)	306	for (i = 0; i <= len; ++i)
224	{	307	{
225	INDENT;	308	encode_indent (enc);
226	encode_sv (enc, *av_fetch (av, i, 0));	309	encode_sv (enc, *av_fetch (av, i, 0));
227		310
228	if (i < len)	311	if (i < len)
229	COMMA;	312	encode_comma (enc);
230	}	313	}
231		314
232	NL;	315	encode_nl (enc);
233		316
234	--enc->indent;	317	--enc->indent;
235	INDENT; encode_ch (enc, ']');	318	encode_indent (enc); encode_ch (enc, ']');
236	}	319	}
237		320
238	static void	321	static void
239	encode_he (enc_t enc, HE he)	322	encode_he (enc_t enc, HE he)
240	{	323	{
…		…
254	else	337	else
255	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));	338	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
256		339
257	encode_ch (enc, '"');	340	encode_ch (enc, '"');
258		341
259	if (enc->flags & F_SPACE_BEFORE) SPACE;	342	if (enc->json.flags & F_SPACE_BEFORE) encode_space (enc);
260	encode_ch (enc, ':');	343	encode_ch (enc, ':');
261	if (enc->flags & F_SPACE_AFTER ) SPACE;	344	if (enc->json.flags & F_SPACE_AFTER ) encode_space (enc);
262	encode_sv (enc, HeVAL (he));	345	encode_sv (enc, HeVAL (he));
263	}	346	}
264		347
265	// compare hash entries, used when all keys are bytestrings	348	// compare hash entries, used when all keys are bytestrings
266	static int	349	static int
…		…
272	HE b = (HE **)b_;	355	HE b = (HE **)b_;
273		356
274	STRLEN la = HeKLEN (a);	357	STRLEN la = HeKLEN (a);
275	STRLEN lb = HeKLEN (b);	358	STRLEN lb = HeKLEN (b);
276		359
277	if (!(cmp == memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))	360	if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
278	cmp = la < lb ? -1 : la == lb ? 0 : 1;	361	cmp = la - lb;
279		362
280	return cmp;	363	return cmp;
281	}	364	}
282		365
283	// compare hash entries, used when some keys are sv's or utf-x	366	// compare hash entries, used when some keys are sv's or utf-x
…		…
290	static void	373	static void
291	encode_hv (enc_t enc, HV hv)	374	encode_hv (enc_t enc, HV hv)
292	{	375	{
293	int count, i;	376	int count, i;
294		377
		378	if (enc->indent >= enc->maxdepth)
		379	croak ("data structure too deep (hit recursion limit)");
		380
295	encode_ch (enc, '{'); NL; ++enc->indent;	381	encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;
296		382
297	if ((count = hv_iterinit (hv)))	383	if ((count = hv_iterinit (hv)))
298	{	384	{
299	// for canonical output we have to sort by keys first	385	// for canonical output we have to sort by keys first
300	// actually, this is mostly due to the stupid so-called	386	// actually, this is mostly due to the stupid so-called
301	// security workaround added somewhere in 5.8.x.	387	// security workaround added somewhere in 5.8.x.
302	// that randomises hash orderings	388	// that randomises hash orderings
303	if (enc->flags & F_CANONICAL)	389	if (enc->json.flags & F_CANONICAL)
304	{	390	{
305	HE he, hes [count];
306	int fast = 1;	391	int fast = 1;
		392	HE *he;
		393	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)
		394	HE *hes = _alloca (count sizeof (HE));
		395	#else
		396	HE *hes [count]; // if your compiler dies here, you need to enable C99 mode
		397	#endif
307		398
308	i = 0;	399	i = 0;
309	while ((he = hv_iternext (hv)))	400	while ((he = hv_iternext (hv)))
310	{	401	{
311	hes [i++] = he;	402	hes [i++] = he;
…		…
317		408
318	if (fast)	409	if (fast)
319	qsort (hes, count, sizeof (HE *), he_cmp_fast);	410	qsort (hes, count, sizeof (HE *), he_cmp_fast);
320	else	411	else
321	{	412	{
322	// hack to disable "use bytes"	413	// hack to forcefully disable "use bytes"
323	COP *oldcop = PL_curcop, cop;	414	COP cop = *PL_curcop;
324	cop.op_private = 0;	415	cop.op_private = 0;
		416
		417	ENTER;
		418	SAVETMPS;
		419
		420	SAVEVPTR (PL_curcop);
325	PL_curcop = &cop;	421	PL_curcop = &cop;
326		422
327	SAVETMPS;
328	qsort (hes, count, sizeof (HE *), he_cmp_slow);	423	qsort (hes, count, sizeof (HE *), he_cmp_slow);
		424
329	FREETMPS;	425	FREETMPS;
330		426	LEAVE;
331	PL_curcop = oldcop;
332	}	427	}
333		428
334	for (i = 0; i < count; ++i)	429	for (i = 0; i < count; ++i)
335	{	430	{
336	INDENT;	431	encode_indent (enc);
337	encode_he (enc, hes [i]);	432	encode_he (enc, hes [i]);
338		433
339	if (i < count - 1)	434	if (i < count - 1)
340	COMMA;	435	encode_comma (enc);
341	}
342
343	NL;	436	}
		437
		438	encode_nl (enc);
344	}	439	}
345	else	440	else
346	{	441	{
347	SV *sv;
348	HE *he = hv_iternext (hv);	442	HE *he = hv_iternext (hv);
349		443
350	for (;;)	444	for (;;)
351	{	445	{
352	INDENT;	446	encode_indent (enc);
353	encode_he (enc, he);	447	encode_he (enc, he);
354		448
355	if (!(he = hv_iternext (hv)))	449	if (!(he = hv_iternext (hv)))
356	break;	450	break;
357		451
358	COMMA;	452	encode_comma (enc);
359	}
360
361	NL;	453	}
		454
		455	encode_nl (enc);
362	}	456	}
363	}	457	}
364		458
365	--enc->indent; INDENT; encode_ch (enc, '}');	459	--enc->indent; encode_indent (enc); encode_ch (enc, '}');
		460	}
		461
		462	// encode objects, arrays and special \0=false and \1=true values.
		463	static void
		464	encode_rv (enc_t enc, SV sv)
		465	{
		466	svtype svt;
		467
		468	SvGETMAGIC (sv);
		469	svt = SvTYPE (sv);
		470
		471	if (expect_false (SvOBJECT (sv)))
		472	{
		473	if (SvSTASH (sv) == json_boolean_stash)
		474	{
		475	if (SvIV (sv) == 0)
		476	encode_str (enc, "false", 5, 0);
		477	else
		478	encode_str (enc, "true", 4, 0);
		479	}
		480	else
		481	{
		482	#if 0
		483	if (0 && sv_derived_from (rv, "JSON::Literal"))
		484	{
		485	// not yet
		486	}
		487	#endif
		488	if (enc->json.flags & F_CONV_BLESSED)
		489	{
		490	// we re-bless the reference to get overload and other niceties right
		491	GV *to_json = gv_fetchmethod_autoload (SvSTASH (sv), "TO_JSON", 1);
		492
		493	if (to_json)
		494	{
		495	dSP;
		496	ENTER;
		497	SAVETMPS;
		498	PUSHMARK (SP);
		499	XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv)));
		500
		501	// calling with G_SCALAR ensures that we always get a 1 reutrn value
		502	// check anyways.
		503	PUTBACK;
		504	assert (1 == call_sv ((SV *)GvCV (to_json), G_SCALAR));
		505	SPAGAIN;
		506
		507	encode_sv (enc, POPs);
		508
		509	FREETMPS;
		510	LEAVE;
		511	}
		512	else if (enc->json.flags & F_ALLOW_BLESSED)
		513	encode_str (enc, "null", 4, 0);
		514	else
		515	croak ("encountered object '%s', but neither allow_blessed enabled nor TO_JSON method available on it",
		516	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
		517	}
		518	else if (enc->json.flags & F_ALLOW_BLESSED)
		519	encode_str (enc, "null", 4, 0);
		520	else
		521	croak ("encountered object '%s', but neither allow_blessed nor convert_blessed settings are enabled",
		522	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
		523	}
		524	}
		525	else if (svt == SVt_PVHV)
		526	encode_hv (enc, (HV *)sv);
		527	else if (svt == SVt_PVAV)
		528	encode_av (enc, (AV *)sv);
		529	else if (svt < SVt_PVAV)
		530	{
		531	if (SvNIOK (sv) && SvIV (sv) == 0)
		532	encode_str (enc, "false", 5, 0);
		533	else if (SvNIOK (sv) && SvIV (sv) == 1)
		534	encode_str (enc, "true", 4, 0);
		535	else
		536	croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",
		537	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
		538	}
		539	else
		540	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
		541	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
366	}	542	}
367		543
368	static void	544	static void
369	encode_sv (enc_t enc, SV sv)	545	encode_sv (enc_t enc, SV sv)
370	{	546	{
…		…
378	encode_str (enc, str, len, SvUTF8 (sv));	554	encode_str (enc, str, len, SvUTF8 (sv));
379	encode_ch (enc, '"');	555	encode_ch (enc, '"');
380	}	556	}
381	else if (SvNOKp (sv))	557	else if (SvNOKp (sv))
382	{	558	{
		559	// trust that perl will do the right thing w.r.t. JSON syntax.
383	need (enc, NV_DIG + 32);	560	need (enc, NV_DIG + 32);
384	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);	561	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
385	enc->cur += strlen (enc->cur);	562	enc->cur += strlen (enc->cur);
386	}	563	}
387	else if (SvIOKp (sv))	564	else if (SvIOKp (sv))
388	{	565	{
389	need (enc, 64);	566	// we assume we can always read an IV as a UV
		567	if (SvUV (sv) & ~(UV)0x7fff)
		568	{
		569	// large integer, use the (rather slow) snprintf way.
		570	need (enc, sizeof (UV) * 3);
390	enc->cur +=	571	enc->cur +=
391	SvIsUV(sv)	572	SvIsUV(sv)
392	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))	573	? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv))
393	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));	574	: snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv));
		575	}
		576	else
		577	{
		578	// optimise the "small number case"
		579	// code will likely be branchless and use only a single multiplication
		580	I32 i = SvIV (sv);
		581	U32 u;
		582	char digit, nz = 0;
		583
		584	need (enc, 6);
		585
		586	*enc->cur = '-'; enc->cur += i < 0 ? 1 : 0;
		587	u = i < 0 ? -i : i;
		588
		589	// convert to 4.28 fixed-point representation
		590	u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits
		591
		592	// now output digit by digit, each time masking out the integer part
		593	// and multiplying by 5 while moving the decimal point one to the right,
		594	// resulting in a net multiplication by 10.
		595	// we always write the digit to memory but conditionally increment
		596	// the pointer, to ease the usage of conditional move instructions.
		597	digit = u >> 28; enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0xfffffff) 5;
		598	digit = u >> 27; enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x7ffffff) 5;
		599	digit = u >> 26; enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x3ffffff) 5;
		600	digit = u >> 25; enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x1ffffff) 5;
		601	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
		602	}
394	}	603	}
395	else if (SvROK (sv))	604	else if (SvROK (sv))
396	{	605	encode_rv (enc, SvRV (sv));
397	if (!--enc->max_recurse)
398	croak ("data structure too deep (hit recursion limit)");
399
400	sv = SvRV (sv);
401
402	switch (SvTYPE (sv))
403	{
404	case SVt_PVAV: encode_av (enc, (AV *)sv); break;
405	case SVt_PVHV: encode_hv (enc, (HV *)sv); break;
406
407	default:
408	croak ("JSON can only represent references to arrays or hashes");
409	}
410	}
411	else if (!SvOK (sv))	606	else if (!SvOK (sv))
412	encode_str (enc, "null", 4, 0);	607	encode_str (enc, "null", 4, 0);
413	else	608	else
414	croak ("encountered perl type that JSON cannot handle");	609	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
		610	SvPV_nolen (sv), SvFLAGS (sv));
415	}	611	}
416		612
417	static SV *	613	static SV *
418	encode_json (SV *scalar, UV flags)	614	encode_json (SV scalar, JSON json)
419	{	615	{
420	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
421	croak ("hash- or arraref required (not a simple scalar, use allow_nonref to allow this)");
422
423	enc_t enc;	616	enc_t enc;
424	enc.flags = flags;	617
		618	if (!(json->flags & F_ALLOW_NONREF) && !SvROK (scalar))
		619	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
		620
		621	enc.json = *json;
425	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));	622	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
426	enc.cur = SvPVX (enc.sv);	623	enc.cur = SvPVX (enc.sv);
427	enc.end = SvEND (enc.sv);	624	enc.end = SvEND (enc.sv);
428	enc.max_recurse = 0;
429	enc.indent = 0;	625	enc.indent = 0;
		626	enc.maxdepth = DEC_DEPTH (enc.json.flags);
430		627
431	SvPOK_only (enc.sv);	628	SvPOK_only (enc.sv);
432	encode_sv (&enc, scalar);	629	encode_sv (&enc, scalar);
433		630
		631	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
		632	*SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
		633
434	if (!(flags & (F_ASCII \| F_UTF8)))	634	if (!(enc.json.flags & (F_ASCII \| F_LATIN1 \| F_UTF8)))
435	SvUTF8_on (enc.sv);	635	SvUTF8_on (enc.sv);
436		636
437	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
438
439	if (enc.flags & F_SHRINK)	637	if (enc.json.flags & F_SHRINK)
440	shrink (enc.sv);	638	shrink (enc.sv);
441		639
442	return enc.sv;	640	return enc.sv;
443	}	641	}
444		642
445	/////////////////////////////////////////////////////////////////////////////	643	/////////////////////////////////////////////////////////////////////////////
		644	// decoder
446		645
447	#define WS \	646	// structure used for decoding JSON
		647	typedef struct
		648	{
		649	char *cur; // current parser pointer
		650	char *end; // end of input string
		651	const char *err; // parse error, if != 0
		652	JSON json;
		653	U32 depth; // recursion depth
		654	U32 maxdepth; // recursion depth limit
		655	} dec_t;
		656
		657	inline void
		658	decode_ws (dec_t *dec)
		659	{
448	for (;;) \	660	for (;;)
449	{ \	661	{
450	char ch = *dec->cur; \	662	char ch = *dec->cur;
		663
451	if (ch > 0x20 \	664	if (ch > 0x20
452	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)) \	665	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))
453	break; \	666	break;
		667
454	++dec->cur; \	668	++dec->cur;
455	}	669	}
		670	}
456		671
457	#define ERR(reason) SB dec->err = reason; goto fail; SE	672	#define ERR(reason) SB dec->err = reason; goto fail; SE
		673
458	#define EXPECT_CH(ch) SB \	674	#define EXPECT_CH(ch) SB \
459	if (*dec->cur != ch) \	675	if (*dec->cur != ch) \
460	ERR (# ch " expected"); \	676	ERR (# ch " expected"); \
461	++dec->cur; \	677	++dec->cur; \
462	SE	678	SE
463		679
		680	#define DEC_INC_DEPTH if (++dec->depth > dec->maxdepth) ERR ("json datastructure exceeds maximum nesting level (set a higher max_depth)")
		681	#define DEC_DEC_DEPTH --dec->depth
		682
464	static SV decode_sv (dec_t dec);	683	static SV decode_sv (dec_t dec);
465		684
466	static signed char decode_hexdigit[256];	685	static signed char decode_hexdigit[256];
467		686
468	static UV	687	static UV
469	decode_4hex (dec_t *dec)	688	decode_4hex (dec_t *dec)
470	{	689	{
471	signed char d1, d2, d3, d4;	690	signed char d1, d2, d3, d4;
		691	unsigned char cur = (unsigned char )dec->cur;
472		692
473	d1 = decode_hexdigit [((unsigned char *)dec->cur) [0]];	693	d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("exactly four hexadecimal digits expected");
474	if (d1 < 0) ERR ("four hexadecimal digits expected");	694	d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("exactly four hexadecimal digits expected");
475	d2 = decode_hexdigit [((unsigned char *)dec->cur) [1]];	695	d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("exactly four hexadecimal digits expected");
476	if (d2 < 0) ERR ("four hexadecimal digits expected");	696	d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("exactly four hexadecimal digits expected");
477	d3 = decode_hexdigit [((unsigned char *)dec->cur) [2]];
478	if (d3 < 0) ERR ("four hexadecimal digits expected");
479	d4 = decode_hexdigit [((unsigned char *)dec->cur) [3]];
480	if (d4 < 0) ERR ("four hexadecimal digits expected");
481		697
482	dec->cur += 4;	698	dec->cur += 4;
483		699
484	return ((UV)d1) << 12	700	return ((UV)d1) << 12
485	\| ((UV)d2) << 8	701	\| ((UV)d2) << 8
…		…
488		704
489	fail:	705	fail:
490	return (UV)-1;	706	return (UV)-1;
491	}	707	}
492		708
493	#define APPEND_GROW(n) SB \
494	if (cur + (n) >= end) \
495	{ \
496	STRLEN ofs = cur - SvPVX (sv); \
497	SvGROW (sv, ofs + (n) + 1); \
498	cur = SvPVX (sv) + ofs; \
499	end = SvEND (sv); \
500	} \
501	SE
502
503	#define APPEND_CH(ch) SB \
504	APPEND_GROW (1); \
505	*cur++ = (ch); \
506	SE
507
508	static SV *	709	static SV *
509	decode_str (dec_t *dec)	710	decode_str (dec_t *dec)
510	{	711	{
511	SV *sv = NEWSV (0,2);	712	SV *sv = 0;
512	int utf8 = 0;	713	int utf8 = 0;
513	char *cur = SvPVX (sv);	714	char *dec_cur = dec->cur;
514	char *end = SvEND (sv);
515		715
516	for (;;)	716	do
517	{	717	{
518	unsigned char ch = (unsigned char )dec->cur;	718	char buf [SHORT_STRING_LEN + UTF8_MAXBYTES];
		719	char *cur = buf;
519		720
520	if (ch == '"')	721	do
521	break;
522	else if (ch == '\\')
523	{	722	{
524	switch (*++dec->cur)	723	unsigned char ch = (unsigned char )dec_cur++;
		724
		725	if (expect_false (ch == '"'))
		726	{
		727	--dec_cur;
		728	break;
525	{	729	}
526	case '\\':	730	else if (expect_false (ch == '\\'))
527	case '/':	731	{
528	case '"': APPEND_CH (*dec->cur++); break;	732	switch (*dec_cur)
529
530	case 'b': APPEND_CH ('\010'); ++dec->cur; break;
531	case 't': APPEND_CH ('\011'); ++dec->cur; break;
532	case 'n': APPEND_CH ('\012'); ++dec->cur; break;
533	case 'f': APPEND_CH ('\014'); ++dec->cur; break;
534	case 'r': APPEND_CH ('\015'); ++dec->cur; break;
535
536	case 'u':
537	{	733	{
538	UV lo, hi;	734	case '\\':
539	++dec->cur;	735	case '/':
		736	case '"': cur++ = dec_cur++; break;
540		737
541	hi = decode_4hex (dec);	738	case 'b': ++dec_cur; *cur++ = '\010'; break;
542	if (hi == (UV)-1)	739	case 't': ++dec_cur; *cur++ = '\011'; break;
543	goto fail;	740	case 'n': ++dec_cur; *cur++ = '\012'; break;
		741	case 'f': ++dec_cur; *cur++ = '\014'; break;
		742	case 'r': ++dec_cur; *cur++ = '\015'; break;
544		743
545	// possibly a surrogate pair	744	case 'u':
546	if (hi >= 0xd800 && hi < 0xdc00)
547	{	745	{
548	if (dec->cur [0] != '\\' \|\| dec->cur [1] != 'u')	746	UV lo, hi;
549	ERR ("missing low surrogate character in surrogate pair");	747	++dec_cur;
550		748
551	dec->cur += 2;	749	dec->cur = dec_cur;
552
553	lo = decode_4hex (dec);	750	hi = decode_4hex (dec);
		751	dec_cur = dec->cur;
554	if (lo == (UV)-1)	752	if (hi == (UV)-1)
555	goto fail;	753	goto fail;
556		754
		755	// possibly a surrogate pair
		756	if (hi >= 0xd800)
		757	if (hi < 0xdc00)
		758	{
		759	if (dec_cur [0] != '\\' \|\| dec_cur [1] != 'u')
		760	ERR ("missing low surrogate character in surrogate pair");
		761
		762	dec_cur += 2;
		763
		764	dec->cur = dec_cur;
		765	lo = decode_4hex (dec);
		766	dec_cur = dec->cur;
		767	if (lo == (UV)-1)
		768	goto fail;
		769
557	if (lo < 0xdc00 \|\| lo >= 0xe000)	770	if (lo < 0xdc00 \|\| lo >= 0xe000)
558	ERR ("surrogate pair expected");	771	ERR ("surrogate pair expected");
559		772
560	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;	773	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
		774	}
		775	else if (hi < 0xe000)
		776	ERR ("missing high surrogate character in surrogate pair");
		777
		778	if (hi >= 0x80)
		779	{
		780	utf8 = 1;
		781
		782	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
		783	}
		784	else
		785	*cur++ = hi;
561	}	786	}
562	else if (hi >= 0xdc00 && hi < 0xe000)
563	ERR ("missing high surrogate character in surrogate pair");
564
565	if (hi >= 0x80)
566	{	787	break;
567	utf8 = 1;
568		788
569	APPEND_GROW (4); // at most 4 bytes for 21 bits
570	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
571	}
572	else	789	default:
573	APPEND_CH (hi);	790	--dec_cur;
		791	ERR ("illegal backslash escape sequence in string");
574	}	792	}
575	break;	793	}
		794	else if (expect_true (ch >= 0x20 && ch <= 0x7f))
		795	*cur++ = ch;
		796	else if (ch >= 0x80)
		797	{
		798	STRLEN clen;
		799	UV uch;
576		800
577	default:
578	--dec->cur;	801	--dec_cur;
579	ERR ("illegal backslash escape sequence in string");	802
		803	uch = decode_utf8 (dec_cur, dec->end - dec_cur, &clen);
		804	if (clen == (STRLEN)-1)
		805	ERR ("malformed UTF-8 character in JSON string");
		806
		807	do
		808	cur++ = dec_cur++;
		809	while (--clen);
		810
		811	utf8 = 1;
		812	}
		813	else
		814	{
		815	--dec_cur;
		816
		817	if (!ch)
		818	ERR ("unexpected end of string while parsing JSON string");
		819	else
		820	ERR ("invalid character encountered while parsing JSON string");
580	}	821	}
581	}	822	}
582	else if (ch >= 0x20 && ch <= 0x7f)	823	while (cur < buf + SHORT_STRING_LEN);
583	APPEND_CH (*dec->cur++);	824
584	else if (ch >= 0x80)
585	{	825	{
586	STRLEN clen;	826	STRLEN len = cur - buf;
587	UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
588	if (clen == (STRLEN)-1)
589	ERR ("malformed UTF-8 character in JSON string");
590		827
591	APPEND_GROW (clen);	828	if (sv)
592	do
593	{	829	{
594	cur++ = dec->cur++;	830	SvGROW (sv, SvCUR (sv) + len + 1);
		831	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
		832	SvCUR_set (sv, SvCUR (sv) + len);
595	}	833	}
596	while (--clen);
597
598	utf8 = 1;
599	}
600	else if (dec->cur == dec->end)
601	ERR ("unexpected end of string while parsing json string");
602	else	834	else
603	ERR ("invalid character encountered");	835	sv = newSVpvn (buf, len);
604	}	836	}
		837	}
		838	while (*dec_cur != '"');
605		839
606	++dec->cur;	840	++dec_cur;
607		841
608	SvCUR_set (sv, cur - SvPVX (sv));	842	if (sv)
609		843	{
610	SvPOK_only (sv);	844	SvPOK_only (sv);
611	*SvEND (sv) = 0;	845	*SvEND (sv) = 0;
612		846
613	if (utf8)	847	if (utf8)
614	SvUTF8_on (sv);	848	SvUTF8_on (sv);
		849	}
		850	else
		851	sv = newSVpvn ("", 0);
615		852
616	if (dec->flags & F_SHRINK)	853	dec->cur = dec_cur;
617	shrink (sv);
618
619	return sv;	854	return sv;
620		855
621	fail:	856	fail:
622	SvREFCNT_dec (sv);	857	dec->cur = dec_cur;
623	return 0;	858	return 0;
624	}	859	}
625		860
626	static SV *	861	static SV *
627	decode_num (dec_t *dec)	862	decode_num (dec_t *dec)
…		…
685	is_nv = 1;	920	is_nv = 1;
686	}	921	}
687		922
688	if (!is_nv)	923	if (!is_nv)
689	{	924	{
690	UV uv;	925	// special case the rather common 1..4-digit-int case, assumes 32 bit ints or so
691	int numtype = grok_number (start, dec->cur - start, &uv);	926	if (*start == '-')
692	if (numtype & IS_NUMBER_IN_UV)	927	switch (dec->cur - start)
693	if (numtype & IS_NUMBER_NEG)
694	{	928	{
695	if (uv < (UV)IV_MIN)	929	case 2: return newSViv (-( start [1] - '0' * 1));
696	return newSViv (-(IV)uv);	930	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));
		931	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
		932	case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
697	}	933	}
		934	else
		935	switch (dec->cur - start)
		936	{
		937	case 1: return newSViv ( start [0] - '0' * 1);
		938	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
		939	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
		940	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
		941	}
		942
		943	{
		944	UV uv;
		945	int numtype = grok_number (start, dec->cur - start, &uv);
		946	if (numtype & IS_NUMBER_IN_UV)
		947	if (numtype & IS_NUMBER_NEG)
		948	{
		949	if (uv < (UV)IV_MIN)
		950	return newSViv (-(IV)uv);
		951	}
698	else	952	else
699	return newSVuv (uv);	953	return newSVuv (uv);
		954
		955	// here would likely be the place for bigint support
700	}	956	}
		957	}
701		958
		959	// if we ever support bigint or bigfloat, this is the place for bigfloat
702	return newSVnv (Atof (start));	960	return newSVnv (Atof (start));
703		961
704	fail:	962	fail:
705	return 0;	963	return 0;
706	}	964	}
…		…
708	static SV *	966	static SV *
709	decode_av (dec_t *dec)	967	decode_av (dec_t *dec)
710	{	968	{
711	AV *av = newAV ();	969	AV *av = newAV ();
712		970
713	WS;	971	DEC_INC_DEPTH;
		972	decode_ws (dec);
		973
714	if (*dec->cur == ']')	974	if (*dec->cur == ']')
715	++dec->cur;	975	++dec->cur;
716	else	976	else
717	for (;;)	977	for (;;)
718	{	978	{
…		…
722	if (!value)	982	if (!value)
723	goto fail;	983	goto fail;
724		984
725	av_push (av, value);	985	av_push (av, value);
726		986
727	WS;	987	decode_ws (dec);
728		988
729	if (*dec->cur == ']')	989	if (*dec->cur == ']')
730	{	990	{
731	++dec->cur;	991	++dec->cur;
732	break;	992	break;
…		…
736	ERR (", or ] expected while parsing array");	996	ERR (", or ] expected while parsing array");
737		997
738	++dec->cur;	998	++dec->cur;
739	}	999	}
740		1000
		1001	DEC_DEC_DEPTH;
741	return newRV_noinc ((SV *)av);	1002	return newRV_noinc ((SV *)av);
742		1003
743	fail:	1004	fail:
744	SvREFCNT_dec (av);	1005	SvREFCNT_dec (av);
		1006	DEC_DEC_DEPTH;
745	return 0;	1007	return 0;
746	}	1008	}
747		1009
748	static SV *	1010	static SV *
749	decode_hv (dec_t *dec)	1011	decode_hv (dec_t *dec)
750	{	1012	{
751	HV *hv = newHV ();	1013	HV *hv = newHV ();
752		1014
753	WS;	1015	DEC_INC_DEPTH;
		1016	decode_ws (dec);
		1017
754	if (*dec->cur == '}')	1018	if (*dec->cur == '}')
755	++dec->cur;	1019	++dec->cur;
756	else	1020	else
757	for (;;)	1021	for (;;)
758	{	1022	{
		1023	decode_ws (dec); EXPECT_CH ('"');
		1024
		1025	// heuristic: assume that
		1026	// a) decode_str + hv_store_ent are abysmally slow.
		1027	// b) most hash keys are short, simple ascii text.
		1028	// => try to "fast-match" such strings to avoid
		1029	// the overhead of decode_str + hv_store_ent.
		1030	{
759	SV *key, *value;	1031	SV *value;
		1032	char *p = dec->cur;
		1033	char *e = p + 24; // only try up to 24 bytes
760		1034
761	WS; EXPECT_CH ('"');	1035	for (;;)
762
763	key = decode_str (dec);
764	if (!key)
765	goto fail;
766
767	WS; EXPECT_CH (':');
768
769	value = decode_sv (dec);
770	if (!value)
771	{	1036	{
		1037	// the >= 0x80 is true on most architectures
		1038	if (p == e \|\| *p < 0x20 \|\| *p >= 0x80 \|\| *p == '\\')
		1039	{
		1040	// slow path, back up and use decode_str
		1041	SV *key = decode_str (dec);
		1042	if (!key)
		1043	goto fail;
		1044
		1045	decode_ws (dec); EXPECT_CH (':');
		1046
		1047	value = decode_sv (dec);
		1048	if (!value)
		1049	{
		1050	SvREFCNT_dec (key);
		1051	goto fail;
		1052	}
		1053
		1054	hv_store_ent (hv, key, value, 0);
772	SvREFCNT_dec (key);	1055	SvREFCNT_dec (key);
		1056
		1057	break;
		1058	}
		1059	else if (*p == '"')
		1060	{
		1061	// fast path, got a simple key
		1062	char *key = dec->cur;
		1063	int len = p - key;
		1064	dec->cur = p + 1;
		1065
		1066	decode_ws (dec); EXPECT_CH (':');
		1067
		1068	value = decode_sv (dec);
		1069	if (!value)
773	goto fail;	1070	goto fail;
		1071
		1072	hv_store (hv, key, len, value, 0);
		1073
		1074	break;
		1075	}
		1076
		1077	++p;
774	}	1078	}
775
776	//TODO: optimise
777	hv_store_ent (hv, key, value, 0);
778
779	WS;	1079	}
		1080
		1081	decode_ws (dec);
780		1082
781	if (*dec->cur == '}')	1083	if (*dec->cur == '}')
782	{	1084	{
783	++dec->cur;	1085	++dec->cur;
784	break;	1086	break;
…		…
788	ERR (", or } expected while parsing object/hash");	1090	ERR (", or } expected while parsing object/hash");
789		1091
790	++dec->cur;	1092	++dec->cur;
791	}	1093	}
792		1094
		1095	DEC_DEC_DEPTH;
793	return newRV_noinc ((SV *)hv);	1096	return newRV_noinc ((SV *)hv);
794		1097
795	fail:	1098	fail:
796	SvREFCNT_dec (hv);	1099	SvREFCNT_dec (hv);
		1100	DEC_DEC_DEPTH;
797	return 0;	1101	return 0;
798	}	1102	}
799		1103
800	static SV *	1104	static SV *
801	decode_sv (dec_t *dec)	1105	decode_sv (dec_t *dec)
802	{	1106	{
803	WS;	1107	decode_ws (dec);
		1108
		1109	// the beauty of JSON: you need exactly one character lookahead
		1110	// to parse anything.
804	switch (*dec->cur)	1111	switch (*dec->cur)
805	{	1112	{
806	case '"': ++dec->cur; return decode_str (dec);	1113	case '"': ++dec->cur; return decode_str (dec);
807	case '[': ++dec->cur; return decode_av (dec);	1114	case '[': ++dec->cur; return decode_av (dec);
808	case '{': ++dec->cur; return decode_hv (dec);	1115	case '{': ++dec->cur; return decode_hv (dec);
…		…
814		1121
815	case 't':	1122	case 't':
816	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))	1123	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
817	{	1124	{
818	dec->cur += 4;	1125	dec->cur += 4;
819	return newSViv (1);	1126	return SvREFCNT_inc (json_true);
820	}	1127	}
821	else	1128	else
822	ERR ("'true' expected");	1129	ERR ("'true' expected");
823		1130
824	break;	1131	break;
825		1132
826	case 'f':	1133	case 'f':
827	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))	1134	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
828	{	1135	{
829	dec->cur += 5;	1136	dec->cur += 5;
830	return newSViv (0);	1137	return SvREFCNT_inc (json_false);
831	}	1138	}
832	else	1139	else
833	ERR ("'false' expected");	1140	ERR ("'false' expected");
834		1141
835	break;	1142	break;
…		…
844	ERR ("'null' expected");	1151	ERR ("'null' expected");
845		1152
846	break;	1153	break;
847		1154
848	default:	1155	default:
849	ERR ("malformed json string, neither array, object, number, string or atom");	1156	ERR ("malformed JSON string, neither array, object, number, string or atom");
850	break;	1157	break;
851	}	1158	}
852		1159
853	fail:	1160	fail:
854	return 0;	1161	return 0;
855	}	1162	}
856		1163
857	static SV *	1164	static SV *
858	decode_json (SV *string, UV flags)	1165	decode_json (SV *string, JSON *json, UV *offset_return)
859	{	1166	{
		1167	dec_t dec;
		1168	UV offset;
860	SV *sv;	1169	SV *sv;
861		1170
		1171	SvGETMAGIC (string);
		1172	SvUPGRADE (string, SVt_PV);
		1173
		1174	if (json->flags & F_MAXSIZE && SvCUR (string) > DEC_SIZE (json->flags))
		1175	croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
		1176	(unsigned long)SvCUR (string), (unsigned long)DEC_SIZE (json->flags));
		1177
862	if (flags & F_UTF8)	1178	if (json->flags & F_UTF8)
863	sv_utf8_downgrade (string, 0);	1179	sv_utf8_downgrade (string, 0);
864	else	1180	else
865	sv_utf8_upgrade (string);	1181	sv_utf8_upgrade (string);
866		1182
867	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP	1183	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
868		1184
869	dec_t dec;	1185	dec.json = *json;
870	dec.flags = flags;
871	dec.cur = SvPVX (string);	1186	dec.cur = SvPVX (string);
872	dec.end = SvEND (string);	1187	dec.end = SvEND (string);
873	dec.err = 0;	1188	dec.err = 0;
		1189	dec.depth = 0;
		1190	dec.maxdepth = DEC_DEPTH (dec.json.flags);
874		1191
		1192	*dec.end = 0; // this should basically be a nop, too, but make sure it's there
875	sv = decode_sv (&dec);	1193	sv = decode_sv (&dec);
876		1194
		1195	if (!(offset_return \|\| !sv))
		1196	{
		1197	// check for trailing garbage
		1198	decode_ws (&dec);
		1199
		1200	if (*dec.cur)
		1201	{
		1202	dec.err = "garbage after JSON object";
		1203	SvREFCNT_dec (sv);
		1204	sv = 0;
		1205	}
		1206	}
		1207
		1208	if (offset_return \|\| !sv)
		1209	{
		1210	offset = dec.json.flags & F_UTF8
		1211	? dec.cur - SvPVX (string)
		1212	: utf8_distance (dec.cur, SvPVX (string));
		1213
		1214	if (offset_return)
		1215	*offset_return = offset;
		1216	}
		1217
877	if (!sv)	1218	if (!sv)
878	{	1219	{
879	IV offset = dec.flags & F_UTF8
880	? dec.cur - SvPVX (string)
881	: utf8_distance (dec.cur, SvPVX (string));
882	SV *uni = sv_newmortal ();	1220	SV *uni = sv_newmortal ();
		1221
883	// horrible hack to silence warning inside pv_uni_display	1222	// horrible hack to silence warning inside pv_uni_display
884	COP cop;	1223	COP cop = *PL_curcop;
885	memset (&cop, 0, sizeof (cop));
886	cop.cop_warnings = pWARN_NONE;	1224	cop.cop_warnings = pWARN_NONE;
		1225	ENTER;
887	SAVEVPTR (PL_curcop);	1226	SAVEVPTR (PL_curcop);
888	PL_curcop = &cop;	1227	PL_curcop = &cop;
889
890	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);	1228	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
		1229	LEAVE;
		1230
891	croak ("%s, at character offset %d (%s)",	1231	croak ("%s, at character offset %d [\"%s\"]",
892	dec.err,	1232	dec.err,
893	(int)offset,	1233	(int)offset,
894	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");	1234	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
895	}	1235	}
896		1236
897	sv = sv_2mortal (sv);	1237	sv = sv_2mortal (sv);
898		1238
899	if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))	1239	if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv))
900	croak ("JSON object or array expected (but number, string, true, false or null found, use allow_nonref to allow this)");	1240	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
901		1241
902	return sv;	1242	return sv;
903	}	1243	}
904		1244
		1245	/////////////////////////////////////////////////////////////////////////////
		1246	// XS interface functions
		1247
905	MODULE = JSON::XS PACKAGE = JSON::XS	1248	MODULE = JSON::XS PACKAGE = JSON::XS
906		1249
907	BOOT:	1250	BOOT:
908	{	1251	{
909	int i;	1252	int i;
910		1253
911	memset (decode_hexdigit, 0xff, 256);
912	for (i = 10; i--; )	1254	for (i = 0; i < 256; ++i)
913	decode_hexdigit ['0' + i] = i;	1255	decode_hexdigit [i] =
		1256	i >= '0' && i <= '9' ? i - '0'
		1257	: i >= 'a' && i <= 'f' ? i - 'a' + 10
		1258	: i >= 'A' && i <= 'F' ? i - 'A' + 10
		1259	: -1;
914		1260
915	for (i = 7; i--; )
916	{
917	decode_hexdigit ['a' + i] = 10 + i;
918	decode_hexdigit ['A' + i] = 10 + i;
919	}
920
921	json_stash = gv_stashpv ("JSON::XS", 1);	1261	json_stash = gv_stashpv ("JSON::XS" , 1);
		1262	json_boolean_stash = gv_stashpv ("JSON::XS::Boolean", 1);
		1263
		1264	json_true = get_sv ("JSON::XS::true" , 1); SvREADONLY_on (json_true );
		1265	json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false);
922	}	1266	}
923		1267
924	PROTOTYPES: DISABLE	1268	PROTOTYPES: DISABLE
925		1269
926	SV *new (char *dummy)	1270	void new (char *klass)
927	CODE:	1271	PPCODE:
928	RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);	1272	{
929	OUTPUT:	1273	SV *pv = NEWSV (0, sizeof (JSON));
930	RETVAL	1274	SvPOK_only (pv);
		1275	Zero (SvPVX (pv), 1, sizeof (JSON));
		1276	((JSON *)SvPVX (pv))->flags = F_DEFAULT;
		1277	XPUSHs (sv_2mortal (sv_bless (newRV_noinc (pv), json_stash)));
		1278	}
931		1279
932	SV *ascii (SV *self, int enable = 1)	1280	void ascii (JSON *self, int enable = 1)
933	ALIAS:	1281	ALIAS:
934	ascii = F_ASCII	1282	ascii = F_ASCII
		1283	latin1 = F_LATIN1
935	utf8 = F_UTF8	1284	utf8 = F_UTF8
936	indent = F_INDENT	1285	indent = F_INDENT
937	canonical = F_CANONICAL	1286	canonical = F_CANONICAL
938	space_before = F_SPACE_BEFORE	1287	space_before = F_SPACE_BEFORE
939	space_after = F_SPACE_AFTER	1288	space_after = F_SPACE_AFTER
940	json_rpc = F_JSON_RPC
941	pretty = F_PRETTY	1289	pretty = F_PRETTY
942	allow_nonref = F_ALLOW_NONREF	1290	allow_nonref = F_ALLOW_NONREF
943	shrink = F_SHRINK	1291	shrink = F_SHRINK
		1292	allow_blessed = F_ALLOW_BLESSED
		1293	convert_blessed = F_CONV_BLESSED
944	CODE:	1294	PPCODE:
945	{	1295	{
946	UV *uv = SvJSON (self);
947	if (enable)	1296	if (enable)
948	*uv \|= ix;	1297	self->flags \|= ix;
949	else	1298	else
950	*uv &= ~ix;	1299	self->flags &= ~ix;
951		1300
952	RETVAL = newSVsv (self);	1301	XPUSHs (ST (0));
953	}	1302	}
954	OUTPUT:
955	RETVAL
956		1303
957	void encode (SV *self, SV *scalar)	1304	void max_depth (JSON *self, UV max_depth = 0x80000000UL)
958	PPCODE:	1305	PPCODE:
959	XPUSHs (encode_json (scalar, *SvJSON (self)));	1306	{
		1307	UV log2 = 0;
960		1308
961	void decode (SV *self, SV *jsonstr)	1309	if (max_depth > 0x80000000UL) max_depth = 0x80000000UL;
		1310
		1311	while ((1UL << log2) < max_depth)
		1312	++log2;
		1313
		1314	self->flags = self->flags & ~F_MAXDEPTH \| (log2 << S_MAXDEPTH);
		1315
		1316	XPUSHs (ST (0));
		1317	}
		1318
		1319	void max_size (JSON *self, UV max_size = 0)
962	PPCODE:	1320	PPCODE:
		1321	{
		1322	UV log2 = 0;
		1323
		1324	if (max_size > 0x80000000UL) max_size = 0x80000000UL;
		1325	if (max_size == 1) max_size = 2;
		1326
		1327	while ((1UL << log2) < max_size)
		1328	++log2;
		1329
		1330	self->flags = self->flags & ~F_MAXSIZE \| (log2 << S_MAXSIZE);
		1331
		1332	XPUSHs (ST (0));
		1333	}
		1334
		1335	void encode (JSON *self, SV *scalar)
		1336	PPCODE:
		1337	XPUSHs (encode_json (scalar, self));
		1338
		1339	void decode (JSON *self, SV *jsonstr)
		1340	PPCODE:
963	XPUSHs (decode_json (jsonstr, *SvJSON (self)));	1341	XPUSHs (decode_json (jsonstr, self, 0));
		1342
		1343	void decode_prefix (JSON *self, SV *jsonstr)
		1344	PPCODE:
		1345	{
		1346	UV offset;
		1347	EXTEND (SP, 2);
		1348	PUSHs (decode_json (jsonstr, self, &offset));
		1349	PUSHs (sv_2mortal (newSVuv (offset)));
		1350	}
964		1351
965	PROTOTYPES: ENABLE	1352	PROTOTYPES: ENABLE
966		1353
967	void to_json (SV *scalar)	1354	void to_json (SV *scalar)
968	PPCODE:	1355	PPCODE:
		1356	{
		1357	JSON json = { F_DEFAULT \| F_UTF8 };
969	XPUSHs (encode_json (scalar, F_UTF8));	1358	XPUSHs (encode_json (scalar, &json));
		1359	}
970		1360
971	void from_json (SV *jsonstr)	1361	void from_json (SV *jsonstr)
972	PPCODE:	1362	PPCODE:
		1363	{
		1364	JSON json = { F_DEFAULT \| F_UTF8 };
973	XPUSHs (decode_json (jsonstr, F_UTF8));	1365	XPUSHs (decode_json (jsonstr, &json, 0));
		1366	}
974		1367

Diff Legend

– Removed lines
+ Added lines
< Changed lines
> Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.7 by root, Fri Mar 23 15:57:18 2007 UTC vs. Revision 1.48 by root, Sun Jul 1 22:20:00 2007 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.7 by root, Fri Mar 23 15:57:18 2007 UTC vs.
Revision 1.48 by root, Sun Jul 1 22:20:00 2007 UTC