[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.3 by root, Thu Mar 22 18:10:29 2007 UTC vs.
Revision 1.77 by root, Tue Mar 25 06:37:38 2008 UTC

…		…
1	#include "EXTERN.h"	1	#include "EXTERN.h"
2	#include "perl.h"	2	#include "perl.h"
3	#include "XSUB.h"	3	#include "XSUB.h"
4		4
5	#include "assert.h"	5	#include <assert.h>
6	#include "string.h"	6	#include <string.h>
7	#include "stdlib.h"	7	#include <stdlib.h>
		8	#include <stdio.h>
		9	#include <limits.h>
		10	#include <float.h>
8		11
		12	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)
		13	# define snprintf _snprintf // C compilers have this in stdio.h
		14	#endif
		15
		16	// some old perls do not have this, try to make it work, no
		17	// guarentees, though. if it breaks, you get to keep the pieces.
		18	#ifndef UTF8_MAXBYTES
		19	# define UTF8_MAXBYTES 13
		20	#endif
		21
		22	#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2)
		23
9	#define F_ASCII 0x00000001	24	#define F_ASCII 0x00000001UL
		25	#define F_LATIN1 0x00000002UL
10	#define F_UTF8 0x00000002	26	#define F_UTF8 0x00000004UL
11	#define F_INDENT 0x00000004	27	#define F_INDENT 0x00000008UL
12	#define F_CANONICAL 0x00000008	28	#define F_CANONICAL 0x00000010UL
13	#define F_SPACE_BEFORE 0x00000010	29	#define F_SPACE_BEFORE 0x00000020UL
14	#define F_SPACE_AFTER 0x00000020	30	#define F_SPACE_AFTER 0x00000040UL
15	#define F_JSON_RPC 0x00000040
16	#define F_ALLOW_NONREF 0x00000080	31	#define F_ALLOW_NONREF 0x00000100UL
		32	#define F_SHRINK 0x00000200UL
		33	#define F_ALLOW_BLESSED 0x00000400UL
		34	#define F_CONV_BLESSED 0x00000800UL
		35	#define F_RELAXED 0x00001000UL
		36
		37	#define F_MAXDEPTH 0xf8000000UL
		38	#define S_MAXDEPTH 27
		39	#define F_MAXSIZE 0x01f00000UL
		40	#define S_MAXSIZE 20
		41	#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing
		42
		43	#define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH))
		44	#define DEC_SIZE(flags) (1UL << ((flags & F_MAXSIZE ) >> S_MAXSIZE ))
17		45
18	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER	46	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER
19	#define F_DEFAULT 0	47	#define F_DEFAULT (9UL << S_MAXDEPTH)
20		48
21	#define INIT_SIZE 32 // initial scalar size to be allocated	49	#define INIT_SIZE 32 // initial scalar size to be allocated
		50	#define INDENT_STEP 3 // spaces per indentation level
		51
		52	#define SHORT_STRING_LEN 16384 // special-case strings of up to this size
22		53
23	#define SB do {	54	#define SB do {
24	#define SE } while (0)	55	#define SE } while (0)
25		56
26	static HV *json_stash;	57	#if __GNUC__ >= 3
		58	# define expect(expr,value) __builtin_expect ((expr), (value))
		59	# define INLINE static inline
		60	#else
		61	# define expect(expr,value) (expr)
		62	# define INLINE static
		63	#endif
		64
		65	#define expect_false(expr) expect ((expr) != 0, 0)
		66	#define expect_true(expr) expect ((expr) != 0, 1)
		67
		68	#define IN_RANGE_INC(type,val,beg,end) \
		69	((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
		70	<= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
		71
		72	#ifdef USE_ITHREADS
		73	# define JSON_SLOW 1
		74	# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
		75	#else
		76	# define JSON_SLOW 0
		77	# define JSON_STASH json_stash
		78	#endif
		79
		80	static HV json_stash, json_boolean_stash; // JSON::XS::
		81	static SV json_true, json_false;
		82
		83	enum {
		84	INCR_M_WS = 0, // initial whitespace skipping, must be 0
		85	INCR_M_STR, // inside string
		86	INCR_M_BS, // inside backslash
		87	INCR_M_JSON // outside anything, count nesting
		88	};
		89
		90	#define INCR_DONE(json) (!(json)->incr_nest && (json)->incr_mode == INCR_M_JSON)
		91
		92	typedef struct {
		93	U32 flags;
		94	SV *cb_object;
		95	HV *cb_sk_object;
		96
		97	// for the incremental parser
		98	SV *incr_text; // the source text so far
		99	STRLEN incr_pos; // the current offset into the text
		100	int incr_nest; // {[]}-nesting level
		101	int incr_mode;
		102	} JSON;
		103
		104	/////////////////////////////////////////////////////////////////////////////
		105	// utility functions
		106
		107	INLINE void
		108	shrink (SV *sv)
		109	{
		110	sv_utf8_downgrade (sv, 1);
		111	if (SvLEN (sv) > SvCUR (sv) + 1)
		112	{
		113	#ifdef SvPV_shrink_to_cur
		114	SvPV_shrink_to_cur (sv);
		115	#elif defined (SvPV_renew)
		116	SvPV_renew (sv, SvCUR (sv) + 1);
		117	#endif
		118	}
		119	}
		120
		121	// decode an utf-8 character and return it, or (UV)-1 in
		122	// case of an error.
		123	// we special-case "safe" characters from U+80 .. U+7FF,
		124	// but use the very good perl function to parse anything else.
		125	// note that we never call this function for a ascii codepoints
		126	INLINE UV
		127	decode_utf8 (unsigned char s, STRLEN len, STRLEN clen)
		128	{
		129	if (expect_true (len >= 2
		130	&& IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
		131	&& IN_RANGE_INC (char, s[1], 0x80, 0xbf)))
		132	{
		133	*clen = 2;
		134	return ((s[0] & 0x1f) << 6) \| (s[1] & 0x3f);
		135	}
		136	else
		137	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
		138	}
		139
		140	// likewise for encoding, also never called for ascii codepoints
		141	// this function takes advantage of this fact, although current gccs
		142	// seem to optimise the check for >= 0x80 away anyways
		143	INLINE unsigned char *
		144	encode_utf8 (unsigned char *s, UV ch)
		145	{
		146	if (expect_false (ch < 0x000080))
		147	*s++ = ch;
		148	else if (expect_true (ch < 0x000800))
		149	*s++ = 0xc0 \| ( ch >> 6),
		150	*s++ = 0x80 \| ( ch & 0x3f);
		151	else if ( ch < 0x010000)
		152	*s++ = 0xe0 \| ( ch >> 12),
		153	*s++ = 0x80 \| ((ch >> 6) & 0x3f),
		154	*s++ = 0x80 \| ( ch & 0x3f);
		155	else if ( ch < 0x110000)
		156	*s++ = 0xf0 \| ( ch >> 18),
		157	*s++ = 0x80 \| ((ch >> 12) & 0x3f),
		158	*s++ = 0x80 \| ((ch >> 6) & 0x3f),
		159	*s++ = 0x80 \| ( ch & 0x3f);
		160
		161	return s;
		162	}
		163
		164	/////////////////////////////////////////////////////////////////////////////
		165	// encoder
27		166
28	// structure used for encoding JSON	167	// structure used for encoding JSON
29	typedef struct	168	typedef struct
30	{	169	{
31	char *cur;	170	char *cur; // SvPVX (sv) + current output position
32	STRLEN len; // SvLEN (sv)
33	char *end; // SvEND (sv)	171	char *end; // SvEND (sv)
34	SV *sv;	172	SV *sv; // result scalar
35	UV flags;	173	JSON json;
36	int max_recurse;	174	U32 indent; // indentation level
37	int indent;	175	U32 maxdepth; // max. indentation/recursion level
		176	UV limit; // escape character values >= this value when encoding
38	} enc_t;	177	} enc_t;
39		178
40	// structure used for decoding JSON	179	INLINE void
41	typedef struct
42	{
43	char *cur;
44	char *end;
45	char *err;
46	UV flags;
47	} dec_t;
48
49	static UV *
50	SvJSON (SV *sv)
51	{
52	if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
53	croak ("object is not of type JSON::XS");
54
55	return &SvUVX (SvRV (sv));
56	}
57
58	/////////////////////////////////////////////////////////////////////////////
59
60	static void
61	need (enc_t *enc, STRLEN len)	180	need (enc_t *enc, STRLEN len)
62	{	181	{
63	if (enc->cur + len >= enc->end)	182	if (expect_false (enc->cur + len >= enc->end))
64	{	183	{
65	STRLEN cur = enc->cur - SvPVX (enc->sv);	184	STRLEN cur = enc->cur - SvPVX (enc->sv);
66	SvGROW (enc->sv, cur + len + 1);	185	SvGROW (enc->sv, cur + len + 1);
67	enc->cur = SvPVX (enc->sv) + cur;	186	enc->cur = SvPVX (enc->sv) + cur;
68	enc->end = SvEND (enc->sv);	187	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
69	}	188	}
70	}	189	}
71		190
72	static void	191	INLINE void
73	encode_ch (enc_t *enc, char ch)	192	encode_ch (enc_t *enc, char ch)
74	{	193	{
75	need (enc, 1);	194	need (enc, 1);
76	*enc->cur++ = ch;	195	*enc->cur++ = ch;
77	}	196	}
79	static void	198	static void
80	encode_str (enc_t enc, char str, STRLEN len, int is_utf8)	199	encode_str (enc_t enc, char str, STRLEN len, int is_utf8)
81	{	200	{
82	char *end = str + len;	201	char *end = str + len;
83		202
		203	need (enc, len);
		204
84	while (str < end)	205	while (str < end)
85	{	206	{
86	unsigned char ch = (unsigned char )str;	207	unsigned char ch = (unsigned char )str;
		208
87	if (ch >= 0x20 && ch < 0x80) // most common case	209	if (expect_true (ch >= 0x20 && ch < 0x80)) // most common case
88	{	210	{
		211	if (expect_false (ch == '"')) // but with slow exceptions
		212	{
		213	need (enc, len += 1);
		214	*enc->cur++ = '\\';
		215	*enc->cur++ = '"';
		216	}
		217	else if (expect_false (ch == '\\'))
		218	{
		219	need (enc, len += 1);
		220	*enc->cur++ = '\\';
		221	*enc->cur++ = '\\';
		222	}
		223	else
89	*enc->cur++ = ch;	224	*enc->cur++ = ch;
		225
90	str++;	226	++str;
91	}	227	}
92	else	228	else
93	{	229	{
94	STRLEN clen;	230	switch (ch)
95	UV uch;
96
97	if (is_utf8)
98	{	231	{
99	uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);	232	case '\010': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 'b'; ++str; break;
100	if (clen < 0)	233	case '\011': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 't'; ++str; break;
101	croak ("malformed UTF-8 character in string, cannot convert to JSON");	234	case '\012': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 'n'; ++str; break;
102	}	235	case '\014': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 'f'; ++str; break;
103	else	236	case '\015': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 'r'; ++str; break;
104	{
105	uch = ch;
106	clen = 1;
107	}
108		237
109	need (enc, len += 6);	238	default:
110
111	if (uch < 0xa0 \|\| enc->flags & F_ASCII)
112	{
113	if (uch > 0xFFFFUL)
114	{	239	{
		240	STRLEN clen;
		241	UV uch;
		242
		243	if (is_utf8)
		244	{
		245	uch = decode_utf8 (str, end - str, &clen);
		246	if (clen == (STRLEN)-1)
		247	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
		248	}
		249	else
		250	{
		251	uch = ch;
115	len += 6;	252	clen = 1;
		253	}
		254
		255	if (uch < 0x80/0x20/ \|\| uch >= enc->limit)
		256	{
		257	if (uch >= 0x10000UL)
		258	{
		259	if (uch >= 0x110000UL)
		260	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
		261
116	need (enc, len += 6);	262	need (enc, len += 11);
117	sprintf (enc->cur, "\\u%04x\\u%04x",	263	sprintf (enc->cur, "\\u%04x\\u%04x",
118	(uch - 0x10000) / 0x400 + 0xD800,	264	(int)((uch - 0x10000) / 0x400 + 0xD800),
119	(uch - 0x10000) % 0x400 + 0xDC00);	265	(int)((uch - 0x10000) % 0x400 + 0xDC00));
120	enc->cur += 12;	266	enc->cur += 12;
		267	}
		268	else
		269	{
		270	static char hexdigit [16] = "0123456789abcdef";
		271	need (enc, len += 5);
		272	*enc->cur++ = '\\';
		273	*enc->cur++ = 'u';
		274	*enc->cur++ = hexdigit [ uch >> 12 ];
		275	*enc->cur++ = hexdigit [(uch >> 8) & 15];
		276	*enc->cur++ = hexdigit [(uch >> 4) & 15];
		277	*enc->cur++ = hexdigit [(uch >> 0) & 15];
		278	}
		279
		280	str += clen;
121	}	281	}
		282	else if (enc->json.flags & F_LATIN1)
		283	{
		284	*enc->cur++ = uch;
		285	str += clen;
		286	}
		287	else if (is_utf8)
		288	{
		289	need (enc, len += clen);
		290	do
		291	{
		292	enc->cur++ = str++;
		293	}
		294	while (--clen);
		295	}
122	else	296	else
123	{	297	{
124	sprintf (enc->cur, "\\u%04x", uch);	298	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
125	enc->cur += 6;	299	enc->cur = encode_utf8 (enc->cur, uch);
		300	++str;
		301	}
126	}	302	}
127	}	303	}
128	else if (is_utf8)
129	{
130	memcpy (enc->cur, str, clen);
131	enc->cur += clen;
132	}
133	else
134	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
135
136	str += clen;
137	}	304	}
138		305
139	--len;	306	--len;
140	}	307	}
141	}	308	}
142		309
143	#define INDENT SB \	310	INLINE void
		311	encode_indent (enc_t *enc)
		312	{
144	if (enc->flags & F_INDENT) \	313	if (enc->json.flags & F_INDENT)
145	{ \	314	{
146	int i_; \	315	int spaces = enc->indent * INDENT_STEP;
147	need (enc, enc->indent); \	316
148	for (i_ = enc->indent * 3; i_--; )\	317	need (enc, spaces);
		318	memset (enc->cur, ' ', spaces);
		319	enc->cur += spaces;
		320	}
		321	}
		322
		323	INLINE void
		324	encode_space (enc_t *enc)
		325	{
		326	need (enc, 1);
		327	encode_ch (enc, ' ');
		328	}
		329
		330	INLINE void
		331	encode_nl (enc_t *enc)
		332	{
		333	if (enc->json.flags & F_INDENT)
		334	{
		335	need (enc, 1);
149	encode_ch (enc, ' '); \	336	encode_ch (enc, '\n');
150	} \	337	}
151	SE	338	}
152		339
153	#define SPACE SB need (enc, 1); encode_ch (enc, ' '); SE	340	INLINE void
154	#define NL SB if (enc->flags & F_INDENT) { need (enc, 1); encode_ch (enc, '\n'); } SE	341	encode_comma (enc_t *enc)
155	#define COMMA SB \	342	{
156	encode_ch (enc, ','); \	343	encode_ch (enc, ',');
		344
157	if (enc->flags & F_INDENT) \	345	if (enc->json.flags & F_INDENT)
158	NL; \	346	encode_nl (enc);
159	else if (enc->flags & F_SPACE_AFTER) \	347	else if (enc->json.flags & F_SPACE_AFTER)
160	SPACE; \	348	encode_space (enc);
161	SE	349	}
162		350
163	static void encode_sv (enc_t enc, SV sv);	351	static void encode_sv (enc_t enc, SV sv);
164		352
165	static void	353	static void
166	encode_av (enc_t enc, AV av)	354	encode_av (enc_t enc, AV av)
167	{	355	{
168	int i, len = av_len (av);	356	int i, len = av_len (av);
169		357
		358	if (enc->indent >= enc->maxdepth)
		359	croak ("data structure too deep (hit recursion limit)");
		360
170	encode_ch (enc, '['); NL;	361	encode_ch (enc, '[');
171	++enc->indent;	362
		363	if (len >= 0)
		364	{
		365	encode_nl (enc); ++enc->indent;
172		366
173	for (i = 0; i <= len; ++i)	367	for (i = 0; i <= len; ++i)
174	{	368	{
175	INDENT;	369	SV **svp = av_fetch (av, i, 0);
176	encode_sv (enc, *av_fetch (av, i, 0));
177		370
		371	encode_indent (enc);
		372
		373	if (svp)
		374	encode_sv (enc, *svp);
		375	else
		376	encode_str (enc, "null", 4, 0);
		377
178	if (i < len)	378	if (i < len)
179	COMMA;	379	encode_comma (enc);
180	}	380	}
181		381
182	NL;	382	encode_nl (enc); --enc->indent; encode_indent (enc);
183		383	}
184	--enc->indent;	384
185	INDENT; encode_ch (enc, ']');	385	encode_ch (enc, ']');
186	}	386	}
187		387
188	static void	388	static void
189	encode_he (enc_t enc, HE he)	389	encode_hk (enc_t enc, HE he)
190	{	390	{
191	encode_ch (enc, '"');	391	encode_ch (enc, '"');
192		392
193	if (HeKLEN (he) == HEf_SVKEY)	393	if (HeKLEN (he) == HEf_SVKEY)
194	{	394	{
195	SV *sv = HeSVKEY (he);	395	SV *sv = HeSVKEY (he);
196	STRLEN len;	396	STRLEN len;
		397	char *str;
		398
		399	SvGETMAGIC (sv);
197	char *str = SvPV (sv, len);	400	str = SvPV (sv, len);
198		401
199	encode_str (enc, str, len, SvUTF8 (sv));	402	encode_str (enc, str, len, SvUTF8 (sv));
200	}	403	}
201	else	404	else
202	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));	405	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
203		406
204	encode_ch (enc, '"');	407	encode_ch (enc, '"');
205		408
206	if (enc->flags & F_SPACE_BEFORE) SPACE;	409	if (enc->json.flags & F_SPACE_BEFORE) encode_space (enc);
207	encode_ch (enc, ':');	410	encode_ch (enc, ':');
208	if (enc->flags & F_SPACE_AFTER ) SPACE;	411	if (enc->json.flags & F_SPACE_AFTER ) encode_space (enc);
209	encode_sv (enc, HeVAL (he));
210	}	412	}
211		413
212	// compare hash entries, used when all keys are bytestrings	414	// compare hash entries, used when all keys are bytestrings
213	static int	415	static int
214	he_cmp_fast (const void a_, const void b_)	416	he_cmp_fast (const void a_, const void b_)
…		…
219	HE b = (HE **)b_;	421	HE b = (HE **)b_;
220		422
221	STRLEN la = HeKLEN (a);	423	STRLEN la = HeKLEN (a);
222	STRLEN lb = HeKLEN (b);	424	STRLEN lb = HeKLEN (b);
223		425
224	if (!(cmp == memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))	426	if (!(cmp = memcmp (HeKEY (b), HeKEY (a), lb < la ? lb : la)))
225	cmp = la < lb ? -1 : la == lb ? 0 : 1;	427	cmp = lb - la;
226		428
227	return cmp;	429	return cmp;
228	}	430	}
229		431
230	// compare hash entries, used when some keys are sv's or utf-x	432	// compare hash entries, used when some keys are sv's or utf-x
231	static int	433	static int
232	he_cmp_slow (const void a, const void b)	434	he_cmp_slow (const void a, const void b)
233	{	435	{
234	return sv_cmp (HeSVKEY_force ((HE )a), HeSVKEY_force ((HE **)b));	436	return sv_cmp (HeSVKEY_force ((HE )b), HeSVKEY_force ((HE **)a));
235	}	437	}
236		438
237	static void	439	static void
238	encode_hv (enc_t enc, HV hv)	440	encode_hv (enc_t enc, HV hv)
239	{	441	{
		442	HE *he;
240	int count, i;	443	int count;
241		444
242	encode_ch (enc, '{'); NL; ++enc->indent;	445	if (enc->indent >= enc->maxdepth)
		446	croak ("data structure too deep (hit recursion limit)");
243		447
244	if ((count = hv_iterinit (hv)))	448	encode_ch (enc, '{');
245	{	449
246	// for canonical output we have to sort by keys first	450	// for canonical output we have to sort by keys first
247	// actually, this is mostly due to the stupid so-called	451	// actually, this is mostly due to the stupid so-called
248	// security workaround added somewhere in 5.8.x.	452	// security workaround added somewhere in 5.8.x.
249	// that randomises hash orderings	453	// that randomises hash orderings
250	if (enc->flags & F_CANONICAL)	454	if (enc->json.flags & F_CANONICAL)
		455	{
		456	int count = hv_iterinit (hv);
		457
		458	if (SvMAGICAL (hv))
251	{	459	{
252	HE he, hes [count];	460	// need to count by iterating. could improve by dynamically building the vector below
		461	// but I don't care for the speed of this special case.
		462	// note also that we will run into undefined behaviour when the two iterations
		463	// do not result in the same count, something I might care for in some later release.
		464
		465	count = 0;
		466	while (hv_iternext (hv))
		467	++count;
		468
		469	hv_iterinit (hv);
		470	}
		471
		472	if (count)
		473	{
253	int fast = 1;	474	int i, fast = 1;
		475	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)
		476	HE *hes = _alloca (count sizeof (HE));
		477	#else
		478	HE *hes [count]; // if your compiler dies here, you need to enable C99 mode
		479	#endif
254		480
255	i = 0;	481	i = 0;
256	while ((he = hv_iternext (hv)))	482	while ((he = hv_iternext (hv)))
257	{	483	{
258	hes [i++] = he;	484	hes [i++] = he;
…		…
264		490
265	if (fast)	491	if (fast)
266	qsort (hes, count, sizeof (HE *), he_cmp_fast);	492	qsort (hes, count, sizeof (HE *), he_cmp_fast);
267	else	493	else
268	{	494	{
269	// hack to disable "use bytes"	495	// hack to forcefully disable "use bytes"
270	COP *oldcop = PL_curcop, cop;	496	COP cop = *PL_curcop;
271	cop.op_private = 0;	497	cop.op_private = 0;
		498
		499	ENTER;
		500	SAVETMPS;
		501
		502	SAVEVPTR (PL_curcop);
272	PL_curcop = &cop;	503	PL_curcop = &cop;
273		504
274	SAVETMPS;
275	qsort (hes, count, sizeof (HE *), he_cmp_slow);	505	qsort (hes, count, sizeof (HE *), he_cmp_slow);
		506
276	FREETMPS;	507	FREETMPS;
277		508	LEAVE;
278	PL_curcop = oldcop;
279	}	509	}
280		510
281	for (i = 0; i < count; ++i)	511	encode_nl (enc); ++enc->indent;
		512
		513	while (count--)
282	{	514	{
283	INDENT;	515	encode_indent (enc);
		516	he = hes [count];
284	encode_he (enc, hes [i]);	517	encode_hk (enc, he);
		518	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
285		519
286	if (i < count - 1)	520	if (count)
287	COMMA;	521	encode_comma (enc);
288	}	522	}
289		523
		524	encode_nl (enc); --enc->indent; encode_indent (enc);
		525	}
		526	}
		527	else
		528	{
		529	if (hv_iterinit (hv) \|\| SvMAGICAL (hv))
		530	if ((he = hv_iternext (hv)))
290	NL;	531	{
		532	encode_nl (enc); ++enc->indent;
		533
		534	for (;;)
		535	{
		536	encode_indent (enc);
		537	encode_hk (enc, he);
		538	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
		539
		540	if (!(he = hv_iternext (hv)))
		541	break;
		542
		543	encode_comma (enc);
		544	}
		545
		546	encode_nl (enc); --enc->indent; encode_indent (enc);
		547	}
		548	}
		549
		550	encode_ch (enc, '}');
		551	}
		552
		553	// encode objects, arrays and special \0=false and \1=true values.
		554	static void
		555	encode_rv (enc_t enc, SV sv)
		556	{
		557	svtype svt;
		558
		559	SvGETMAGIC (sv);
		560	svt = SvTYPE (sv);
		561
		562	if (expect_false (SvOBJECT (sv)))
		563	{
		564	HV *stash = !JSON_SLOW \|\| json_boolean_stash
		565	? json_boolean_stash
		566	: gv_stashpv ("JSON::XS::Boolean", 1);
		567
		568	if (SvSTASH (sv) == stash)
		569	{
		570	if (SvIV (sv))
		571	encode_str (enc, "true", 4, 0);
		572	else
		573	encode_str (enc, "false", 5, 0);
291	}	574	}
292	else	575	else
293	{	576	{
294	SV *sv;	577	#if 0
295	HE *he = hv_iternext (hv);	578	if (0 && sv_derived_from (rv, "JSON::Literal"))
296
297	for (;;)
298	{	579	{
299	INDENT;	580	// not yet
300	encode_he (enc, he);
301
302	if (!(he = hv_iternext (hv)))
303	break;
304
305	COMMA;
306	}	581	}
307		582	#endif
		583	if (enc->json.flags & F_CONV_BLESSED)
308	NL;	584	{
		585	// we re-bless the reference to get overload and other niceties right
		586	GV *to_json = gv_fetchmethod_autoload (SvSTASH (sv), "TO_JSON", 0);
		587
		588	if (to_json)
		589	{
		590	dSP;
		591
		592	ENTER; SAVETMPS; PUSHMARK (SP);
		593	XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv)));
		594
		595	// calling with G_SCALAR ensures that we always get a 1 return value
		596	PUTBACK;
		597	call_sv ((SV *)GvCV (to_json), G_SCALAR);
		598	SPAGAIN;
		599
		600	// catch this surprisingly common error
		601	if (SvROK (TOPs) && SvRV (TOPs) == sv)
		602	croak ("%s::TO_JSON method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv)));
		603
		604	sv = POPs;
		605	PUTBACK;
		606
		607	encode_sv (enc, sv);
		608
		609	FREETMPS; LEAVE;
		610	}
		611	else if (enc->json.flags & F_ALLOW_BLESSED)
		612	encode_str (enc, "null", 4, 0);
		613	else
		614	croak ("encountered object '%s', but neither allow_blessed enabled nor TO_JSON method available on it",
		615	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
		616	}
		617	else if (enc->json.flags & F_ALLOW_BLESSED)
		618	encode_str (enc, "null", 4, 0);
		619	else
		620	croak ("encountered object '%s', but neither allow_blessed nor convert_blessed settings are enabled",
		621	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
309	}	622	}
310	}	623	}
		624	else if (svt == SVt_PVHV)
		625	encode_hv (enc, (HV *)sv);
		626	else if (svt == SVt_PVAV)
		627	encode_av (enc, (AV *)sv);
		628	else if (svt < SVt_PVAV)
		629	{
		630	STRLEN len = 0;
		631	char *pv = svt ? SvPV (sv, len) : 0;
311		632
312	--enc->indent; INDENT; encode_ch (enc, '}');	633	if (len == 1 && *pv == '1')
		634	encode_str (enc, "true", 4, 0);
		635	else if (len == 1 && *pv == '0')
		636	encode_str (enc, "false", 5, 0);
		637	else
		638	croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",
		639	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
		640	}
		641	else
		642	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
		643	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
313	}	644	}
314		645
315	static void	646	static void
316	encode_sv (enc_t enc, SV sv)	647	encode_sv (enc_t enc, SV sv)
317	{	648	{
		649	SvGETMAGIC (sv);
		650
318	if (SvPOKp (sv))	651	if (SvPOKp (sv))
319	{	652	{
320	STRLEN len;	653	STRLEN len;
321	char *str = SvPV (sv, len);	654	char *str = SvPV (sv, len);
322	encode_ch (enc, '"');	655	encode_ch (enc, '"');
323	encode_str (enc, str, len, SvUTF8 (sv));	656	encode_str (enc, str, len, SvUTF8 (sv));
324	encode_ch (enc, '"');	657	encode_ch (enc, '"');
325	}	658	}
326	else if (SvNOKp (sv))	659	else if (SvNOKp (sv))
327	{	660	{
		661	// trust that perl will do the right thing w.r.t. JSON syntax.
328	need (enc, NV_DIG + 32);	662	need (enc, NV_DIG + 32);
329	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);	663	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
330	enc->cur += strlen (enc->cur);	664	enc->cur += strlen (enc->cur);
331	}	665	}
332	else if (SvIOKp (sv))	666	else if (SvIOKp (sv))
333	{	667	{
		668	// we assume we can always read an IV as a UV and vice versa
		669	// we assume two's complement
		670	// we assume no aliasing issues in the union
		671	if (SvIsUV (sv) ? SvUVX (sv) <= 59000
		672	: SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000)
		673	{
		674	// optimise the "small number case"
		675	// code will likely be branchless and use only a single multiplication
		676	// works for numbers up to 59074
		677	I32 i = SvIVX (sv);
		678	U32 u;
		679	char digit, nz = 0;
		680
334	need (enc, 64);	681	need (enc, 6);
		682
		683	*enc->cur = '-'; enc->cur += i < 0 ? 1 : 0;
		684	u = i < 0 ? -i : i;
		685
		686	// convert to 4.28 fixed-point representation
		687	u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits
		688
		689	// now output digit by digit, each time masking out the integer part
		690	// and multiplying by 5 while moving the decimal point one to the right,
		691	// resulting in a net multiplication by 10.
		692	// we always write the digit to memory but conditionally increment
		693	// the pointer, to enable the use of conditional move instructions.
		694	digit = u >> 28; enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0xfffffffUL) 5;
		695	digit = u >> 27; enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x7ffffffUL) 5;
		696	digit = u >> 26; enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x3ffffffUL) 5;
		697	digit = u >> 25; enc->cur = digit + '0'; enc->cur += (nz = nz \|\| digit); u = (u & 0x1ffffffUL) 5;
		698	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
		699	}
		700	else
		701	{
		702	// large integer, use the (rather slow) snprintf way.
		703	need (enc, IVUV_MAXCHARS);
335	enc->cur +=	704	enc->cur +=
336	SvIsUV(sv)	705	SvIsUV(sv)
337	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))	706	? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv))
338	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));	707	: snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv));
		708	}
339	}	709	}
340	else if (SvROK (sv))	710	else if (SvROK (sv))
341	{	711	encode_rv (enc, SvRV (sv));
342	if (!--enc->max_recurse)
343	croak ("data structure too deep (hit recursion limit)");
344
345	sv = SvRV (sv);
346
347	switch (SvTYPE (sv))
348	{
349	case SVt_PVAV: encode_av (enc, (AV *)sv); break;
350	case SVt_PVHV: encode_hv (enc, (HV *)sv); break;
351
352	default:
353	croak ("JSON can only represent references to arrays or hashes");
354	}
355	}
356	else if (!SvOK (sv))	712	else if (!SvOK (sv))
357	encode_str (enc, "null", 4, 0);	713	encode_str (enc, "null", 4, 0);
358	else	714	else
359	croak ("encountered perl type that JSON cannot handle");	715	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
		716	SvPV_nolen (sv), SvFLAGS (sv));
360	}	717	}
361		718
362	static SV *	719	static SV *
363	encode_json (SV *scalar, UV flags)	720	encode_json (SV scalar, JSON json)
364	{	721	{
365	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
366	croak ("hash- or arraref required (not a simple scalar, use allow_nonref to allow this)");
367
368	enc_t enc;	722	enc_t enc;
369	enc.flags = flags;	723
		724	if (!(json->flags & F_ALLOW_NONREF) && !SvROK (scalar))
		725	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
		726
		727	enc.json = *json;
370	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));	728	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
371	enc.cur = SvPVX (enc.sv);	729	enc.cur = SvPVX (enc.sv);
372	enc.end = SvEND (enc.sv);	730	enc.end = SvEND (enc.sv);
373	enc.max_recurse = 0;
374	enc.indent = 0;	731	enc.indent = 0;
		732	enc.maxdepth = DEC_DEPTH (enc.json.flags);
		733	enc.limit = enc.json.flags & F_ASCII ? 0x000080UL
		734	: enc.json.flags & F_LATIN1 ? 0x000100UL
		735	: 0x110000UL;
375		736
376	SvPOK_only (enc.sv);	737	SvPOK_only (enc.sv);
377	encode_sv (&enc, scalar);	738	encode_sv (&enc, scalar);
378		739
		740	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
		741	*SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
		742
379	if (!(flags & (F_ASCII \| F_UTF8)))	743	if (!(enc.json.flags & (F_ASCII \| F_LATIN1 \| F_UTF8)))
380	SvUTF8_on (enc.sv);	744	SvUTF8_on (enc.sv);
381		745
382	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));	746	if (enc.json.flags & F_SHRINK)
		747	shrink (enc.sv);
		748
383	return enc.sv;	749	return enc.sv;
384	}	750	}
385		751
386	/////////////////////////////////////////////////////////////////////////////	752	/////////////////////////////////////////////////////////////////////////////
		753	// decoder
387		754
388	#define WS \	755	// structure used for decoding JSON
		756	typedef struct
		757	{
		758	char *cur; // current parser pointer
		759	char *end; // end of input string
		760	const char *err; // parse error, if != 0
		761	JSON json;
		762	U32 depth; // recursion depth
		763	U32 maxdepth; // recursion depth limit
		764	} dec_t;
		765
		766	INLINE void
		767	decode_comment (dec_t *dec)
		768	{
		769	// only '#'-style comments allowed a.t.m.
		770
		771	while (dec->cur && dec->cur != 0x0a && *dec->cur != 0x0d)
		772	++dec->cur;
		773	}
		774
		775	INLINE void
		776	decode_ws (dec_t *dec)
		777	{
389	for (;;) \	778	for (;;)
390	{ \	779	{
391	char ch = *dec->cur; \	780	char ch = *dec->cur;
		781
392	if (ch > 0x20 \	782	if (ch > 0x20)
		783	{
		784	if (expect_false (ch == '#'))
		785	{
		786	if (dec->json.flags & F_RELAXED)
		787	decode_comment (dec);
		788	else
		789	break;
		790	}
		791	else
		792	break;
		793	}
393	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)) \	794	else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)
394	break; \	795	break; // parse error, but let higher level handle it, gives better error messages
		796
395	++dec->cur; \	797	++dec->cur;
396	}	798	}
		799	}
397		800
398	#define ERR(reason) SB dec->err = reason; goto fail; SE	801	#define ERR(reason) SB dec->err = reason; goto fail; SE
		802
399	#define EXPECT_CH(ch) SB \	803	#define EXPECT_CH(ch) SB \
400	if (*dec->cur != ch) \	804	if (*dec->cur != ch) \
401	ERR (# ch " expected"); \	805	ERR (# ch " expected"); \
402	++dec->cur; \	806	++dec->cur; \
403	SE	807	SE
404		808
		809	#define DEC_INC_DEPTH if (++dec->depth > dec->maxdepth) ERR ("json datastructure exceeds maximum nesting level (set a higher max_depth)")
		810	#define DEC_DEC_DEPTH --dec->depth
		811
405	static SV decode_sv (dec_t dec);	812	static SV decode_sv (dec_t dec);
406
407	#define APPEND_CH(ch) SB \
408	SvGROW (sv, cur + 1 + 1); \
409	SvPVX (sv)[cur++] = (ch); \
410	SE
411		813
412	static signed char decode_hexdigit[256];	814	static signed char decode_hexdigit[256];
413		815
414	static UV	816	static UV
415	decode_4hex (dec_t *dec)	817	decode_4hex (dec_t *dec)
416	{	818	{
417	signed char d1, d2, d3, d4;	819	signed char d1, d2, d3, d4;
		820	unsigned char cur = (unsigned char )dec->cur;
418		821
419	d1 = decode_hexdigit [((unsigned char *)dec->cur) [0]];	822	d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("exactly four hexadecimal digits expected");
420	if (d1 < 0) ERR ("four hexadecimal digits expected");	823	d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("exactly four hexadecimal digits expected");
421	d2 = decode_hexdigit [((unsigned char *)dec->cur) [1]];	824	d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("exactly four hexadecimal digits expected");
422	if (d2 < 0) ERR ("four hexadecimal digits expected");	825	d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("exactly four hexadecimal digits expected");
423	d3 = decode_hexdigit [((unsigned char *)dec->cur) [2]];
424	if (d3 < 0) ERR ("four hexadecimal digits expected");
425	d4 = decode_hexdigit [((unsigned char *)dec->cur) [3]];
426	if (d4 < 0) ERR ("four hexadecimal digits expected");
427		826
428	dec->cur += 4;	827	dec->cur += 4;
429		828
430	return ((UV)d1) << 12	829	return ((UV)d1) << 12
431	\| ((UV)d2) << 8	830	\| ((UV)d2) << 8
…		…
437	}	836	}
438		837
439	static SV *	838	static SV *
440	decode_str (dec_t *dec)	839	decode_str (dec_t *dec)
441	{	840	{
442	SV *sv = NEWSV (0,2);	841	SV *sv = 0;
443	STRLEN cur = 0;
444	int utf8 = 0;	842	int utf8 = 0;
		843	char *dec_cur = dec->cur;
445		844
446	for (;;)	845	do
447	{	846	{
448	unsigned char ch = (unsigned char )dec->cur;	847	char buf [SHORT_STRING_LEN + UTF8_MAXBYTES];
		848	char *cur = buf;
449		849
450	if (ch == '"')	850	do
451	break;
452	else if (ch == '\\')
453	{	851	{
454	switch (*++dec->cur)	852	unsigned char ch = (unsigned char )dec_cur++;
		853
		854	if (expect_false (ch == '"'))
455	{	855	{
456	case '\\':	856	--dec_cur;
457	case '/':	857	break;
458	case '"': APPEND_CH (*dec->cur++); break;	858	}
459		859	else if (expect_false (ch == '\\'))
460	case 'b': APPEND_CH ('\010'); ++dec->cur; break;	860	{
461	case 't': APPEND_CH ('\011'); ++dec->cur; break;	861	switch (*dec_cur)
462	case 'n': APPEND_CH ('\012'); ++dec->cur; break;
463	case 'f': APPEND_CH ('\014'); ++dec->cur; break;
464	case 'r': APPEND_CH ('\015'); ++dec->cur; break;
465
466	case 'u':
467	{	862	{
468	UV lo, hi;	863	case '\\':
469	++dec->cur;	864	case '/':
		865	case '"': cur++ = dec_cur++; break;
470		866
471	hi = decode_4hex (dec);	867	case 'b': ++dec_cur; *cur++ = '\010'; break;
472	if (hi == (UV)-1)	868	case 't': ++dec_cur; *cur++ = '\011'; break;
473	goto fail;	869	case 'n': ++dec_cur; *cur++ = '\012'; break;
		870	case 'f': ++dec_cur; *cur++ = '\014'; break;
		871	case 'r': ++dec_cur; *cur++ = '\015'; break;
474		872
475	// possibly a surrogate pair	873	case 'u':
476	if (hi >= 0xd800 && hi < 0xdc00)
477	{	874	{
478	if (dec->cur [0] != '\\' \|\| dec->cur [1] != 'u')	875	UV lo, hi;
479	ERR ("illegal surrogate character");	876	++dec_cur;
480		877
481	dec->cur += 2;	878	dec->cur = dec_cur;
482
483	lo = decode_4hex (dec);	879	hi = decode_4hex (dec);
		880	dec_cur = dec->cur;
484	if (lo == (UV)-1)	881	if (hi == (UV)-1)
485	goto fail;	882	goto fail;
486		883
		884	// possibly a surrogate pair
		885	if (hi >= 0xd800)
		886	if (hi < 0xdc00)
		887	{
		888	if (dec_cur [0] != '\\' \|\| dec_cur [1] != 'u')
		889	ERR ("missing low surrogate character in surrogate pair");
		890
		891	dec_cur += 2;
		892
		893	dec->cur = dec_cur;
		894	lo = decode_4hex (dec);
		895	dec_cur = dec->cur;
		896	if (lo == (UV)-1)
		897	goto fail;
		898
487	if (lo < 0xdc00 \|\| lo >= 0xe000)	899	if (lo < 0xdc00 \|\| lo >= 0xe000)
488	ERR ("surrogate pair expected");	900	ERR ("surrogate pair expected");
489		901
490	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;	902	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
		903	}
		904	else if (hi < 0xe000)
		905	ERR ("missing high surrogate character in surrogate pair");
		906
		907	if (hi >= 0x80)
		908	{
		909	utf8 = 1;
		910
		911	cur = encode_utf8 (cur, hi);
		912	}
		913	else
		914	*cur++ = hi;
491	}	915	}
492	else if (lo >= 0xdc00 && lo < 0xe000)
493	ERR ("illegal surrogate character");
494
495	if (hi >= 0x80)
496	{	916	break;
497	utf8 = 1;
498		917
499	SvGROW (sv, cur + 4 + 1); // at most 4 bytes for 21 bits
500	cur = (char *)uvuni_to_utf8_flags (SvPVX (sv) + cur, hi, 0) - SvPVX (sv);
501	}
502	else	918	default:
503	APPEND_CH (hi);	919	--dec_cur;
		920	ERR ("illegal backslash escape sequence in string");
504	}	921	}
		922	}
		923	else if (expect_true (ch >= 0x20 && ch < 0x80))
		924	*cur++ = ch;
		925	else if (ch >= 0x80)
		926	{
		927	STRLEN clen;
		928	UV uch;
		929
		930	--dec_cur;
		931
		932	uch = decode_utf8 (dec_cur, dec->end - dec_cur, &clen);
		933	if (clen == (STRLEN)-1)
		934	ERR ("malformed UTF-8 character in JSON string");
		935
		936	do
		937	cur++ = dec_cur++;
		938	while (--clen);
		939
		940	utf8 = 1;
		941	}
		942	else
		943	{
		944	--dec_cur;
		945
		946	if (!ch)
		947	ERR ("unexpected end of string while parsing JSON string");
505	break;	948	else
		949	ERR ("invalid character encountered while parsing JSON string");
506	}	950	}
507	}	951	}
508	else if (ch >= 0x20 && ch <= 0x7f)	952	while (cur < buf + SHORT_STRING_LEN);
509	APPEND_CH (*dec->cur++);	953
510	else if (ch >= 0x80)	954	{
		955	STRLEN len = cur - buf;
		956
		957	if (sv)
511	{	958	{
512	STRLEN clen;	959	SvGROW (sv, SvCUR (sv) + len + 1);
513	UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);	960	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
514	if (clen < 0)	961	SvCUR_set (sv, SvCUR (sv) + len);
515	ERR ("malformed UTF-8 character in string, cannot convert to JSON");
516
517	SvGROW (sv, cur + clen + 1); // at most 4 bytes for 21 bits
518	memcpy (SvPVX (sv) + cur, dec->cur, clen);
519	dec->cur += clen;
520	}	962	}
521	else	963	else
522	ERR ("invalid character encountered");	964	sv = newSVpvn (buf, len);
523	}	965	}
		966	}
		967	while (*dec_cur != '"');
524		968
525	++dec->cur;	969	++dec_cur;
526		970
		971	if (sv)
		972	{
527	SvPOK_only (sv);	973	SvPOK_only (sv);
528
529	SvCUR_set (sv, cur);
530	*SvEND (sv) = 0;	974	*SvEND (sv) = 0;
531		975
532	if (utf8)	976	if (utf8)
533	SvUTF8_on (sv);	977	SvUTF8_on (sv);
		978	}
		979	else
		980	sv = newSVpvn ("", 0);
534		981
		982	dec->cur = dec_cur;
535	return sv;	983	return sv;
536		984
537	fail:	985	fail:
538	SvREFCNT_dec (sv);	986	dec->cur = dec_cur;
539	return 0;	987	return 0;
540	}	988	}
541		989
542	static SV *	990	static SV *
543	decode_num (dec_t *dec)	991	decode_num (dec_t *dec)
…		…
553	{	1001	{
554	++dec->cur;	1002	++dec->cur;
555	if (dec->cur >= '0' && dec->cur <= '9')	1003	if (dec->cur >= '0' && dec->cur <= '9')
556	ERR ("malformed number (leading zero must not be followed by another digit)");	1004	ERR ("malformed number (leading zero must not be followed by another digit)");
557	}	1005	}
558		1006	else if (dec->cur < '0' \|\| dec->cur > '9')
559	// int	1007	ERR ("malformed number (no digits after initial minus)");
		1008	else
		1009	do
		1010	{
		1011	++dec->cur;
		1012	}
560	while (dec->cur >= '0' && dec->cur <= '9')	1013	while (dec->cur >= '0' && dec->cur <= '9');
561	++dec->cur;
562		1014
563	// [frac]	1015	// [frac]
564	if (*dec->cur == '.')	1016	if (*dec->cur == '.')
565	{	1017	{
566	is_nv = 1;	1018	++dec->cur;
		1019
		1020	if (dec->cur < '0' \|\| dec->cur > '9')
		1021	ERR ("malformed number (no digits after decimal point)");
567		1022
568	do	1023	do
569	{	1024	{
570	++dec->cur;	1025	++dec->cur;
571	}	1026	}
572	while (dec->cur >= '0' && dec->cur <= '9');	1027	while (dec->cur >= '0' && dec->cur <= '9');
		1028
		1029	is_nv = 1;
573	}	1030	}
574		1031
575	// [exp]	1032	// [exp]
576	if (dec->cur == 'e' \|\| dec->cur == 'E')	1033	if (dec->cur == 'e' \|\| dec->cur == 'E')
577	{	1034	{
578	is_nv = 1;
579
580	++dec->cur;	1035	++dec->cur;
		1036
581	if (dec->cur == '-' \|\| dec->cur == '+')	1037	if (dec->cur == '-' \|\| dec->cur == '+')
582	++dec->cur;	1038	++dec->cur;
583		1039
		1040	if (dec->cur < '0' \|\| dec->cur > '9')
		1041	ERR ("malformed number (no digits after exp sign)");
		1042
		1043	do
		1044	{
		1045	++dec->cur;
		1046	}
584	while (dec->cur >= '0' && dec->cur <= '9')	1047	while (dec->cur >= '0' && dec->cur <= '9');
585	++dec->cur;	1048
		1049	is_nv = 1;
586	}	1050	}
587		1051
588	if (!is_nv)	1052	if (!is_nv)
589	{	1053	{
590	UV uv;	1054	int len = dec->cur - start;
591	int numtype = grok_number (start, dec->cur - start, &uv);	1055
592	if (numtype & IS_NUMBER_IN_UV)	1056	// special case the rather common 1..5-digit-int case
593	if (numtype & IS_NUMBER_NEG)	1057	if (*start == '-')
		1058	switch (len)
594	{	1059	{
595	if (uv < (UV)IV_MIN)	1060	case 2: return newSViv (-( start [1] - '0' * 1));
596	return newSViv (-(IV)uv);	1061	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));
		1062	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
		1063	case 5: return newSViv (-( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
		1064	case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111));
597	}	1065	}
		1066	else
		1067	switch (len)
		1068	{
		1069	case 1: return newSViv ( start [0] - '0' * 1);
		1070	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
		1071	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
		1072	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
		1073	case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111);
		1074	}
		1075
		1076	{
		1077	UV uv;
		1078	int numtype = grok_number (start, len, &uv);
		1079	if (numtype & IS_NUMBER_IN_UV)
		1080	if (numtype & IS_NUMBER_NEG)
		1081	{
		1082	if (uv < (UV)IV_MIN)
		1083	return newSViv (-(IV)uv);
		1084	}
598	else	1085	else
599	return newSVuv (uv);	1086	return newSVuv (uv);
600	}	1087	}
601		1088
		1089	len -= *start == '-' ? 1 : 0;
		1090
		1091	// does not fit into IV or UV, try NV
		1092	if ((sizeof (NV) == sizeof (double) && DBL_DIG >= len)
		1093	#if defined (LDBL_DIG)
		1094	\|\| (sizeof (NV) == sizeof (long double) && LDBL_DIG >= len)
		1095	#endif
		1096	)
		1097	// fits into NV without loss of precision
		1098	return newSVnv (Atof (start));
		1099
		1100	// everything else fails, convert it to a string
		1101	return newSVpvn (start, dec->cur - start);
		1102	}
		1103
		1104	// loss of precision here
602	return newSVnv (Atof (start));	1105	return newSVnv (Atof (start));
603		1106
604	fail:	1107	fail:
605	return 0;	1108	return 0;
606	}	1109	}
…		…
608	static SV *	1111	static SV *
609	decode_av (dec_t *dec)	1112	decode_av (dec_t *dec)
610	{	1113	{
611	AV *av = newAV ();	1114	AV *av = newAV ();
612		1115
		1116	DEC_INC_DEPTH;
		1117	decode_ws (dec);
		1118
		1119	if (*dec->cur == ']')
		1120	++dec->cur;
		1121	else
613	for (;;)	1122	for (;;)
614	{	1123	{
615	SV *value;	1124	SV *value;
616		1125
617	value = decode_sv (dec);	1126	value = decode_sv (dec);
618	if (!value)	1127	if (!value)
619	goto fail;	1128	goto fail;
620		1129
621	av_push (av, value);	1130	av_push (av, value);
622		1131
623	WS;	1132	decode_ws (dec);
624		1133
625	if (*dec->cur == ']')	1134	if (*dec->cur == ']')
626	{	1135	{
627	++dec->cur;	1136	++dec->cur;
628	break;	1137	break;
		1138	}
629	}	1139
630
631	if (*dec->cur != ',')	1140	if (*dec->cur != ',')
632	ERR (", or ] expected while parsing array");	1141	ERR (", or ] expected while parsing array");
633		1142
634	++dec->cur;	1143	++dec->cur;
		1144
		1145	decode_ws (dec);
		1146
		1147	if (*dec->cur == ']' && dec->json.flags & F_RELAXED)
		1148	{
		1149	++dec->cur;
		1150	break;
		1151	}
635	}	1152	}
636		1153
		1154	DEC_DEC_DEPTH;
637	return newRV_noinc ((SV *)av);	1155	return newRV_noinc ((SV *)av);
638		1156
639	fail:	1157	fail:
640	SvREFCNT_dec (av);	1158	SvREFCNT_dec (av);
		1159	DEC_DEC_DEPTH;
641	return 0;	1160	return 0;
642	}	1161	}
643		1162
644	static SV *	1163	static SV *
645	decode_hv (dec_t *dec)	1164	decode_hv (dec_t *dec)
646	{	1165	{
		1166	SV *sv;
647	HV *hv = newHV ();	1167	HV *hv = newHV ();
648		1168
		1169	DEC_INC_DEPTH;
		1170	decode_ws (dec);
		1171
		1172	if (*dec->cur == '}')
		1173	++dec->cur;
		1174	else
649	for (;;)	1175	for (;;)
650	{	1176	{
651	SV key, value;
652
653	WS; EXPECT_CH ('"');	1177	EXPECT_CH ('"');
654		1178
655	key = decode_str (dec);	1179	// heuristic: assume that
656	if (!key)	1180	// a) decode_str + hv_store_ent are abysmally slow.
657	goto fail;	1181	// b) most hash keys are short, simple ascii text.
658		1182	// => try to "fast-match" such strings to avoid
659	WS; EXPECT_CH (':');	1183	// the overhead of decode_str + hv_store_ent.
660
661	value = decode_sv (dec);
662	if (!value)
663	{	1184	{
		1185	SV *value;
		1186	char *p = dec->cur;
		1187	char *e = p + 24; // only try up to 24 bytes
		1188
		1189	for (;;)
		1190	{
		1191	// the >= 0x80 is false on most architectures
		1192	if (p == e \|\| p < 0x20 \|\| p >= 0x80 \|\| *p == '\\')
		1193	{
		1194	// slow path, back up and use decode_str
		1195	SV *key = decode_str (dec);
		1196	if (!key)
		1197	goto fail;
		1198
		1199	decode_ws (dec); EXPECT_CH (':');
		1200
		1201	decode_ws (dec);
		1202	value = decode_sv (dec);
		1203	if (!value)
		1204	{
		1205	SvREFCNT_dec (key);
		1206	goto fail;
		1207	}
		1208
		1209	hv_store_ent (hv, key, value, 0);
664	SvREFCNT_dec (key);	1210	SvREFCNT_dec (key);
		1211
		1212	break;
		1213	}
		1214	else if (*p == '"')
		1215	{
		1216	// fast path, got a simple key
		1217	char *key = dec->cur;
		1218	int len = p - key;
		1219	dec->cur = p + 1;
		1220
		1221	decode_ws (dec); EXPECT_CH (':');
		1222
		1223	decode_ws (dec);
		1224	value = decode_sv (dec);
		1225	if (!value)
665	goto fail;	1226	goto fail;
		1227
		1228	hv_store (hv, key, len, value, 0);
		1229
		1230	break;
		1231	}
		1232
		1233	++p;
		1234	}
666	}	1235	}
667		1236
668	//TODO: optimise	1237	decode_ws (dec);
669	hv_store_ent (hv, key, value, 0);
670		1238
671	WS;
672
673	if (*dec->cur == '}')	1239	if (*dec->cur == '}')
		1240	{
		1241	++dec->cur;
		1242	break;
		1243	}
		1244
		1245	if (*dec->cur != ',')
		1246	ERR (", or } expected while parsing object/hash");
		1247
		1248	++dec->cur;
		1249
		1250	decode_ws (dec);
		1251
		1252	if (*dec->cur == '}' && dec->json.flags & F_RELAXED)
		1253	{
		1254	++dec->cur;
		1255	break;
		1256	}
		1257	}
		1258
		1259	DEC_DEC_DEPTH;
		1260	sv = newRV_noinc ((SV *)hv);
		1261
		1262	// check filter callbacks
		1263	if (dec->json.flags & F_HOOK)
		1264	{
		1265	if (dec->json.cb_sk_object && HvKEYS (hv) == 1)
674	{	1266	{
675	++dec->cur;	1267	HE cb, he;
676	break;	1268
		1269	hv_iterinit (hv);
		1270	he = hv_iternext (hv);
		1271	hv_iterinit (hv);
		1272
		1273	// the next line creates a mortal sv each time its called.
		1274	// might want to optimise this for common cases.
		1275	cb = hv_fetch_ent (dec->json.cb_sk_object, hv_iterkeysv (he), 0, 0);
		1276
		1277	if (cb)
		1278	{
		1279	dSP;
		1280	int count;
		1281
		1282	ENTER; SAVETMPS; PUSHMARK (SP);
		1283	XPUSHs (HeVAL (he));
		1284
		1285	PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN;
		1286
		1287	if (count == 1)
		1288	{
		1289	sv = newSVsv (POPs);
		1290	FREETMPS; LEAVE;
		1291	return sv;
		1292	}
		1293
		1294	FREETMPS; LEAVE;
		1295	}
677	}	1296	}
678		1297
679	if (*dec->cur != ',')	1298	if (dec->json.cb_object)
680	ERR (", or } expected while parsing object/hash");	1299	{
		1300	dSP;
		1301	int count;
681		1302
682	++dec->cur;	1303	ENTER; SAVETMPS; PUSHMARK (SP);
683	}	1304	XPUSHs (sv_2mortal (sv));
684		1305
685	return newRV_noinc ((SV *)hv);	1306	PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN;
		1307
		1308	if (count == 1)
		1309	{
		1310	sv = newSVsv (POPs);
		1311	FREETMPS; LEAVE;
		1312	return sv;
		1313	}
		1314
		1315	SvREFCNT_inc (sv);
		1316	FREETMPS; LEAVE;
		1317	}
		1318	}
		1319
		1320	return sv;
686		1321
687	fail:	1322	fail:
688	SvREFCNT_dec (hv);	1323	SvREFCNT_dec (hv);
		1324	DEC_DEC_DEPTH;
689	return 0;	1325	return 0;
690	}	1326	}
691		1327
692	static SV *	1328	static SV *
693	decode_sv (dec_t *dec)	1329	decode_sv (dec_t *dec)
694	{	1330	{
695	WS;	1331	// the beauty of JSON: you need exactly one character lookahead
		1332	// to parse everything.
696	switch (*dec->cur)	1333	switch (*dec->cur)
697	{	1334	{
698	case '"': ++dec->cur; return decode_str (dec);	1335	case '"': ++dec->cur; return decode_str (dec);
699	case '[': ++dec->cur; return decode_av (dec);	1336	case '[': ++dec->cur; return decode_av (dec);
700	case '{': ++dec->cur; return decode_hv (dec);	1337	case '{': ++dec->cur; return decode_hv (dec);
701		1338
702	case '-':	1339	case '-':
703	case '0': case '1': case '2': case '3': case '4':	1340	case '0': case '1': case '2': case '3': case '4':
704	case '5': case '6': case '7': case '8': case '9':	1341	case '5': case '6': case '7': case '8': case '9':
705	return decode_num (dec);	1342	return decode_num (dec);
706		1343
707	case 't':	1344	case 't':
708	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))	1345	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
709	{	1346	{
710	dec->cur += 4;	1347	dec->cur += 4;
711	return newSViv (1);	1348	#if JSON_SLOW
		1349	json_true = get_sv ("JSON::XS::true", 1); SvREADONLY_on (json_true);
		1350	#endif
		1351	return SvREFCNT_inc (json_true);
712	}	1352	}
713	else	1353	else
714	ERR ("'true' expected");	1354	ERR ("'true' expected");
715		1355
716	break;	1356	break;
717		1357
718	case 'f':	1358	case 'f':
719	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))	1359	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
720	{	1360	{
721	dec->cur += 5;	1361	dec->cur += 5;
722	return newSViv (0);	1362	#if JSON_SLOW
		1363	json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false);
		1364	#endif
		1365	return SvREFCNT_inc (json_false);
723	}	1366	}
724	else	1367	else
725	ERR ("'false' expected");	1368	ERR ("'false' expected");
726		1369
727	break;	1370	break;
728		1371
729	case 'n':	1372	case 'n':
730	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))	1373	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
731	{	1374	{
732	dec->cur += 4;	1375	dec->cur += 4;
733	return newSViv (1);	1376	return newSVsv (&PL_sv_undef);
734	}	1377	}
735	else	1378	else
736	ERR ("'null' expected");	1379	ERR ("'null' expected");
737		1380
738	break;	1381	break;
739		1382
740	default:	1383	default:
741	ERR ("malformed json string");	1384	ERR ("malformed JSON string, neither array, object, number, string or atom");
742	break;	1385	break;
743	}	1386	}
744		1387
745	fail:	1388	fail:
746	return 0;	1389	return 0;
747	}	1390	}
748		1391
749	static SV *	1392	static SV *
750	decode_json (SV *string, UV flags)	1393	decode_json (SV string, JSON json, STRLEN *offset_return)
751	{	1394	{
		1395	dec_t dec;
		1396	STRLEN offset;
752	SV *sv;	1397	SV *sv;
753		1398
		1399	SvGETMAGIC (string);
		1400	SvUPGRADE (string, SVt_PV);
		1401
		1402	if (json->flags & F_MAXSIZE && SvCUR (string) > DEC_SIZE (json->flags))
		1403	croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
		1404	(unsigned long)SvCUR (string), (unsigned long)DEC_SIZE (json->flags));
		1405
754	if (!(flags & F_UTF8))	1406	if (json->flags & F_UTF8)
		1407	sv_utf8_downgrade (string, 0);
		1408	else
755	sv_utf8_upgrade (string);	1409	sv_utf8_upgrade (string);
756		1410
757	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP	1411	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
758		1412
759	dec_t dec;	1413	dec.json = *json;
760	dec.flags = flags;
761	dec.cur = SvPVX (string);	1414	dec.cur = SvPVX (string);
762	dec.end = SvEND (string);	1415	dec.end = SvEND (string);
763	dec.err = 0;	1416	dec.err = 0;
		1417	dec.depth = 0;
		1418	dec.maxdepth = DEC_DEPTH (dec.json.flags);
764		1419
765	*dec.end = 1; // invalid anywhere	1420	if (dec.json.cb_object \|\| dec.json.cb_sk_object)
		1421	dec.json.flags \|= F_HOOK;
		1422
		1423	*dec.end = 0; // this should basically be a nop, too, but make sure it's there
		1424
		1425	decode_ws (&dec);
766	sv = decode_sv (&dec);	1426	sv = decode_sv (&dec);
767	*dec.end = 0;	1427
		1428	if (!(offset_return \|\| !sv))
		1429	{
		1430	// check for trailing garbage
		1431	decode_ws (&dec);
		1432
		1433	if (*dec.cur)
		1434	{
		1435	dec.err = "garbage after JSON object";
		1436	SvREFCNT_dec (sv);
		1437	sv = 0;
		1438	}
		1439	}
		1440
		1441	if (offset_return \|\| !sv)
		1442	{
		1443	offset = dec.json.flags & F_UTF8
		1444	? dec.cur - SvPVX (string)
		1445	: utf8_distance (dec.cur, SvPVX (string));
		1446
		1447	if (offset_return)
		1448	*offset_return = offset;
		1449	}
768		1450
769	if (!sv)	1451	if (!sv)
770	{	1452	{
771	IV offset = utf8_distance (dec.cur, SvPVX (string));
772	SV *uni = sv_newmortal ();	1453	SV *uni = sv_newmortal ();
773		1454
		1455	// horrible hack to silence warning inside pv_uni_display
		1456	COP cop = *PL_curcop;
		1457	cop.cop_warnings = pWARN_NONE;
		1458	ENTER;
		1459	SAVEVPTR (PL_curcop);
		1460	PL_curcop = &cop;
774	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);	1461	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
		1462	LEAVE;
		1463
775	croak ("%s, at character %d (%s)",	1464	croak ("%s, at character offset %d [\"%s\"]",
776	dec.err,	1465	dec.err,
777	(int)offset,	1466	(int)offset,
778	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");	1467	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
779	}	1468	}
780		1469
781	sv = sv_2mortal (sv);	1470	sv = sv_2mortal (sv);
782		1471
783	if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))	1472	if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv))
784	croak ("JSON object or array expected (but number, string, true, false or null found, use allow_nonref to allow this)");	1473	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
785		1474
786	return sv;	1475	return sv;
787	}	1476	}
788		1477
		1478	/////////////////////////////////////////////////////////////////////////////
		1479	// incremental parser
		1480
		1481	static void
		1482	incr_parse (JSON *self)
		1483	{
		1484	const char *p = SvPVX (self->incr_text) + self->incr_pos;
		1485
		1486	for (;;)
		1487	{
		1488	//printf ("loop pod %d p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), p, p, self->incr_mode, self->incr_nest);//D
		1489	switch (self->incr_mode)
		1490	{
		1491	// only used for intiial whitespace skipping
		1492	case INCR_M_WS:
		1493	for (;;)
		1494	{
		1495	if (*p > 0x20)
		1496	{
		1497	self->incr_mode = INCR_M_JSON;
		1498	goto incr_m_json;
		1499	}
		1500	else if (!*p)
		1501	goto interrupt;
		1502
		1503	++p;
		1504	}
		1505
		1506	// skip a single char inside a string (for \\-processing)
		1507	case INCR_M_BS:
		1508	if (!*p)
		1509	goto interrupt;
		1510
		1511	++p;
		1512	self->incr_mode = INCR_M_STR;
		1513	goto incr_m_str;
		1514
		1515	// inside a string
		1516	case INCR_M_STR:
		1517	incr_m_str:
		1518	for (;;)
		1519	{
		1520	if (*p == '"')
		1521	{
		1522	++p;
		1523	self->incr_mode = INCR_M_JSON;
		1524
		1525	if (!self->incr_nest)
		1526	goto interrupt;
		1527
		1528	goto incr_m_json;
		1529	}
		1530	else if (*p == '\\')
		1531	{
		1532	++p; // "virtually" consumes character after \
		1533
		1534	if (!*p) // if at end of string we have to switch modes
		1535	{
		1536	self->incr_mode = INCR_M_BS;
		1537	goto interrupt;
		1538	}
		1539	}
		1540	else if (!*p)
		1541	goto interrupt;
		1542
		1543	++p;
		1544	}
		1545
		1546	// after initial ws, outside string
		1547	case INCR_M_JSON:
		1548	incr_m_json:
		1549	for (;;)
		1550	{
		1551	switch (*p++)
		1552	{
		1553	case 0:
		1554	--p;
		1555	goto interrupt;
		1556
		1557	case 0x09:
		1558	case 0x0a:
		1559	case 0x0d:
		1560	case 0x20:
		1561	if (!self->incr_nest)
		1562	{
		1563	--p; // do not eat the whitespace, let the next round do it
		1564	goto interrupt;
		1565	}
		1566	break;
		1567
		1568	case '"':
		1569	self->incr_mode = INCR_M_STR;
		1570	goto incr_m_str;
		1571
		1572	case '[':
		1573	case '{':
		1574	++self->incr_nest;
		1575	break;
		1576
		1577	case ']':
		1578	case '}':
		1579	if (!--self->incr_nest)
		1580	goto interrupt;
		1581	}
		1582	}
		1583	}
		1584
		1585	modechange:
		1586	;
		1587	}
		1588
		1589	interrupt:
		1590	self->incr_pos = p - SvPVX (self->incr_text);
		1591	//printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D
		1592	}
		1593
		1594	/////////////////////////////////////////////////////////////////////////////
		1595	// XS interface functions
		1596
789	MODULE = JSON::XS PACKAGE = JSON::XS	1597	MODULE = JSON::XS PACKAGE = JSON::XS
790		1598
791	BOOT:	1599	BOOT:
792	{	1600	{
793	int i;	1601	int i;
794		1602
795	memset (decode_hexdigit, 0xff, 256);
796	for (i = 10; i--; )	1603	for (i = 0; i < 256; ++i)
797	decode_hexdigit ['0' + i] = i;	1604	decode_hexdigit [i] =
		1605	i >= '0' && i <= '9' ? i - '0'
		1606	: i >= 'a' && i <= 'f' ? i - 'a' + 10
		1607	: i >= 'A' && i <= 'F' ? i - 'A' + 10
		1608	: -1;
798		1609
799	for (i = 6; --i; )
800	{
801	decode_hexdigit ['a' + i] = 10 + i;
802	decode_hexdigit ['A' + i] = 10 + i;
803	}
804
805	json_stash = gv_stashpv ("JSON::XS", 1);	1610	json_stash = gv_stashpv ("JSON::XS" , 1);
806	}	1611	json_boolean_stash = gv_stashpv ("JSON::XS::Boolean", 1);
807		1612
808	SV new (char dummy)	1613	json_true = get_sv ("JSON::XS::true" , 1); SvREADONLY_on (json_true );
		1614	json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false);
		1615	}
		1616
		1617	PROTOTYPES: DISABLE
		1618
		1619	void CLONE (...)
809	CODE:	1620	CODE:
810	RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);	1621	json_stash = 0;
		1622	json_boolean_stash = 0;
		1623
		1624	void new (char *klass)
		1625	PPCODE:
		1626	{
		1627	SV *pv = NEWSV (0, sizeof (JSON));
		1628	SvPOK_only (pv);
		1629	Zero (SvPVX (pv), 1, JSON);
		1630	((JSON *)SvPVX (pv))->flags = F_DEFAULT;
		1631	XPUSHs (sv_2mortal (sv_bless (
		1632	newRV_noinc (pv),
		1633	strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1)
		1634	)));
		1635	}
		1636
		1637	void ascii (JSON *self, int enable = 1)
		1638	ALIAS:
		1639	ascii = F_ASCII
		1640	latin1 = F_LATIN1
		1641	utf8 = F_UTF8
		1642	indent = F_INDENT
		1643	canonical = F_CANONICAL
		1644	space_before = F_SPACE_BEFORE
		1645	space_after = F_SPACE_AFTER
		1646	pretty = F_PRETTY
		1647	allow_nonref = F_ALLOW_NONREF
		1648	shrink = F_SHRINK
		1649	allow_blessed = F_ALLOW_BLESSED
		1650	convert_blessed = F_CONV_BLESSED
		1651	relaxed = F_RELAXED
		1652	PPCODE:
		1653	{
		1654	if (enable)
		1655	self->flags \|= ix;
		1656	else
		1657	self->flags &= ~ix;
		1658
		1659	XPUSHs (ST (0));
		1660	}
		1661
		1662	void get_ascii (JSON *self)
		1663	ALIAS:
		1664	get_ascii = F_ASCII
		1665	get_latin1 = F_LATIN1
		1666	get_utf8 = F_UTF8
		1667	get_indent = F_INDENT
		1668	get_canonical = F_CANONICAL
		1669	get_space_before = F_SPACE_BEFORE
		1670	get_space_after = F_SPACE_AFTER
		1671	get_allow_nonref = F_ALLOW_NONREF
		1672	get_shrink = F_SHRINK
		1673	get_allow_blessed = F_ALLOW_BLESSED
		1674	get_convert_blessed = F_CONV_BLESSED
		1675	get_relaxed = F_RELAXED
		1676	PPCODE:
		1677	XPUSHs (boolSV (self->flags & ix));
		1678
		1679	void max_depth (JSON *self, UV max_depth = 0x80000000UL)
		1680	PPCODE:
		1681	{
		1682	UV log2 = 0;
		1683
		1684	if (max_depth > 0x80000000UL) max_depth = 0x80000000UL;
		1685
		1686	while ((1UL << log2) < max_depth)
		1687	++log2;
		1688
		1689	self->flags = self->flags & ~F_MAXDEPTH \| (log2 << S_MAXDEPTH);
		1690
		1691	XPUSHs (ST (0));
		1692	}
		1693
		1694	U32 get_max_depth (JSON *self)
		1695	CODE:
		1696	RETVAL = DEC_DEPTH (self->flags);
811	OUTPUT:	1697	OUTPUT:
812	RETVAL	1698	RETVAL
813		1699
814	SV ascii (SV self, int enable)	1700	void max_size (JSON *self, UV max_size = 0)
815	ALIAS:	1701	PPCODE:
816	ascii = F_ASCII	1702	{
817	utf8 = F_UTF8	1703	UV log2 = 0;
818	indent = F_INDENT	1704
819	canonical = F_CANONICAL	1705	if (max_size > 0x80000000UL) max_size = 0x80000000UL;
820	space_before = F_SPACE_BEFORE	1706	if (max_size == 1) max_size = 2;
821	space_after = F_SPACE_AFTER	1707
822	json_rpc = F_JSON_RPC	1708	while ((1UL << log2) < max_size)
823	pretty = F_PRETTY	1709	++log2;
824	allow_nonref = F_ALLOW_NONREF	1710
		1711	self->flags = self->flags & ~F_MAXSIZE \| (log2 << S_MAXSIZE);
		1712
		1713	XPUSHs (ST (0));
		1714	}
		1715
		1716	int get_max_size (JSON *self)
825	CODE:	1717	CODE:
826	{	1718	RETVAL = DEC_SIZE (self->flags);
827	UV *uv = SvJSON (self);
828	if (enable)
829	*uv \|= ix;
830	else
831	*uv &= ~ix;
832
833	RETVAL = newSVsv (self);
834	}
835	OUTPUT:	1719	OUTPUT:
836	RETVAL	1720	RETVAL
837		1721
838	void encode (SV self, SV scalar)	1722	void filter_json_object (JSON self, SV cb = &PL_sv_undef)
839	PPCODE:	1723	PPCODE:
840	XPUSHs (encode_json (scalar, *SvJSON (self)));	1724	{
		1725	SvREFCNT_dec (self->cb_object);
		1726	self->cb_object = SvOK (cb) ? newSVsv (cb) : 0;
841		1727
842	void decode (SV self, SV jsonstr)	1728	XPUSHs (ST (0));
		1729	}
		1730
		1731	void filter_json_single_key_object (JSON self, SV key, SV *cb = &PL_sv_undef)
843	PPCODE:	1732	PPCODE:
844	XPUSHs (decode_json (jsonstr, *SvJSON (self)));	1733	{
		1734	if (!self->cb_sk_object)
		1735	self->cb_sk_object = newHV ();
845		1736
846	void to_json (SV *scalar)	1737	if (SvOK (cb))
		1738	hv_store_ent (self->cb_sk_object, key, newSVsv (cb), 0);
		1739	else
		1740	{
		1741	hv_delete_ent (self->cb_sk_object, key, G_DISCARD, 0);
		1742
		1743	if (!HvKEYS (self->cb_sk_object))
		1744	{
		1745	SvREFCNT_dec (self->cb_sk_object);
		1746	self->cb_sk_object = 0;
		1747	}
		1748	}
		1749
		1750	XPUSHs (ST (0));
		1751	}
		1752
		1753	void encode (JSON self, SV scalar)
847	PPCODE:	1754	PPCODE:
848	XPUSHs (encode_json (scalar, F_UTF8));	1755	XPUSHs (encode_json (scalar, self));
849		1756
850	void from_json (SV *jsonstr)	1757	void decode (JSON self, SV jsonstr)
851	PPCODE:	1758	PPCODE:
852	XPUSHs (decode_json (jsonstr, F_UTF8));	1759	XPUSHs (decode_json (jsonstr, self, 0));
853		1760
		1761	void decode_prefix (JSON self, SV jsonstr)
		1762	PPCODE:
		1763	{
		1764	STRLEN offset;
		1765	EXTEND (SP, 2);
		1766	PUSHs (decode_json (jsonstr, self, &offset));
		1767	PUSHs (sv_2mortal (newSVuv (offset)));
		1768	}
		1769
		1770	void incr_parse (JSON self, SV jsonstr = 0)
		1771	PPCODE:
		1772	{
		1773	if (!self->incr_text)
		1774	self->incr_text = newSVpvn ("", 0);
		1775
		1776	// append data, if any
		1777	if (jsonstr)
		1778	{
		1779	if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text))
		1780	{
		1781	/* utf-8-ness differs, need to upgrade */
		1782	sv_utf8_upgrade (self->incr_text);
		1783
		1784	if (self->incr_pos)
		1785	self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
		1786	- (U8 *)SvPVX (self->incr_text);
		1787	}
		1788
		1789	{
		1790	STRLEN len;
		1791	const char *str = SvPV (jsonstr, len);
		1792	SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1);
		1793	Move (str, SvEND (self->incr_text), len, char);
		1794	SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len);
		1795	*SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there
		1796	}
		1797	}
		1798
		1799	if (GIMME_V != G_VOID)
		1800	do
		1801	{
		1802	STRLEN offset;
		1803
		1804	incr_parse (self);
		1805
		1806	if (!INCR_DONE (self))
		1807	break;
		1808
		1809	XPUSHs (decode_json (self->incr_text, self, &offset));
		1810
		1811	sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset);
		1812	self->incr_pos -= offset;
		1813	self->incr_nest = 0;
		1814	self->incr_mode = 0;
		1815	}
		1816	while (GIMME_V == G_ARRAY);
		1817	}
		1818
		1819	SV incr_text (JSON self)
		1820	ATTRS: lvalue
		1821	CODE:
		1822	{
		1823	if (self->incr_pos)
		1824	croak ("incr_text can only be called after a successful incr_parse call in scalar context %d", self->incr_pos);//D
		1825
		1826	RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef;
		1827	}
		1828	OUTPUT:
		1829	RETVAL
		1830
		1831	void DESTROY (JSON *self)
		1832	CODE:
		1833	SvREFCNT_dec (self->cb_sk_object);
		1834	SvREFCNT_dec (self->cb_object);
		1835	SvREFCNT_dec (self->incr_text);
		1836
		1837	PROTOTYPES: ENABLE
		1838
		1839	void encode_json (SV *scalar)
		1840	ALIAS:
		1841	to_json_ = 0
		1842	encode_json = F_UTF8
		1843	PPCODE:
		1844	{
		1845	JSON json = { F_DEFAULT \| ix };
		1846	XPUSHs (encode_json (scalar, &json));
		1847	}
		1848
		1849	void decode_json (SV *jsonstr)
		1850	ALIAS:
		1851	from_json_ = 0
		1852	decode_json = F_UTF8
		1853	PPCODE:
		1854	{
		1855	JSON json = { F_DEFAULT \| ix };
		1856	XPUSHs (decode_json (jsonstr, &json, 0));
		1857	}
		1858
		1859

Diff Legend

–	Removed lines
+	Added lines
<	Changed lines
>	Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.3 by root, Thu Mar 22 18:10:29 2007 UTC vs. Revision 1.77 by root, Tue Mar 25 06:37:38 2008 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.3 by root, Thu Mar 22 18:10:29 2007 UTC vs.
Revision 1.77 by root, Tue Mar 25 06:37:38 2008 UTC