[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.24 by root, Tue Apr 3 23:59:04 2007 UTC vs.
Revision 1.122 by root, Tue Oct 29 15:55:49 2013 UTC

…		…
1	#include "EXTERN.h"	1	#include "EXTERN.h"
2	#include "perl.h"	2	#include "perl.h"
3	#include "XSUB.h"	3	#include "XSUB.h"
4		4
5	#include "assert.h"	5	#include <assert.h>
6	#include "string.h"	6	#include <string.h>
7	#include "stdlib.h"	7	#include <stdlib.h>
		8	#include <stdio.h>
		9	#include <limits.h>
		10	#include <float.h>
8		11
		12	#if defined(__BORLANDC__) || defined(_MSC_VER)
		13	# define snprintf _snprintf // C compilers have this in stdio.h
		14	#endif
		15
		16	// some old perls do not have this, try to make it work, no
		17	// guarantees, though. if it breaks, you get to keep the pieces.
		18	#ifndef UTF8_MAXBYTES
		19	# define UTF8_MAXBYTES 13
		20	#endif
		21
		22	// compatibility with perl <5.18
		23	#ifndef HvNAMELEN_get
		24	# define HvNAMELEN_get(hv) strlen (HvNAME (hv))
		25	#endif
		26	#ifndef HvNAMELEN
		27	# define HvNAMELEN(hv) HvNAMELEN_get (hv)
		28	#endif
		29	#ifndef HvNAMEUTF8
		30	# define HvNAMEUTF8(hv) 0
		31	#endif
		32
		33	// three extra for rounding, sign, and end of string
		34	#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 3)
		35
9	#define F_ASCII 0x00000001UL	36	#define F_ASCII 0x00000001UL
		37	#define F_LATIN1 0x00000002UL
10	#define F_UTF8 0x00000002UL	38	#define F_UTF8 0x00000004UL
11	#define F_INDENT 0x00000004UL	39	#define F_INDENT 0x00000008UL
12	#define F_CANONICAL 0x00000008UL	40	#define F_CANONICAL 0x00000010UL
13	#define F_SPACE_BEFORE 0x00000010UL	41	#define F_SPACE_BEFORE 0x00000020UL
14	#define F_SPACE_AFTER 0x00000020UL	42	#define F_SPACE_AFTER 0x00000040UL
15	#define F_ALLOW_NONREF 0x00000080UL	43	#define F_ALLOW_NONREF 0x00000100UL
16	#define F_SHRINK 0x00000100UL	44	#define F_SHRINK 0x00000200UL
		45	#define F_ALLOW_BLESSED 0x00000400UL
		46	#define F_CONV_BLESSED 0x00000800UL
17	#define F_MAXDEPTH 0xf8000000UL	47	#define F_RELAXED 0x00001000UL
18	#define S_MAXDEPTH 27	48	#define F_ALLOW_UNKNOWN 0x00002000UL
19		49	#define F_ALLOW_TAGS 0x00004000UL
20	#define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH))	50	#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing
21
22	// F_SELFCONVERT? <=> to_json/toJson
23	// F_BLESSED? <=> { $__class__$ => }
24		51
25	#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER	52	#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER
26	#define F_DEFAULT (9UL << S_MAXDEPTH)
27		53
28	#define INIT_SIZE 32 // initial scalar size to be allocated	54	#define INIT_SIZE 32 // initial scalar size to be allocated
29	#define INDENT_STEP 3 // spaces per indentation level	55	#define INDENT_STEP 3 // spaces per indentation level
30		56
31	#define UTF8_MAX_LEN 11 // for perls UTF-X: max. number of octets per character
32	#define SHORT_STRING_LEN 512 // special-case strings of up to this size	57	#define SHORT_STRING_LEN 16384 // special-case strings of up to this size
		58
		59	#define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8)
33		60
34	#define SB do {	61	#define SB do {
35	#define SE } while (0)	62	#define SE } while (0)
36		63
		64	#if __GNUC__ >= 3
		65	# define expect(expr,value) __builtin_expect ((expr), (value))
		66	# define INLINE static inline
		67	#else
		68	# define expect(expr,value) (expr)
		69	# define INLINE static
		70	#endif
		71
		72	#define expect_false(expr) expect ((expr) != 0, 0)
		73	#define expect_true(expr) expect ((expr) != 0, 1)
		74
		75	#define IN_RANGE_INC(type,val,beg,end) \
		76	((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
		77	<= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
		78
		79	#define ERR_NESTING_EXCEEDED "json text or perl structure exceeds maximum nesting level (max_depth set too low?)"
		80
		81	#ifdef USE_ITHREADS
		82	# define JSON_SLOW 1
		83	# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
		84	#else
		85	# define JSON_SLOW 0
		86	# define JSON_STASH json_stash
		87	#endif
		88
		89	// the amount of HEs to allocate on the stack, when sorting keys
		90	#define STACK_HES 64
		91
37	static HV *json_stash; // JSON::XS::	92	static HV *json_stash, *types_boolean_stash; // JSON::XS::
		93	static SV *types_true, *types_false, *sv_json;
		94
		95	enum {
		96	INCR_M_WS = 0, // initial whitespace skipping, must be 0
		97	INCR_M_STR, // inside string
		98	INCR_M_BS, // inside backslash
		99	INCR_M_C0, // inside comment in initial whitespace sequence
		100	INCR_M_C1, // inside comment in other places
		101	INCR_M_JSON // outside anything, count nesting
		102	};
		103
		104	#define INCR_DONE(json) ((json)->incr_nest <= 0 && (json)->incr_mode == INCR_M_JSON)
		105
		106	typedef struct {
		107	U32 flags;
		108	U32 max_depth;
		109	STRLEN max_size;
		110
		111	SV *cb_object;
		112	HV *cb_sk_object;
		113
		114	// for the incremental parser
		115	SV *incr_text; // the source text so far
		116	STRLEN incr_pos; // the current offset into the text
		117	int incr_nest; // {[]}-nesting level
		118	unsigned char incr_mode;
		119	} JSON;
		120
		121	INLINE void
		122	json_init (JSON *json)
		123	{
		124	Zero (json, 1, JSON);
		125	json->max_depth = 512;
		126	}
38		127
39	/////////////////////////////////////////////////////////////////////////////	128	/////////////////////////////////////////////////////////////////////////////
40	// utility functions	129	// utility functions
41		130
42	static UV *	131	INLINE SV *
43	SvJSON (SV *sv)	132	get_bool (const char *name)
44	{	133	{
45	if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))	134	SV *sv = get_sv (name, 1);
46	croak ("object is not of type JSON::XS");
47		135
48	return &SvUVX (SvRV (sv));	136	SvREADONLY_on (sv);
49	}	137	SvREADONLY_on (SvRV (sv));
50		138
51	static void	139	return sv;
		140	}
		141
		142	INLINE void
52	shrink (SV *sv)	143	shrink (SV *sv)
53	{	144	{
54	sv_utf8_downgrade (sv, 1);	145	sv_utf8_downgrade (sv, 1);
		146
55	if (SvLEN (sv) > SvCUR (sv) + 1)	147	if (SvLEN (sv) > SvCUR (sv) + 1)
56	{	148	{
57	#ifdef SvPV_shrink_to_cur	149	#ifdef SvPV_shrink_to_cur
58	SvPV_shrink_to_cur (sv);	150	SvPV_shrink_to_cur (sv);
59	#elif defined (SvPV_renew)	151	#elif defined (SvPV_renew)
65	// decode an utf-8 character and return it, or (UV)-1 in	157	// decode an utf-8 character and return it, or (UV)-1 in
66	// case of an error.	158	// case of an error.
67	// we special-case "safe" characters from U+80 .. U+7FF,	159	// we special-case "safe" characters from U+80 .. U+7FF,
68	// but use the very good perl function to parse anything else.	160	// but use the very good perl function to parse anything else.
69	// note that we never call this function for a ascii codepoints	161	// note that we never call this function for a ascii codepoints
70	static UV	162	INLINE UV
71	decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)	163	decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)
72	{	164	{
73	if (s[0] > 0xdf || s[0] < 0xc2)	165	if (expect_true (len >= 2
74	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);	166	&& IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
75	else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)	167	&& IN_RANGE_INC (char, s[1], 0x80, 0xbf)))
76	{	168	{
77	*clen = 2;	169	*clen = 2;
78	return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);	170	return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
79	}	171	}
80	else	172	else
81	{	173	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
82	*clen = (STRLEN)-1;	174	}
83	return (UV)-1;	175
		176	// likewise for encoding, also never called for ascii codepoints
		177	// this function takes advantage of this fact, although current gccs
		178	// seem to optimise the check for >= 0x80 away anyways
		179	INLINE unsigned char *
		180	encode_utf8 (unsigned char *s, UV ch)
		181	{
		182	if (expect_false (ch < 0x000080))
		183	*s++ = ch;
		184	else if (expect_true (ch < 0x000800))
		185	*s++ = 0xc0 | ( ch >> 6),
		186	*s++ = 0x80 | ( ch & 0x3f);
		187	else if ( ch < 0x010000)
		188	*s++ = 0xe0 | ( ch >> 12),
		189	*s++ = 0x80 | ((ch >> 6) & 0x3f),
		190	*s++ = 0x80 | ( ch & 0x3f);
		191	else if ( ch < 0x110000)
		192	*s++ = 0xf0 | ( ch >> 18),
		193	*s++ = 0x80 | ((ch >> 12) & 0x3f),
		194	*s++ = 0x80 | ((ch >> 6) & 0x3f),
		195	*s++ = 0x80 | ( ch & 0x3f);
		196
		197	return s;
		198	}
		199
		200	// convert offset pointer to character index, sv must be string
		201	static STRLEN
		202	ptr_to_index (SV *sv, char *offset)
		203	{
		204	return SvUTF8 (sv)
		205	? utf8_distance (offset, SvPVX (sv))
		206	: offset - SvPVX (sv);
		207	}
		208
		209	/////////////////////////////////////////////////////////////////////////////
		210	// fp hell
		211
		212	// scan a group of digits, and a trailing exponent
		213	static void
		214	json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp, int maxdepth)
		215	{
		216	UV uaccum = 0;
		217	int eaccum = 0;
		218
		219	// if we recurse too deep, skip all remaining digits
		220	// to avoid a stack overflow attack
		221	if (expect_false (--maxdepth <= 0))
		222	while (((U8)*s - '0') < 10)
		223	++s;
		224
		225	for (;;)
84	}	226	{
85	}	227	U8 dig = (U8)*s - '0';
86		228
		229	if (expect_false (dig >= 10))
		230	{
		231	if (dig == (U8)((U8)'.' - (U8)'0'))
		232	{
		233	++s;
		234	json_atof_scan1 (s, accum, expo, 1, maxdepth);
		235	}
		236	else if ((dig | ' ') == 'e' - '0')
		237	{
		238	int exp2 = 0;
		239	int neg = 0;
		240
		241	++s;
		242
		243	if (*s == '-')
		244	{
		245	++s;
		246	neg = 1;
		247	}
		248	else if (*s == '+')
		249	++s;
		250
		251	while ((dig = (U8)*s - '0') < 10)
		252	exp2 = exp2 * 10 + *s++ - '0';
		253
		254	*expo += neg ? -exp2 : exp2;
		255	}
		256
		257	break;
		258	}
		259
		260	++s;
		261
		262	uaccum = uaccum * 10 + dig;
		263	++eaccum;
		264
		265	// if we have too many digits, then recurse for more
		266	// we actually do this for rather few digits
		267	if (uaccum >= (UV_MAX - 9) / 10)
		268	{
		269	if (postdp) *expo -= eaccum;
		270	json_atof_scan1 (s, accum, expo, postdp, maxdepth);
		271	if (postdp) *expo += eaccum;
		272
		273	break;
		274	}
		275	}
		276
		277	// this relies greatly on the quality of the pow ()
		278	// implementation of the platform, but a good
		279	// implementation is hard to beat.
		280	// (IEEE 754 conformant ones are required to be exact)
		281	if (postdp) *expo -= eaccum;
		282	*accum += uaccum * Perl_pow (10., *expo);
		283	*expo += eaccum;
		284	}
		285
		286	static NV
		287	json_atof (const char *s)
		288	{
		289	NV accum = 0.;
		290	int expo = 0;
		291	int neg = 0;
		292
		293	if (*s == '-')
		294	{
		295	++s;
		296	neg = 1;
		297	}
		298
		299	// a recursion depth of ten gives us >>500 bits
		300	json_atof_scan1 (s, &accum, &expo, 0, 10);
		301
		302	return neg ? -accum : accum;
		303	}
87	/////////////////////////////////////////////////////////////////////////////	304	/////////////////////////////////////////////////////////////////////////////
88	// encoder	305	// encoder
89		306
90	// structure used for encoding JSON	307	// structure used for encoding JSON
91	typedef struct	308	typedef struct
92	{	309	{
93	char *cur; // SvPVX (sv) + current output position	310	char *cur; // SvPVX (sv) + current output position
94	char *end; // SvEND (sv)	311	char *end; // SvEND (sv)
95	SV *sv; // result scalar	312	SV *sv; // result scalar
96	U32 flags; // F_*	313	JSON json;
97	U32 indent; // indentation level	314	U32 indent; // indentation level
98	U32 maxdepth; // max. indentation/recursion level	315	UV limit; // escape character values >= this value when encoding
99	} enc_t;	316	} enc_t;
100		317
101	static void	318	INLINE void
102	need (enc_t *enc, STRLEN len)	319	need (enc_t *enc, STRLEN len)
103	{	320	{
104	if (enc->cur + len >= enc->end)	321	if (expect_false (enc->cur + len >= enc->end))
105	{	322	{
106	STRLEN cur = enc->cur - SvPVX (enc->sv);	323	STRLEN cur = enc->cur - (char *)SvPVX (enc->sv);
107	SvGROW (enc->sv, cur + len + 1);	324	SvGROW (enc->sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
108	enc->cur = SvPVX (enc->sv) + cur;	325	enc->cur = SvPVX (enc->sv) + cur;
109	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);	326	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
110	}	327	}
111	}	328	}
112		329
113	static void	330	INLINE void
114	encode_ch (enc_t *enc, char ch)	331	encode_ch (enc_t *enc, char ch)
115	{	332	{
116	need (enc, 1);	333	need (enc, 1);
117	*enc->cur++ = ch;	334	*enc->cur++ = ch;
118	}	335	}
…		…
126		343
127	while (str < end)	344	while (str < end)
128	{	345	{
129	unsigned char ch = *(unsigned char *)str;	346	unsigned char ch = *(unsigned char *)str;
130		347
131	if (ch >= 0x20 && ch < 0x80) // most common case	348	if (expect_true (ch >= 0x20 && ch < 0x80)) // most common case
132	{	349	{
133	if (ch == '"') // but with slow exceptions	350	if (expect_false (ch == '"')) // but with slow exceptions
134	{	351	{
135	need (enc, len += 1);	352	need (enc, len += 1);
136	*enc->cur++ = '\\';	353	*enc->cur++ = '\\';
137	*enc->cur++ = '"';	354	*enc->cur++ = '"';
138	}	355	}
139	else if (ch == '\\')	356	else if (expect_false (ch == '\\'))
140	{	357	{
141	need (enc, len += 1);	358	need (enc, len += 1);
142	*enc->cur++ = '\\';	359	*enc->cur++ = '\\';
143	*enc->cur++ = '\\';	360	*enc->cur++ = '\\';
144	}	361	}
…		…
162	STRLEN clen;	379	STRLEN clen;
163	UV uch;	380	UV uch;
164		381
165	if (is_utf8)	382	if (is_utf8)
166	{	383	{
167	//uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
168	uch = decode_utf8 (str, end - str, &clen);	384	uch = decode_utf8 (str, end - str, &clen);
169	if (clen == (STRLEN)-1)	385	if (clen == (STRLEN)-1)
170	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);	386	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
171	}	387	}
172	else	388	else
173	{	389	{
174	uch = ch;	390	uch = ch;
175	clen = 1;	391	clen = 1;
176	}	392	}
177		393
178	if (uch > 0x10FFFFUL)	394	if (uch < 0x80/*0x20*/ || uch >= enc->limit)
179	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
180
181	if (uch < 0x80 || enc->flags & F_ASCII)
182	{	395	{
183	if (uch > 0xFFFFUL)	396	if (uch >= 0x10000UL)
184	{	397	{
		398	if (uch >= 0x110000UL)
		399	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
		400
185	need (enc, len += 11);	401	need (enc, len += 11);
186	sprintf (enc->cur, "\\u%04x\\u%04x",	402	sprintf (enc->cur, "\\u%04x\\u%04x",
187	(int)((uch - 0x10000) / 0x400 + 0xD800),	403	(int)((uch - 0x10000) / 0x400 + 0xD800),
188	(int)((uch - 0x10000) % 0x400 + 0xDC00));	404	(int)((uch - 0x10000) % 0x400 + 0xDC00));
189	enc->cur += 12;	405	enc->cur += 12;
190	}	406	}
191	else	407	else
192	{	408	{
193	static char hexdigit [16] = "0123456789abcdef";
194	need (enc, len += 5);	409	need (enc, len += 5);
195	*enc->cur++ = '\\';	410	*enc->cur++ = '\\';
196	*enc->cur++ = 'u';	411	*enc->cur++ = 'u';
197	*enc->cur++ = hexdigit [ uch >> 12 ];	412	*enc->cur++ = PL_hexdigit [ uch >> 12 ];
198	*enc->cur++ = hexdigit [(uch >> 8) & 15];	413	*enc->cur++ = PL_hexdigit [(uch >> 8) & 15];
199	*enc->cur++ = hexdigit [(uch >> 4) & 15];	414	*enc->cur++ = PL_hexdigit [(uch >> 4) & 15];
200	*enc->cur++ = hexdigit [(uch >> 0) & 15];	415	*enc->cur++ = PL_hexdigit [(uch >> 0) & 15];
201	}	416	}
202		417
		418	str += clen;
		419	}
		420	else if (enc->json.flags & F_LATIN1)
		421	{
		422	*enc->cur++ = uch;
203	str += clen;	423	str += clen;
204	}	424	}
205	else if (is_utf8)	425	else if (is_utf8)
206	{	426	{
207	need (enc, len += clen);	427	need (enc, len += clen);
…		…
211	}	431	}
212	while (--clen);	432	while (--clen);
213	}	433	}
214	else	434	else
215	{	435	{
216	need (enc, len += UTF8_MAX_LEN - 1); // never more than 11 bytes needed	436	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
217	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);	437	enc->cur = encode_utf8 (enc->cur, uch);
218	++str;	438	++str;
219	}	439	}
220	}	440	}
221	}	441	}
222	}	442	}
223		443
224	--len;	444	--len;
225	}	445	}
226	}	446	}
227		447
228	static void	448	INLINE void
229	encode_indent (enc_t *enc)	449	encode_indent (enc_t *enc)
230	{	450	{
231	if (enc->flags & F_INDENT)	451	if (enc->json.flags & F_INDENT)
232	{	452	{
233	int spaces = enc->indent * INDENT_STEP;	453	int spaces = enc->indent * INDENT_STEP;
234		454
235	need (enc, spaces);	455	need (enc, spaces);
236	memset (enc->cur, ' ', spaces);	456	memset (enc->cur, ' ', spaces);
237	enc->cur += spaces;	457	enc->cur += spaces;
238	}	458	}
239	}	459	}
240		460
241	static void	461	INLINE void
242	encode_space (enc_t *enc)	462	encode_space (enc_t *enc)
243	{	463	{
244	need (enc, 1);	464	need (enc, 1);
245	encode_ch (enc, ' ');	465	encode_ch (enc, ' ');
246	}	466	}
247		467
248	static void	468	INLINE void
249	encode_nl (enc_t *enc)	469	encode_nl (enc_t *enc)
250	{	470	{
251	if (enc->flags & F_INDENT)	471	if (enc->json.flags & F_INDENT)
252	{	472	{
253	need (enc, 1);	473	need (enc, 1);
254	encode_ch (enc, '\n');	474	encode_ch (enc, '\n');
255	}	475	}
256	}	476	}
257		477
258	static void	478	INLINE void
259	encode_comma (enc_t *enc)	479	encode_comma (enc_t *enc)
260	{	480	{
261	encode_ch (enc, ',');	481	encode_ch (enc, ',');
262		482
263	if (enc->flags & F_INDENT)	483	if (enc->json.flags & F_INDENT)
264	encode_nl (enc);	484	encode_nl (enc);
265	else if (enc->flags & F_SPACE_AFTER)	485	else if (enc->json.flags & F_SPACE_AFTER)
266	encode_space (enc);	486	encode_space (enc);
267	}	487	}
268		488
269	static void encode_sv (enc_t *enc, SV *sv);	489	static void encode_sv (enc_t *enc, SV *sv);
270		490
271	static void	491	static void
272	encode_av (enc_t *enc, AV *av)	492	encode_av (enc_t *enc, AV *av)
273	{	493	{
274	int i, len = av_len (av);	494	int i, len = av_len (av);
275		495
276	if (enc->indent >= enc->maxdepth)	496	if (enc->indent >= enc->json.max_depth)
277	croak ("data structure too deep (hit recursion limit)");	497	croak (ERR_NESTING_EXCEEDED);
278		498
279	encode_ch (enc, '['); encode_nl (enc);	499	encode_ch (enc, '[');
280	++enc->indent;
281		500
		501	if (len >= 0)
		502	{
		503	encode_nl (enc); ++enc->indent;
		504
282	for (i = 0; i <= len; ++i)	505	for (i = 0; i <= len; ++i)
283	{	506	{
		507	SV **svp = av_fetch (av, i, 0);
		508
284	encode_indent (enc);	509	encode_indent (enc);
285	encode_sv (enc, *av_fetch (av, i, 0));
286		510
		511	if (svp)
		512	encode_sv (enc, *svp);
		513	else
		514	encode_str (enc, "null", 4, 0);
		515
287	if (i < len)	516	if (i < len)
288	encode_comma (enc);	517	encode_comma (enc);
289	}	518	}
290		519
		520	encode_nl (enc); --enc->indent; encode_indent (enc);
		521	}
		522
291	encode_nl (enc);	523	encode_ch (enc, ']');
292
293	--enc->indent;
294	encode_indent (enc); encode_ch (enc, ']');
295	}	524	}
296		525
297	static void	526	static void
298	encode_he (enc_t *enc, HE *he)	527	encode_hk (enc_t *enc, HE *he)
299	{	528	{
300	encode_ch (enc, '"');	529	encode_ch (enc, '"');
301		530
302	if (HeKLEN (he) == HEf_SVKEY)	531	if (HeKLEN (he) == HEf_SVKEY)
303	{	532	{
304	SV *sv = HeSVKEY (he);	533	SV *sv = HeSVKEY (he);
305	STRLEN len;	534	STRLEN len;
306	char *str;	535	char *str;
307		536
308	SvGETMAGIC (sv);	537	SvGETMAGIC (sv);
309	str = SvPV (sv, len);	538	str = SvPV (sv, len);
310		539
311	encode_str (enc, str, len, SvUTF8 (sv));	540	encode_str (enc, str, len, SvUTF8 (sv));
312	}	541	}
313	else	542	else
314	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));	543	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
315		544
316	encode_ch (enc, '"');	545	encode_ch (enc, '"');
317		546
318	if (enc->flags & F_SPACE_BEFORE) encode_space (enc);	547	if (enc->json.flags & F_SPACE_BEFORE) encode_space (enc);
319	encode_ch (enc, ':');	548	encode_ch (enc, ':');
320	if (enc->flags & F_SPACE_AFTER ) encode_space (enc);	549	if (enc->json.flags & F_SPACE_AFTER ) encode_space (enc);
321	encode_sv (enc, HeVAL (he));
322	}	550	}
323		551
324	// compare hash entries, used when all keys are bytestrings	552	// compare hash entries, used when all keys are bytestrings
325	static int	553	static int
326	he_cmp_fast (const void *a_, const void *b_)	554	he_cmp_fast (const void *a_, const void *b_)
…		…
331	HE *b = *(HE **)b_;	559	HE *b = *(HE **)b_;
332		560
333	STRLEN la = HeKLEN (a);	561	STRLEN la = HeKLEN (a);
334	STRLEN lb = HeKLEN (b);	562	STRLEN lb = HeKLEN (b);
335		563
336	if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))	564	if (!(cmp = memcmp (HeKEY (b), HeKEY (a), lb < la ? lb : la)))
337	cmp = la - lb;	565	cmp = lb - la;
338		566
339	return cmp;	567	return cmp;
340	}	568	}
341		569
342	// compare hash entries, used when some keys are sv's or utf-x	570	// compare hash entries, used when some keys are sv's or utf-x
343	static int	571	static int
344	he_cmp_slow (const void *a, const void *b)	572	he_cmp_slow (const void *a, const void *b)
345	{	573	{
346	return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b));	574	return sv_cmp (HeSVKEY_force (*(HE **)b), HeSVKEY_force (*(HE **)a));
347	}	575	}
348		576
349	static void	577	static void
350	encode_hv (enc_t *enc, HV *hv)	578	encode_hv (enc_t *enc, HV *hv)
351	{	579	{
352	int count, i;	580	HE *he;
353		581
354	if (enc->indent >= enc->maxdepth)	582	if (enc->indent >= enc->json.max_depth)
355	croak ("data structure too deep (hit recursion limit)");	583	croak (ERR_NESTING_EXCEEDED);
356		584
357	encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;	585	encode_ch (enc, '{');
358		586
359	if ((count = hv_iterinit (hv)))
360	{
361	// for canonical output we have to sort by keys first	587	// for canonical output we have to sort by keys first
362	// actually, this is mostly due to the stupid so-called	588	// actually, this is mostly due to the stupid so-called
363	// security workaround added somewhere in 5.8.x.	589	// security workaround added somewhere in 5.8.x
364	// that randomises hash orderings	590	// that randomises hash orderings
365	if (enc->flags & F_CANONICAL)	591	if (enc->json.flags & F_CANONICAL && !SvRMAGICAL (hv))
		592	{
		593	int count = hv_iterinit (hv);
		594
		595	if (SvMAGICAL (hv))
366	{	596	{
367	HE *he, *hes [count]; // if your compiler dies here, you need to enable C99 mode	597	// need to count by iterating. could improve by dynamically building the vector below
		598	// but I don't care for the speed of this special case.
		599	// note also that we will run into undefined behaviour when the two iterations
		600	// do not result in the same count, something I might care for in some later release.
		601
		602	count = 0;
		603	while (hv_iternext (hv))
		604	++count;
		605
		606	hv_iterinit (hv);
		607	}
		608
		609	if (count)
		610	{
368	int fast = 1;	611	int i, fast = 1;
		612	HE *hes_stack [STACK_HES];
		613	HE **hes = hes_stack;
		614
		615	// allocate larger arrays on the heap
		616	if (count > STACK_HES)
		617	{
		618	SV *sv = sv_2mortal (NEWSV (0, count * sizeof (*hes)));
		619	hes = (HE **)SvPVX (sv);
		620	}
369		621
370	i = 0;	622	i = 0;
371	while ((he = hv_iternext (hv)))	623	while ((he = hv_iternext (hv)))
372	{	624	{
373	hes [i++] = he;	625	hes [i++] = he;
…		…
395		647
396	FREETMPS;	648	FREETMPS;
397	LEAVE;	649	LEAVE;
398	}	650	}
399		651
400	for (i = 0; i < count; ++i)	652	encode_nl (enc); ++enc->indent;
		653
		654	while (count--)
401	{	655	{
402	encode_indent (enc);	656	encode_indent (enc);
		657	he = hes [count];
403	encode_he (enc, hes [i]);	658	encode_hk (enc, he);
		659	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
404		660
405	if (i < count - 1)	661	if (count)
406	encode_comma (enc);	662	encode_comma (enc);
407	}	663	}
408		664
409	encode_nl (enc);	665	encode_nl (enc); --enc->indent; encode_indent (enc);
410	}	666	}
		667	}
411	else	668	else
		669	{
		670	if (hv_iterinit (hv) || SvMAGICAL (hv))
		671	if ((he = hv_iternext (hv)))
412	{	672	{
413	SV *sv;	673	encode_nl (enc); ++enc->indent;
414	HE *he = hv_iternext (hv);
415		674
416	for (;;)	675	for (;;)
417	{	676	{
418	encode_indent (enc);	677	encode_indent (enc);
419	encode_he (enc, he);	678	encode_hk (enc, he);
		679	encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
420		680
421	if (!(he = hv_iternext (hv)))	681	if (!(he = hv_iternext (hv)))
422	break;	682	break;
423		683
424	encode_comma (enc);	684	encode_comma (enc);
425	}	685	}
426		686
427	encode_nl (enc);	687	encode_nl (enc); --enc->indent; encode_indent (enc);
428	}	688	}
429	}	689	}
430		690
431	--enc->indent; encode_indent (enc); encode_ch (enc, '}');	691	encode_ch (enc, '}');
432	}	692	}
433		693
434	// encode objects, arrays and special \0=false and \1=true values.	694	// encode objects, arrays and special \0=false and \1=true values.
435	static void	695	static void
436	encode_rv (enc_t *enc, SV *sv)	696	encode_rv (enc_t *enc, SV *sv)
437	{	697	{
		698	svtype svt;
		699	GV *method;
		700
438	SvGETMAGIC (sv);	701	SvGETMAGIC (sv);
439
440	svtype svt = SvTYPE (sv);	702	svt = SvTYPE (sv);
441		703
		704	if (expect_false (SvOBJECT (sv)))
		705	{
		706	HV *boolean_stash = !JSON_SLOW || types_boolean_stash
		707	? types_boolean_stash
		708	: gv_stashpv ("Types::Serialiser::Boolean", 1);
		709	HV *stash = SvSTASH (sv);
		710
		711	if (stash == boolean_stash)
		712	{
		713	if (SvIV (sv))
		714	encode_str (enc, "true", 4, 0);
		715	else
		716	encode_str (enc, "false", 5, 0);
		717	}
		718	else if ((enc->json.flags & F_ALLOW_TAGS) && (method = gv_fetchmethod_autoload (stash, "FREEZE", 0)))
		719	{
		720	int count;
		721	dSP;
		722
		723	ENTER; SAVETMPS; PUSHMARK (SP);
		724	EXTEND (SP, 2);
		725	// we re-bless the reference to get overload and other niceties right
		726	PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
		727	PUSHs (sv_json);
		728
		729	PUTBACK;
		730	count = call_sv ((SV *)GvCV (method), G_ARRAY);
		731	SPAGAIN;
		732
		733	// catch this surprisingly common error
		734	if (SvROK (TOPs) && SvRV (TOPs) == sv)
		735	croak ("%s::TO_JSON method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv)));
		736
		737	encode_ch (enc, '(');
		738	encode_ch (enc, '"');
		739	encode_str (enc, HvNAME (stash), HvNAMELEN (stash), HvNAMEUTF8 (stash));
		740	encode_ch (enc, '"');
		741	encode_ch (enc, ')');
		742	encode_ch (enc, '[');
		743
		744	while (count)
		745	{
		746	encode_sv (enc, SP[1 - count--]);
		747
		748	if (count)
		749	encode_ch (enc, ',');
		750	}
		751
		752	encode_ch (enc, ']');
		753
		754	PUTBACK;
		755
		756	FREETMPS; LEAVE;
		757	}
		758	else if ((enc->json.flags & F_CONV_BLESSED) && (method = gv_fetchmethod_autoload (stash, "TO_JSON", 0)))
		759	{
		760	dSP;
		761
		762	ENTER; SAVETMPS; PUSHMARK (SP);
		763	// we re-bless the reference to get overload and other niceties right
		764	XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash));
		765
		766	// calling with G_SCALAR ensures that we always get a 1 return value
		767	PUTBACK;
		768	call_sv ((SV *)GvCV (method), G_SCALAR);
		769	SPAGAIN;
		770
		771	// catch this surprisingly common error
		772	if (SvROK (TOPs) && SvRV (TOPs) == sv)
		773	croak ("%s::TO_JSON method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv)));
		774
		775	sv = POPs;
		776	PUTBACK;
		777
		778	encode_sv (enc, sv);
		779
		780	FREETMPS; LEAVE;
		781	}
		782	else if (enc->json.flags & F_ALLOW_BLESSED)
		783	encode_str (enc, "null", 4, 0);
		784	else
		785	croak ("encountered object '%s', but neither allow_blessed, convert_blessed nor allow_tags settings are enabled (or TO_JSON/FREEZE method missing)",
		786	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
		787	}
442	if (svt == SVt_PVHV)	788	else if (svt == SVt_PVHV)
443	encode_hv (enc, (HV *)sv);	789	encode_hv (enc, (HV *)sv);
444	else if (svt == SVt_PVAV)	790	else if (svt == SVt_PVAV)
445	encode_av (enc, (AV *)sv);	791	encode_av (enc, (AV *)sv);
446	else if (svt < SVt_PVAV)	792	else if (svt < SVt_PVAV)
447	{	793	{
448	if (SvNIOK (sv) && SvIV (sv) == 0)	794	STRLEN len = 0;
		795	char *pv = svt ? SvPV (sv, len) : 0;
		796
		797	if (len == 1 && *pv == '1')
		798	encode_str (enc, "true", 4, 0);
		799	else if (len == 1 && *pv == '0')
449	encode_str (enc, "false", 5, 0);	800	encode_str (enc, "false", 5, 0);
450	else if (SvNIOK (sv) && SvIV (sv) == 1)	801	else if (enc->json.flags & F_ALLOW_UNKNOWN)
451	encode_str (enc, "true", 4, 0);	802	encode_str (enc, "null", 4, 0);
452	else	803	else
453	croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",	804	croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",
454	SvPV_nolen (sv_2mortal (newRV_inc (sv))));	805	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
455	}	806	}
		807	else if (enc->json.flags & F_ALLOW_UNKNOWN)
		808	encode_str (enc, "null", 4, 0);
456	else	809	else
457	croak ("encountered %s, but JSON can only represent references to arrays or hashes",	810	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
458	SvPV_nolen (sv_2mortal (newRV_inc (sv))));	811	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
459	}	812	}
460		813
…		…
471	encode_str (enc, str, len, SvUTF8 (sv));	824	encode_str (enc, str, len, SvUTF8 (sv));
472	encode_ch (enc, '"');	825	encode_ch (enc, '"');
473	}	826	}
474	else if (SvNOKp (sv))	827	else if (SvNOKp (sv))
475	{	828	{
		829	// trust that perl will do the right thing w.r.t. JSON syntax.
476	need (enc, NV_DIG + 32);	830	need (enc, NV_DIG + 32);
477	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);	831	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
478	enc->cur += strlen (enc->cur);	832	enc->cur += strlen (enc->cur);
479	}	833	}
480	else if (SvIOKp (sv))	834	else if (SvIOKp (sv))
481	{	835	{
		836	// we assume we can always read an IV as a UV and vice versa
		837	// we assume two's complement
		838	// we assume no aliasing issues in the union
		839	if (SvIsUV (sv) ? SvUVX (sv) <= 59000
		840	: SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000)
		841	{
		842	// optimise the "small number case"
		843	// code will likely be branchless and use only a single multiplication
		844	// works for numbers up to 59074
		845	I32 i = SvIVX (sv);
		846	U32 u;
		847	char digit, nz = 0;
		848
482	need (enc, 64);	849	need (enc, 6);
		850
		851	*enc->cur = '-'; enc->cur += i < 0 ? 1 : 0;
		852	u = i < 0 ? -i : i;
		853
		854	// convert to 4.28 fixed-point representation
		855	u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits
		856
		857	// now output digit by digit, each time masking out the integer part
		858	// and multiplying by 5 while moving the decimal point one to the right,
		859	// resulting in a net multiplication by 10.
		860	// we always write the digit to memory but conditionally increment
		861	// the pointer, to enable the use of conditional move instructions.
		862	digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5;
		863	digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5;
		864	digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5;
		865	digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5;
		866	digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0'
		867	}
		868	else
		869	{
		870	// large integer, use the (rather slow) snprintf way.
		871	need (enc, IVUV_MAXCHARS);
483	enc->cur +=	872	enc->cur +=
484	SvIsUV(sv)	873	SvIsUV(sv)
485	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))	874	? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv))
486	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));	875	: snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv));
		876	}
487	}	877	}
488	else if (SvROK (sv))	878	else if (SvROK (sv))
489	encode_rv (enc, SvRV (sv));	879	encode_rv (enc, SvRV (sv));
490	else if (!SvOK (sv))	880	else if (!SvOK (sv) || enc->json.flags & F_ALLOW_UNKNOWN)
491	encode_str (enc, "null", 4, 0);	881	encode_str (enc, "null", 4, 0);
492	else	882	else
493	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",	883	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, check your input data",
494	SvPV_nolen (sv), SvFLAGS (sv));	884	SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
495	}	885	}
496		886
497	static SV *	887	static SV *
498	encode_json (SV *scalar, U32 flags)	888	encode_json (SV *scalar, JSON *json)
499	{	889	{
		890	enc_t enc;
		891
500	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))	892	if (!(json->flags & F_ALLOW_NONREF) && !SvROK (scalar))
501	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");	893	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
502		894
503	enc_t enc;	895	enc.json = *json;
504	enc.flags = flags;
505	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));	896	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
506	enc.cur = SvPVX (enc.sv);	897	enc.cur = SvPVX (enc.sv);
507	enc.end = SvEND (enc.sv);	898	enc.end = SvEND (enc.sv);
508	enc.indent = 0;	899	enc.indent = 0;
509	enc.maxdepth = DEC_DEPTH (flags);	900	enc.limit = enc.json.flags & F_ASCII ? 0x000080UL
		901	: enc.json.flags & F_LATIN1 ? 0x000100UL
		902	: 0x110000UL;
510		903
511	SvPOK_only (enc.sv);	904	SvPOK_only (enc.sv);
512	encode_sv (&enc, scalar);	905	encode_sv (&enc, scalar);
		906	encode_nl (&enc);
513		907
		908	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
		909	*SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
		910
514	if (!(flags & (F_ASCII \| F_UTF8)))	911	if (!(enc.json.flags & (F_ASCII \| F_LATIN1 \| F_UTF8)))
515	SvUTF8_on (enc.sv);	912	SvUTF8_on (enc.sv);
516		913
517	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
518
519	if (enc.flags & F_SHRINK)	914	if (enc.json.flags & F_SHRINK)
520	shrink (enc.sv);	915	shrink (enc.sv);
521		916
522	return enc.sv;	917	return enc.sv;
523	}	918	}
524		919
…		…
529	typedef struct	924	typedef struct
530	{	925	{
531	char *cur; // current parser pointer	926	char *cur; // current parser pointer
532	char *end; // end of input string	927	char *end; // end of input string
533	const char *err; // parse error, if != 0	928	const char *err; // parse error, if != 0
534	U32 flags; // F_*	929	JSON json;
535	U32 depth; // recursion depth	930	U32 depth; // recursion depth
536	U32 maxdepth; // recursion depth limit	931	U32 maxdepth; // recursion depth limit
537	} dec_t;	932	} dec_t;
538		933
539	static void	934	INLINE void
		935	decode_comment (dec_t *dec)
		936	{
		937	// only '#'-style comments allowed a.t.m.
		938
		939	while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d)
		940	++dec->cur;
		941	}
		942
		943	INLINE void
540	decode_ws (dec_t *dec)	944	decode_ws (dec_t *dec)
541	{	945	{
542	for (;;)	946	for (;;)
543	{	947	{
544	char ch = *dec->cur;	948	char ch = *dec->cur;
545		949
546	if (ch > 0x20	950	if (ch > 0x20)
		951	{
		952	if (expect_false (ch == '#'))
		953	{
		954	if (dec->json.flags & F_RELAXED)
		955	decode_comment (dec);
		956	else
		957	break;
		958	}
		959	else
		960	break;
		961	}
547	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))	962	else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)
548	break;	963	break; // parse error, but let higher level handle it, gives better error messages
549		964
550	++dec->cur;	965	++dec->cur;
551	}	966	}
552	}	967	}
553		968
…		…
557	if (*dec->cur != ch) \	972	if (*dec->cur != ch) \
558	ERR (# ch " expected"); \	973	ERR (# ch " expected"); \
559	++dec->cur; \	974	++dec->cur; \
560	SE	975	SE
561		976
562	#define DEC_INC_DEPTH if (++dec->depth > dec->maxdepth) ERR ("json datastructure exceeds maximum nesting level (set a higher max_depth)")	977	#define DEC_INC_DEPTH if (++dec->depth > dec->json.max_depth) ERR (ERR_NESTING_EXCEEDED)
563	#define DEC_DEC_DEPTH --dec->depth	978	#define DEC_DEC_DEPTH --dec->depth
564		979
565	static SV *decode_sv (dec_t *dec);	980	static SV *decode_sv (dec_t *dec);
566		981
567	static signed char decode_hexdigit[256];	982	static signed char decode_hexdigit[256];
…		…
570	decode_4hex (dec_t *dec)	985	decode_4hex (dec_t *dec)
571	{	986	{
572	signed char d1, d2, d3, d4;	987	signed char d1, d2, d3, d4;
573	unsigned char *cur = (unsigned char *)dec->cur;	988	unsigned char *cur = (unsigned char *)dec->cur;
574		989
575	d1 = decode_hexdigit [cur [0]]; if (d1 < 0) ERR ("four hexadecimal digits expected");	990	d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("exactly four hexadecimal digits expected");
576	d2 = decode_hexdigit [cur [1]]; if (d2 < 0) ERR ("four hexadecimal digits expected");	991	d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("exactly four hexadecimal digits expected");
577	d3 = decode_hexdigit [cur [2]]; if (d3 < 0) ERR ("four hexadecimal digits expected");	992	d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("exactly four hexadecimal digits expected");
578	d4 = decode_hexdigit [cur [3]]; if (d4 < 0) ERR ("four hexadecimal digits expected");	993	d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("exactly four hexadecimal digits expected");
579		994
580	dec->cur += 4;	995	dec->cur += 4;
581		996
582	return ((UV)d1) << 12	997	return ((UV)d1) << 12
583	\| ((UV)d2) << 8	998	\| ((UV)d2) << 8
…		…
591	static SV *	1006	static SV *
592	decode_str (dec_t *dec)	1007	decode_str (dec_t *dec)
593	{	1008	{
594	SV *sv = 0;	1009	SV *sv = 0;
595	int utf8 = 0;	1010	int utf8 = 0;
		1011	char *dec_cur = dec->cur;
596		1012
597	do	1013	do
598	{	1014	{
599	char buf [SHORT_STRING_LEN + UTF8_MAX_LEN];	1015	char buf [SHORT_STRING_LEN + UTF8_MAXBYTES];
600	char *cur = buf;	1016	char *cur = buf;
601		1017
602	do	1018	do
603	{	1019	{
604	unsigned char ch = *(unsigned char *)dec->cur++;	1020	unsigned char ch = *(unsigned char *)dec_cur++;
605		1021
606	if (ch == '"')	1022	if (expect_false (ch == '"'))
607	{	1023	{
608	--dec->cur;	1024	--dec_cur;
609	break;	1025	break;
610	}	1026	}
611	else if (ch == '\\')	1027	else if (expect_false (ch == '\\'))
612	{	1028	{
613	switch (*dec->cur)	1029	switch (*dec_cur)
614	{	1030	{
615	case '\\':	1031	case '\\':
616	case '/':	1032	case '/':
617	case '"': *cur++ = *dec->cur++; break;	1033	case '"': *cur++ = *dec_cur++; break;
618		1034
619	case 'b': ++dec->cur; *cur++ = '\010'; break;	1035	case 'b': ++dec_cur; *cur++ = '\010'; break;
620	case 't': ++dec->cur; *cur++ = '\011'; break;	1036	case 't': ++dec_cur; *cur++ = '\011'; break;
621	case 'n': ++dec->cur; *cur++ = '\012'; break;	1037	case 'n': ++dec_cur; *cur++ = '\012'; break;
622	case 'f': ++dec->cur; *cur++ = '\014'; break;	1038	case 'f': ++dec_cur; *cur++ = '\014'; break;
623	case 'r': ++dec->cur; *cur++ = '\015'; break;	1039	case 'r': ++dec_cur; *cur++ = '\015'; break;
624		1040
625	case 'u':	1041	case 'u':
626	{	1042	{
627	UV lo, hi;	1043	UV lo, hi;
628	++dec->cur;	1044	++dec_cur;
629		1045
		1046	dec->cur = dec_cur;
630	hi = decode_4hex (dec);	1047	hi = decode_4hex (dec);
		1048	dec_cur = dec->cur;
631	if (hi == (UV)-1)	1049	if (hi == (UV)-1)
632	goto fail;	1050	goto fail;
633		1051
634	// possibly a surrogate pair	1052	// possibly a surrogate pair
635	if (hi >= 0xd800)	1053	if (hi >= 0xd800)
636	if (hi < 0xdc00)	1054	if (hi < 0xdc00)
637	{	1055	{
638	if (dec->cur [0] != '\\' \|\| dec->cur [1] != 'u')	1056	if (dec_cur [0] != '\\' \|\| dec_cur [1] != 'u')
639	ERR ("missing low surrogate character in surrogate pair");	1057	ERR ("missing low surrogate character in surrogate pair");
640		1058
641	dec->cur += 2;	1059	dec_cur += 2;
642		1060
		1061	dec->cur = dec_cur;
643	lo = decode_4hex (dec);	1062	lo = decode_4hex (dec);
		1063	dec_cur = dec->cur;
644	if (lo == (UV)-1)	1064	if (lo == (UV)-1)
645	goto fail;	1065	goto fail;
646		1066
647	if (lo < 0xdc00 \|\| lo >= 0xe000)	1067	if (lo < 0xdc00 \|\| lo >= 0xe000)
648	ERR ("surrogate pair expected");	1068	ERR ("surrogate pair expected");
…		…
654		1074
655	if (hi >= 0x80)	1075	if (hi >= 0x80)
656	{	1076	{
657	utf8 = 1;	1077	utf8 = 1;
658		1078
659	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);	1079	cur = encode_utf8 (cur, hi);
660	}	1080	}
661	else	1081	else
662	*cur++ = hi;	1082	*cur++ = hi;
663	}	1083	}
664	break;	1084	break;
665		1085
666	default:	1086	default:
667	--dec->cur;	1087	--dec_cur;
668	ERR ("illegal backslash escape sequence in string");	1088	ERR ("illegal backslash escape sequence in string");
669	}	1089	}
670	}	1090	}
671	else if (ch >= 0x20 && ch <= 0x7f)	1091	else if (expect_true (ch >= 0x20 && ch < 0x80))
672	*cur++ = ch;	1092	*cur++ = ch;
673	else if (ch >= 0x80)	1093	else if (ch >= 0x80)
674	{	1094	{
675	--dec->cur;
676
677	STRLEN clen;	1095	STRLEN clen;
		1096
		1097	--dec_cur;
		1098
678	UV uch = decode_utf8 (dec->cur, dec->end - dec->cur, &clen);	1099	decode_utf8 (dec_cur, dec->end - dec_cur, &clen);
679	if (clen == (STRLEN)-1)	1100	if (clen == (STRLEN)-1)
680	ERR ("malformed UTF-8 character in JSON string");	1101	ERR ("malformed UTF-8 character in JSON string");
681		1102
682	do	1103	do
683	*cur++ = *dec->cur++;	1104	*cur++ = *dec_cur++;
684	while (--clen);	1105	while (--clen);
685		1106
686	utf8 = 1;	1107	utf8 = 1;
687	}	1108	}
688	else	1109	else
689	{	1110	{
690	--dec->cur;	1111	--dec_cur;
691		1112
692	if (!ch)	1113	if (!ch)
693	ERR ("unexpected end of string while parsing JSON string");	1114	ERR ("unexpected end of string while parsing JSON string");
694	else	1115	else
695	ERR ("invalid character encountered while parsing JSON string");	1116	ERR ("invalid character encountered while parsing JSON string");
696	}	1117	}
697	}	1118	}
698	while (cur < buf + SHORT_STRING_LEN);	1119	while (cur < buf + SHORT_STRING_LEN);
699		1120
		1121	{
700	STRLEN len = cur - buf;	1122	STRLEN len = cur - buf;
701		1123
702	if (sv)	1124	if (sv)
703	{	1125	{
704	SvGROW (sv, SvCUR (sv) + len + 1);	1126	STRLEN cur = SvCUR (sv);
		1127
		1128	if (SvLEN (sv) <= cur + len)
		1129	SvGROW (sv, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
		1130
705	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);	1131	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
706	SvCUR_set (sv, SvCUR (sv) + len);	1132	SvCUR_set (sv, SvCUR (sv) + len);
707	}	1133	}
708	else	1134	else
709	sv = newSVpvn (buf, len);	1135	sv = newSVpvn (buf, len);
710	}	1136	}
		1137	}
711	while (*dec->cur != '"');	1138	while (*dec_cur != '"');
712		1139
713	++dec->cur;	1140	++dec_cur;
714		1141
715	if (sv)	1142	if (sv)
716	{	1143	{
717	SvPOK_only (sv);	1144	SvPOK_only (sv);
718	*SvEND (sv) = 0;	1145	*SvEND (sv) = 0;
…		…
721	SvUTF8_on (sv);	1148	SvUTF8_on (sv);
722	}	1149	}
723	else	1150	else
724	sv = newSVpvn ("", 0);	1151	sv = newSVpvn ("", 0);
725		1152
		1153	dec->cur = dec_cur;
726	return sv;	1154	return sv;
727		1155
728	fail:	1156	fail:
		1157	dec->cur = dec_cur;
729	return 0;	1158	return 0;
730	}	1159	}
731		1160
732	static SV *	1161	static SV *
733	decode_num (dec_t *dec)	1162	decode_num (dec_t *dec)
…		…
791	is_nv = 1;	1220	is_nv = 1;
792	}	1221	}
793		1222
794	if (!is_nv)	1223	if (!is_nv)
795	{	1224	{
796	UV uv;	1225	int len = dec->cur - start;
797	int numtype = grok_number (start, dec->cur - start, &uv);	1226
798	if (numtype & IS_NUMBER_IN_UV)	1227	// special case the rather common 1..5-digit-int case
799	if (numtype & IS_NUMBER_NEG)	1228	if (*start == '-')
		1229	switch (len)
800	{	1230	{
801	if (uv < (UV)IV_MIN)	1231	case 2: return newSViv (-(IV)( start [1] - '0' * 1));
802	return newSViv (-(IV)uv);	1232	case 3: return newSViv (-(IV)( start [1] * 10 + start [2] - '0' * 11));
		1233	case 4: return newSViv (-(IV)( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
		1234	case 5: return newSViv (-(IV)( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
		1235	case 6: return newSViv (-(IV)(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111));
803	}	1236	}
		1237	else
		1238	switch (len)
		1239	{
		1240	case 1: return newSViv ( start [0] - '0' * 1);
		1241	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
		1242	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
		1243	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
		1244	case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111);
		1245	}
		1246
		1247	{
		1248	UV uv;
		1249	int numtype = grok_number (start, len, &uv);
		1250	if (numtype & IS_NUMBER_IN_UV)
		1251	if (numtype & IS_NUMBER_NEG)
		1252	{
		1253	if (uv < (UV)IV_MIN)
		1254	return newSViv (-(IV)uv);
		1255	}
804	else	1256	else
805	return newSVuv (uv);	1257	return newSVuv (uv);
806	}	1258	}
807		1259
		1260	len -= *start == '-' ? 1 : 0;
		1261
		1262	// does not fit into IV or UV, try NV
		1263	if (len <= NV_DIG)
		1264	// fits into NV without loss of precision
		1265	return newSVnv (json_atof (start));
		1266
		1267	// everything else fails, convert it to a string
		1268	return newSVpvn (start, dec->cur - start);
		1269	}
		1270
		1271	// loss of precision here
808	return newSVnv (Atof (start));	1272	return newSVnv (json_atof (start));
809		1273
810	fail:	1274	fail:
811	return 0;	1275	return 0;
812	}	1276	}
813		1277
…		…
837	if (*dec->cur == ']')	1301	if (*dec->cur == ']')
838	{	1302	{
839	++dec->cur;	1303	++dec->cur;
840	break;	1304	break;
841	}	1305	}
842		1306
843	if (*dec->cur != ',')	1307	if (*dec->cur != ',')
844	ERR (", or ] expected while parsing array");	1308	ERR (", or ] expected while parsing array");
845		1309
846	++dec->cur;	1310	++dec->cur;
		1311
		1312	decode_ws (dec);
		1313
		1314	if (*dec->cur == ']' && dec->json.flags & F_RELAXED)
		1315	{
		1316	++dec->cur;
		1317	break;
		1318	}
847	}	1319	}
848		1320
849	DEC_DEC_DEPTH;	1321	DEC_DEC_DEPTH;
850	return newRV_noinc ((SV *)av);	1322	return newRV_noinc ((SV *)av);
851		1323
…		…
856	}	1328	}
857		1329
858	static SV *	1330	static SV *
859	decode_hv (dec_t *dec)	1331	decode_hv (dec_t *dec)
860	{	1332	{
		1333	SV *sv;
861	HV *hv = newHV ();	1334	HV *hv = newHV ();
862		1335
863	DEC_INC_DEPTH;	1336	DEC_INC_DEPTH;
864	decode_ws (dec);	1337	decode_ws (dec);
865		1338
866	if (*dec->cur == '}')	1339	if (*dec->cur == '}')
867	++dec->cur;	1340	++dec->cur;
868	else	1341	else
869	for (;;)	1342	for (;;)
870	{	1343	{
		1344	EXPECT_CH ('"');
		1345
		1346	// heuristic: assume that
		1347	// a) decode_str + hv_store_ent are abysmally slow.
		1348	// b) most hash keys are short, simple ascii text.
		1349	// => try to "fast-match" such strings to avoid
		1350	// the overhead of decode_str + hv_store_ent.
		1351	{
871	SV *key, *value;	1352	SV *value;
		1353	char *p = dec->cur;
		1354	char *e = p + 24; // only try up to 24 bytes
872		1355
873	decode_ws (dec); EXPECT_CH ('"');	1356	for (;;)
874
875	key = decode_str (dec);
876	if (!key)
877	goto fail;
878
879	decode_ws (dec); EXPECT_CH (':');
880
881	value = decode_sv (dec);
882	if (!value)
883	{	1357	{
		1358	// the >= 0x80 is false on most architectures
		1359	if (p == e \|\| *p < 0x20 \|\| *p >= 0x80 \|\| *p == '\\')
		1360	{
		1361	// slow path, back up and use decode_str
		1362	SV *key = decode_str (dec);
		1363	if (!key)
		1364	goto fail;
		1365
		1366	decode_ws (dec); EXPECT_CH (':');
		1367
		1368	decode_ws (dec);
		1369	value = decode_sv (dec);
		1370	if (!value)
		1371	{
		1372	SvREFCNT_dec (key);
		1373	goto fail;
		1374	}
		1375
		1376	hv_store_ent (hv, key, value, 0);
884	SvREFCNT_dec (key);	1377	SvREFCNT_dec (key);
		1378
		1379	break;
		1380	}
		1381	else if (*p == '"')
		1382	{
		1383	// fast path, got a simple key
		1384	char *key = dec->cur;
		1385	int len = p - key;
		1386	dec->cur = p + 1;
		1387
		1388	decode_ws (dec); EXPECT_CH (':');
		1389
		1390	decode_ws (dec);
		1391	value = decode_sv (dec);
		1392	if (!value)
885	goto fail;	1393	goto fail;
		1394
		1395	hv_store (hv, key, len, value, 0);
		1396
		1397	break;
		1398	}
		1399
		1400	++p;
886	}	1401	}
887		1402	}
888	hv_store_ent (hv, key, value, 0);
889	SvREFCNT_dec (key);
890		1403
891	decode_ws (dec);	1404	decode_ws (dec);
892		1405
893	if (*dec->cur == '}')	1406	if (*dec->cur == '}')
894	{	1407	{
…		…
898		1411
899	if (*dec->cur != ',')	1412	if (*dec->cur != ',')
900	ERR (", or } expected while parsing object/hash");	1413	ERR (", or } expected while parsing object/hash");
901		1414
902	++dec->cur;	1415	++dec->cur;
		1416
		1417	decode_ws (dec);
		1418
		1419	if (*dec->cur == '}' && dec->json.flags & F_RELAXED)
		1420	{
		1421	++dec->cur;
		1422	break;
		1423	}
903	}	1424	}
904		1425
905	DEC_DEC_DEPTH;	1426	DEC_DEC_DEPTH;
906	return newRV_noinc ((SV *)hv);	1427	sv = newRV_noinc ((SV *)hv);
		1428
		1429	// check filter callbacks
		1430	if (dec->json.flags & F_HOOK)
		1431	{
		1432	if (dec->json.cb_sk_object && HvKEYS (hv) == 1)
		1433	{
		1434	HE *cb, *he;
		1435
		1436	hv_iterinit (hv);
		1437	he = hv_iternext (hv);
		1438	hv_iterinit (hv);
		1439
		1440	// the next line creates a mortal sv each time its called.
		1441	// might want to optimise this for common cases.
		1442	cb = hv_fetch_ent (dec->json.cb_sk_object, hv_iterkeysv (he), 0, 0);
		1443
		1444	if (cb)
		1445	{
		1446	dSP;
		1447	int count;
		1448
		1449	ENTER; SAVETMPS; PUSHMARK (SP);
		1450	XPUSHs (HeVAL (he));
		1451	sv_2mortal (sv);
		1452
		1453	PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN;
		1454
		1455	if (count == 1)
		1456	{
		1457	sv = newSVsv (POPs);
		1458	FREETMPS; LEAVE;
		1459	return sv;
		1460	}
		1461
		1462	SvREFCNT_inc (sv);
		1463	FREETMPS; LEAVE;
		1464	}
		1465	}
		1466
		1467	if (dec->json.cb_object)
		1468	{
		1469	dSP;
		1470	int count;
		1471
		1472	ENTER; SAVETMPS; PUSHMARK (SP);
		1473	XPUSHs (sv_2mortal (sv));
		1474
		1475	PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN;
		1476
		1477	if (count == 1)
		1478	{
		1479	sv = newSVsv (POPs);
		1480	FREETMPS; LEAVE;
		1481	return sv;
		1482	}
		1483
		1484	SvREFCNT_inc (sv);
		1485	FREETMPS; LEAVE;
		1486	}
		1487	}
		1488
		1489	return sv;
907		1490
908	fail:	1491	fail:
909	SvREFCNT_dec (hv);	1492	SvREFCNT_dec (hv);
910	DEC_DEC_DEPTH;	1493	DEC_DEC_DEPTH;
911	return 0;	1494	return 0;
912	}	1495	}
913		1496
914	static SV *	1497	static SV *
		1498	decode_tag (dec_t *dec)
		1499	{
		1500	SV *tag = 0;
		1501	SV *val = 0;
		1502
		1503	if (!(dec->json.flags & F_ALLOW_TAGS))
		1504	ERR ("malformed JSON string, neither array, object, number, string or atom");
		1505
		1506	++dec->cur;
		1507
		1508	decode_ws (dec);
		1509
		1510	tag = decode_sv (dec);
		1511	if (!tag)
		1512	goto fail;
		1513
		1514	if (!SvPOK (tag))
		1515	ERR ("malformed JSON string, (tag) must be a string");
		1516
		1517	decode_ws (dec);
		1518
		1519	if (*dec->cur != ')')
		1520	ERR (") expected after tag");
		1521
		1522	++dec->cur;
		1523
		1524	decode_ws (dec);
		1525
		1526	val = decode_sv (dec);
		1527	if (!val)
		1528	goto fail;
		1529
		1530	if (!SvROK (val) \|\| SvTYPE (SvRV (val)) != SVt_PVAV)
		1531	ERR ("malformed JSON string, tag value must be an array");
		1532
		1533	{
		1534	AV *av = (AV *)SvRV (val);
		1535	int i, len = av_len (av) + 1;
		1536	HV *stash = gv_stashsv (tag, 0);
		1537	SV *sv;
		1538
		1539	if (!stash)
		1540	ERR ("cannot decode perl-object (package does not exist)");
		1541
		1542	GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
		1543
		1544	if (!method)
		1545	ERR ("cannot decode perl-object (package does not have a THAW method)");
		1546
		1547	dSP;
		1548
		1549	ENTER; SAVETMPS; PUSHMARK (SP);
		1550	EXTEND (SP, len + 2);
		1551	// we re-bless the reference to get overload and other niceties right
		1552	PUSHs (tag);
		1553	PUSHs (sv_json);
		1554
		1555	for (i = 0; i < len; ++i)
		1556	PUSHs (*av_fetch (av, i, 1));
		1557
		1558	PUTBACK;
		1559	call_sv ((SV *)GvCV (method), G_SCALAR);
		1560	SPAGAIN;
		1561
		1562	SvREFCNT_dec (tag);
		1563	SvREFCNT_dec (val);
		1564	sv = SvREFCNT_inc (POPs);
		1565
		1566	PUTBACK;
		1567
		1568	FREETMPS; LEAVE;
		1569
		1570	return sv;
		1571	}
		1572
		1573	fail:
		1574	SvREFCNT_dec (tag);
		1575	SvREFCNT_dec (val);
		1576	return 0;
		1577	}
		1578
		1579	static SV *
915	decode_sv (dec_t *dec)	1580	decode_sv (dec_t *dec)
916	{	1581	{
917	decode_ws (dec);	1582	// the beauty of JSON: you need exactly one character lookahead
		1583	// to parse everything.
918	switch (*dec->cur)	1584	switch (*dec->cur)
919	{	1585	{
920	case '"': ++dec->cur; return decode_str (dec);	1586	case '"': ++dec->cur; return decode_str (dec);
921	case '[': ++dec->cur; return decode_av (dec);	1587	case '[': ++dec->cur; return decode_av (dec);
922	case '{': ++dec->cur; return decode_hv (dec);	1588	case '{': ++dec->cur; return decode_hv (dec);
		1589	case '(': return decode_tag (dec);
923		1590
924	case '-':	1591	case '-':
925	case '0': case '1': case '2': case '3': case '4':	1592	case '0': case '1': case '2': case '3': case '4':
926	case '5': case '6': case '7': case '8': case '9':	1593	case '5': case '6': case '7': case '8': case '9':
927	return decode_num (dec);	1594	return decode_num (dec);
928		1595
929	case 't':	1596	case 't':
930	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))	1597	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
931	{	1598	{
932	dec->cur += 4;	1599	dec->cur += 4;
		1600	#if JSON_SLOW
		1601	types_true = get_bool ("Types::Serialiser::true");
		1602	#endif
933	return newSViv (1);	1603	return newSVsv (types_true);
934	}	1604	}
935	else	1605	else
936	ERR ("'true' expected");	1606	ERR ("'true' expected");
937		1607
938	break;	1608	break;
939		1609
940	case 'f':	1610	case 'f':
941	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))	1611	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
942	{	1612	{
943	dec->cur += 5;	1613	dec->cur += 5;
		1614	#if JSON_SLOW
		1615	types_false = get_bool ("Types::Serialiser::false");
		1616	#endif
944	return newSViv (0);	1617	return newSVsv (types_false);
945	}	1618	}
946	else	1619	else
947	ERR ("'false' expected");	1620	ERR ("'false' expected");
948		1621
949	break;	1622	break;
…		…
958	ERR ("'null' expected");	1631	ERR ("'null' expected");
959		1632
960	break;	1633	break;
961		1634
962	default:	1635	default:
963	ERR ("malformed JSON string, neither array, object, number, string or atom");	1636	ERR ("malformed JSON string, neither tag, array, object, number, string or atom");
964	break;	1637	break;
965	}	1638	}
966		1639
967	fail:	1640	fail:
968	return 0;	1641	return 0;
969	}	1642	}
970		1643
971	static SV *	1644	static SV *
972	decode_json (SV *string, U32 flags)	1645	decode_json (SV *string, JSON *json, char **offset_return)
973	{	1646	{
		1647	dec_t dec;
974	SV *sv;	1648	SV *sv;
975		1649
		1650	/* work around bugs in 5.10 where manipulating magic values
		1651	* makes perl ignore the magic in subsequent accesses.
		1652	* also make a copy of non-PV values, to get them into a clean
		1653	* state (SvPV should do that, but it's buggy, see below).
		1654	*/
		1655	/*SvGETMAGIC (string);*/
		1656	if (SvMAGICAL (string) \|\| !SvPOK (string))
		1657	string = sv_2mortal (newSVsv (string));
		1658
976	SvUPGRADE (string, SVt_PV);	1659	SvUPGRADE (string, SVt_PV);
977		1660
978	if (flags & F_UTF8)	1661	/* work around a bug in perl 5.10, which causes SvCUR to fail an
		1662	* assertion with -DDEBUGGING, although SvCUR is documented to
		1663	* return the xpv_cur field which certainly exists after upgrading.
		1664	* according to nicholas clark, calling SvPOK fixes this.
		1665	* But it doesn't fix it, so try another workaround, call SvPV_nolen
		1666	* and hope for the best.
		1667	* Damnit, SvPV_nolen still trips over yet another assertion. This
		1668	* assertion business is seriously broken, try yet another workaround
		1669	* for the broken -DDEBUGGING.
		1670	*/
		1671	{
		1672	#ifdef DEBUGGING
		1673	STRLEN offset = SvOK (string) ? sv_len (string) : 0;
		1674	#else
		1675	STRLEN offset = SvCUR (string);
		1676	#endif
		1677
		1678	if (offset > json->max_size && json->max_size)
		1679	croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
		1680	(unsigned long)SvCUR (string), (unsigned long)json->max_size);
		1681	}
		1682
		1683	if (DECODE_WANTS_OCTETS (json))
979	sv_utf8_downgrade (string, 0);	1684	sv_utf8_downgrade (string, 0);
980	else	1685	else
981	sv_utf8_upgrade (string);	1686	sv_utf8_upgrade (string);
982		1687
983	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP	1688	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
984		1689
985	dec_t dec;	1690	dec.json = *json;
986	dec.flags = flags;
987	dec.cur = SvPVX (string);	1691	dec.cur = SvPVX (string);
988	dec.end = SvEND (string);	1692	dec.end = SvEND (string);
989	dec.err = 0;	1693	dec.err = 0;
990	dec.depth = 0;	1694	dec.depth = 0;
991	dec.maxdepth = DEC_DEPTH (dec.flags);
992		1695
		1696	if (dec.json.cb_object \|\| dec.json.cb_sk_object)
		1697	dec.json.flags \|= F_HOOK;
		1698
993	*dec.end = 0; // this should basically be a nop, too, but make sure its there	1699	*dec.end = 0; // this should basically be a nop, too, but make sure it's there
		1700
		1701	decode_ws (&dec);
994	sv = decode_sv (&dec);	1702	sv = decode_sv (&dec);
995		1703
		1704	if (offset_return)
		1705	*offset_return = dec.cur;
		1706
		1707	if (!(offset_return \|\| !sv))
		1708	{
		1709	// check for trailing garbage
		1710	decode_ws (&dec);
		1711
		1712	if (*dec.cur)
		1713	{
		1714	dec.err = "garbage after JSON object";
		1715	SvREFCNT_dec (sv);
		1716	sv = 0;
		1717	}
		1718	}
		1719
996	if (!sv)	1720	if (!sv)
997	{	1721	{
998	IV offset = dec.flags & F_UTF8
999	? dec.cur - SvPVX (string)
1000	: utf8_distance (dec.cur, SvPVX (string));
1001	SV *uni = sv_newmortal ();	1722	SV *uni = sv_newmortal ();
1002		1723
1003	// horrible hack to silence warning inside pv_uni_display	1724	// horrible hack to silence warning inside pv_uni_display
1004	COP cop = *PL_curcop;	1725	COP cop = *PL_curcop;
1005	cop.cop_warnings = pWARN_NONE;	1726	cop.cop_warnings = pWARN_NONE;
…		…
1007	SAVEVPTR (PL_curcop);	1728	SAVEVPTR (PL_curcop);
1008	PL_curcop = &cop;	1729	PL_curcop = &cop;
1009	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);	1730	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
1010	LEAVE;	1731	LEAVE;
1011		1732
1012	croak ("%s, at character offset %d [\"%s\"]",	1733	croak ("%s, at character offset %d (before \"%s\")",
1013	dec.err,	1734	dec.err,
1014	(int)offset,	1735	(int)ptr_to_index (string, dec.cur),
1015	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");	1736	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
1016	}	1737	}
1017		1738
1018	sv = sv_2mortal (sv);	1739	sv = sv_2mortal (sv);
1019		1740
1020	if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))	1741	if (!(dec.json.flags & F_ALLOW_NONREF) && !SvROK (sv))
1021	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");	1742	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
1022		1743
1023	return sv;	1744	return sv;
		1745	}
		1746
		1747	/////////////////////////////////////////////////////////////////////////////
		1748	// incremental parser
		1749
		1750	static void
		1751	incr_parse (JSON *self)
		1752	{
		1753	const char *p = SvPVX (self->incr_text) + self->incr_pos;
		1754
		1755	// the state machine here is a bit convoluted and could be simplified a lot
		1756	// but this would make it slower, so...
		1757
		1758	for (;;)
		1759	{
		1760	//printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D
		1761	switch (self->incr_mode)
		1762	{
		1763	// only used for initial whitespace skipping
		1764	case INCR_M_WS:
		1765	for (;;)
		1766	{
		1767	if (*p > 0x20)
		1768	{
		1769	if (*p == '#')
		1770	{
		1771	self->incr_mode = INCR_M_C0;
		1772	goto incr_m_c;
		1773	}
		1774	else
		1775	{
		1776	self->incr_mode = INCR_M_JSON;
		1777	goto incr_m_json;
		1778	}
		1779	}
		1780	else if (!*p)
		1781	goto interrupt;
		1782
		1783	++p;
		1784	}
		1785
		1786	// skip a single char inside a string (for \\-processing)
		1787	case INCR_M_BS:
		1788	if (!*p)
		1789	goto interrupt;
		1790
		1791	++p;
		1792	self->incr_mode = INCR_M_STR;
		1793	goto incr_m_str;
		1794
		1795	// inside #-style comments
		1796	case INCR_M_C0:
		1797	case INCR_M_C1:
		1798	incr_m_c:
		1799	for (;;)
		1800	{
		1801	if (*p == '\n')
		1802	{
		1803	self->incr_mode = self->incr_mode == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON;
		1804	break;
		1805	}
		1806	else if (!*p)
		1807	goto interrupt;
		1808
		1809	++p;
		1810	}
		1811
		1812	break;
		1813
		1814	// inside a string
		1815	case INCR_M_STR:
		1816	incr_m_str:
		1817	for (;;)
		1818	{
		1819	if (*p == '"')
		1820	{
		1821	++p;
		1822	self->incr_mode = INCR_M_JSON;
		1823
		1824	if (!self->incr_nest)
		1825	goto interrupt;
		1826
		1827	goto incr_m_json;
		1828	}
		1829	else if (*p == '\\')
		1830	{
		1831	++p; // "virtually" consumes character after \
		1832
		1833	if (!*p) // if at end of string we have to switch modes
		1834	{
		1835	self->incr_mode = INCR_M_BS;
		1836	goto interrupt;
		1837	}
		1838	}
		1839	else if (!*p)
		1840	goto interrupt;
		1841
		1842	++p;
		1843	}
		1844
		1845	// after initial ws, outside string
		1846	case INCR_M_JSON:
		1847	incr_m_json:
		1848	for (;;)
		1849	{
		1850	switch (*p++)
		1851	{
		1852	case 0:
		1853	--p;
		1854	goto interrupt;
		1855
		1856	case 0x09:
		1857	case 0x0a:
		1858	case 0x0d:
		1859	case 0x20:
		1860	if (!self->incr_nest)
		1861	{
		1862	--p; // do not eat the whitespace, let the next round do it
		1863	goto interrupt;
		1864	}
		1865	break;
		1866
		1867	case '"':
		1868	self->incr_mode = INCR_M_STR;
		1869	goto incr_m_str;
		1870
		1871	case '[':
		1872	case '{':
		1873	case '(':
		1874	if (++self->incr_nest > self->max_depth)
		1875	croak (ERR_NESTING_EXCEEDED);
		1876	break;
		1877
		1878	case ']':
		1879	case '}':
		1880	if (--self->incr_nest <= 0)
		1881	goto interrupt;
		1882	break;
		1883
		1884	case ')':
		1885	--self->incr_nest;
		1886	break;
		1887
		1888	case '#':
		1889	self->incr_mode = INCR_M_C1;
		1890	goto incr_m_c;
		1891	}
		1892	}
		1893	}
		1894
		1895	modechange:
		1896	;
		1897	}
		1898
		1899	interrupt:
		1900	self->incr_pos = p - SvPVX (self->incr_text);
		1901	//printf ("interrupt<%.*s>\n", self->incr_pos, SvPVX(self->incr_text));//D
		1902	//printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D
1024	}	1903	}
1025		1904
1026	/////////////////////////////////////////////////////////////////////////////	1905	/////////////////////////////////////////////////////////////////////////////
1027	// XS interface functions	1906	// XS interface functions
1028		1907
1029	MODULE = JSON::XS PACKAGE = JSON::XS	1908	MODULE = JSON::XS PACKAGE = JSON::XS
1030		1909
1031	BOOT:	1910	BOOT:
1032	{	1911	{
1033	int i;	1912	int i;
1034
1035	memset (decode_hexdigit, 0xff, 256);
1036		1913
1037	for (i = 0; i < 256; ++i)	1914	for (i = 0; i < 256; ++i)
1038	decode_hexdigit [i] =	1915	decode_hexdigit [i] =
1039	i >= '0' && i <= '9' ? i - '0'	1916	i >= '0' && i <= '9' ? i - '0'
1040	: i >= 'a' && i <= 'f' ? i - 'a' + 10	1917	: i >= 'a' && i <= 'f' ? i - 'a' + 10
1041	: i >= 'A' && i <= 'F' ? i - 'A' + 10	1918	: i >= 'A' && i <= 'F' ? i - 'A' + 10
1042	: -1;	1919	: -1;
1043		1920
1044	json_stash = gv_stashpv ("JSON::XS", 1);	1921	json_stash = gv_stashpv ("JSON::XS" , 1);
		1922	types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
		1923
		1924	types_true = get_bool ("Types::Serialiser::true");
		1925	types_false = get_bool ("Types::Serialiser::false");
		1926
		1927	sv_json = newSVpv ("JSON", 0);
		1928	SvREADONLY_on (sv_json);
		1929
		1930	CvNODEBUG_on (get_cv ("JSON::XS::incr_text", 0)); /* the debugger completely breaks lvalue subs */
1045	}	1931	}
1046		1932
1047	PROTOTYPES: DISABLE	1933	PROTOTYPES: DISABLE
1048		1934
1049	SV *new (char *dummy)	1935	void CLONE (...)
1050	CODE:	1936	CODE:
1051	RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);	1937	json_stash = 0;
		1938	types_boolean_stash = 0;
		1939
		1940	void new (char *klass)
		1941	PPCODE:
		1942	{
		1943	SV *pv = NEWSV (0, sizeof (JSON));
		1944	SvPOK_only (pv);
		1945	json_init ((JSON *)SvPVX (pv));
		1946	XPUSHs (sv_2mortal (sv_bless (
		1947	newRV_noinc (pv),
		1948	strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1)
		1949	)));
		1950	}
		1951
		1952	void ascii (JSON *self, int enable = 1)
		1953	ALIAS:
		1954	ascii = F_ASCII
		1955	latin1 = F_LATIN1
		1956	utf8 = F_UTF8
		1957	indent = F_INDENT
		1958	canonical = F_CANONICAL
		1959	space_before = F_SPACE_BEFORE
		1960	space_after = F_SPACE_AFTER
		1961	pretty = F_PRETTY
		1962	allow_nonref = F_ALLOW_NONREF
		1963	shrink = F_SHRINK
		1964	allow_blessed = F_ALLOW_BLESSED
		1965	convert_blessed = F_CONV_BLESSED
		1966	relaxed = F_RELAXED
		1967	allow_unknown = F_ALLOW_UNKNOWN
		1968	allow_tags = F_ALLOW_TAGS
		1969	PPCODE:
		1970	{
		1971	if (enable)
		1972	self->flags |= ix;
		1973	else
		1974	self->flags &= ~ix;
		1975
		1976	XPUSHs (ST (0));
		1977	}
		1978
		1979	void get_ascii (JSON *self)
		1980	ALIAS:
		1981	get_ascii = F_ASCII
		1982	get_latin1 = F_LATIN1
		1983	get_utf8 = F_UTF8
		1984	get_indent = F_INDENT
		1985	get_canonical = F_CANONICAL
		1986	get_space_before = F_SPACE_BEFORE
		1987	get_space_after = F_SPACE_AFTER
		1988	get_allow_nonref = F_ALLOW_NONREF
		1989	get_shrink = F_SHRINK
		1990	get_allow_blessed = F_ALLOW_BLESSED
		1991	get_convert_blessed = F_CONV_BLESSED
		1992	get_relaxed = F_RELAXED
		1993	get_allow_unknown = F_ALLOW_UNKNOWN
		1994	get_allow_tags = F_ALLOW_TAGS
		1995	PPCODE:
		1996	XPUSHs (boolSV (self->flags & ix));
		1997
		1998	void max_depth (JSON *self, U32 max_depth = 0x80000000UL)
		1999	PPCODE:
		2000	self->max_depth = max_depth;
		2001	XPUSHs (ST (0));
		2002
		2003	U32 get_max_depth (JSON *self)
		2004	CODE:
		2005	RETVAL = self->max_depth;
1052	OUTPUT:	2006	OUTPUT:
1053	RETVAL	2007	RETVAL
1054		2008
1055	SV *ascii (SV *self, int enable = 1)	2009	void max_size (JSON *self, U32 max_size = 0)
1056	ALIAS:	2010	PPCODE:
1057	ascii = F_ASCII	2011	self->max_size = max_size;
1058	utf8 = F_UTF8	2012	XPUSHs (ST (0));
1059	indent = F_INDENT	2013
1060	canonical = F_CANONICAL	2014	int get_max_size (JSON *self)
1061	space_before = F_SPACE_BEFORE
1062	space_after = F_SPACE_AFTER
1063	pretty = F_PRETTY
1064	allow_nonref = F_ALLOW_NONREF
1065	shrink = F_SHRINK
1066	CODE:	2015	CODE:
1067	{	2016	RETVAL = self->max_size;
1068	UV *uv = SvJSON (self);
1069	if (enable)
1070	*uv |= ix;
1071	else
1072	*uv &= ~ix;
1073
1074	RETVAL = newSVsv (self);
1075	}
1076	OUTPUT:	2017	OUTPUT:
1077	RETVAL	2018	RETVAL
1078		2019
1079	SV *max_depth (SV *self, int max_depth = 0x80000000UL)	2020	void filter_json_object (JSON *self, SV *cb = &PL_sv_undef)
		2021	PPCODE:
		2022	{
		2023	SvREFCNT_dec (self->cb_object);
		2024	self->cb_object = SvOK (cb) ? newSVsv (cb) : 0;
		2025
		2026	XPUSHs (ST (0));
		2027	}
		2028
		2029	void filter_json_single_key_object (JSON *self, SV *key, SV *cb = &PL_sv_undef)
		2030	PPCODE:
		2031	{
		2032	if (!self->cb_sk_object)
		2033	self->cb_sk_object = newHV ();
		2034
		2035	if (SvOK (cb))
		2036	hv_store_ent (self->cb_sk_object, key, newSVsv (cb), 0);
		2037	else
		2038	{
		2039	hv_delete_ent (self->cb_sk_object, key, G_DISCARD, 0);
		2040
		2041	if (!HvKEYS (self->cb_sk_object))
		2042	{
		2043	SvREFCNT_dec (self->cb_sk_object);
		2044	self->cb_sk_object = 0;
		2045	}
		2046	}
		2047
		2048	XPUSHs (ST (0));
		2049	}
		2050
		2051	void encode (JSON *self, SV *scalar)
		2052	PPCODE:
		2053	PUTBACK; scalar = encode_json (scalar, self); SPAGAIN;
		2054	XPUSHs (scalar);
		2055
		2056	void decode (JSON *self, SV *jsonstr)
		2057	PPCODE:
		2058	PUTBACK; jsonstr = decode_json (jsonstr, self, 0); SPAGAIN;
		2059	XPUSHs (jsonstr);
		2060
		2061	void decode_prefix (JSON *self, SV *jsonstr)
		2062	PPCODE:
		2063	{
		2064	SV *sv;
		2065	char *offset;
		2066	PUTBACK; sv = decode_json (jsonstr, self, &offset); SPAGAIN;
		2067	EXTEND (SP, 2);
		2068	PUSHs (sv);
		2069	PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset))));
		2070	}
		2071
		2072	void incr_parse (JSON *self, SV *jsonstr = 0)
		2073	PPCODE:
		2074	{
		2075	if (!self->incr_text)
		2076	self->incr_text = newSVpvn ("", 0);
		2077
		2078	/* if utf8-ness doesn't match the decoder, need to upgrade/downgrade */
		2079	if (!DECODE_WANTS_OCTETS (self) == !SvUTF8 (self->incr_text))
		2080	if (DECODE_WANTS_OCTETS (self))
		2081	{
		2082	if (self->incr_pos)
		2083	self->incr_pos = utf8_length ((U8 *)SvPVX (self->incr_text),
		2084	(U8 *)SvPVX (self->incr_text) + self->incr_pos);
		2085
		2086	sv_utf8_downgrade (self->incr_text, 0);
		2087	}
		2088	else
		2089	{
		2090	sv_utf8_upgrade (self->incr_text);
		2091
		2092	if (self->incr_pos)
		2093	self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
		2094	- (U8 *)SvPVX (self->incr_text);
		2095	}
		2096
		2097	// append data, if any
		2098	if (jsonstr)
		2099	{
		2100	/* make sure both strings have same encoding */
		2101	if (SvUTF8 (jsonstr) != SvUTF8 (self->incr_text))
		2102	if (SvUTF8 (jsonstr))
		2103	sv_utf8_downgrade (jsonstr, 0);
		2104	else
		2105	sv_utf8_upgrade (jsonstr);
		2106
		2107	/* and then just blindly append */
		2108	{
		2109	STRLEN len;
		2110	const char *str = SvPV (jsonstr, len);
		2111	STRLEN cur = SvCUR (self->incr_text);
		2112
		2113	if (SvLEN (self->incr_text) <= cur + len)
		2114	SvGROW (self->incr_text, cur + (len < (cur >> 2) ? cur >> 2 : len) + 1);
		2115
		2116	Move (str, SvEND (self->incr_text), len, char);
		2117	SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len);
		2118	*SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there
		2119	}
		2120	}
		2121
		2122	if (GIMME_V != G_VOID)
		2123	do
		2124	{
		2125	SV *sv;
		2126	char *offset;
		2127
		2128	if (!INCR_DONE (self))
		2129	{
		2130	incr_parse (self);
		2131
		2132	if (self->incr_pos > self->max_size && self->max_size)
		2133	croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
		2134	(unsigned long)self->incr_pos, (unsigned long)self->max_size);
		2135
		2136	if (!INCR_DONE (self))
		2137	{
		2138	// as an optimisation, do not accumulate white space in the incr buffer
		2139	if (self->incr_mode == INCR_M_WS && self->incr_pos)
		2140	{
		2141	self->incr_pos = 0;
		2142	SvCUR_set (self->incr_text, 0);
		2143	}
		2144
		2145	break;
		2146	}
		2147	}
		2148
		2149	PUTBACK; sv = decode_json (self->incr_text, self, &offset); SPAGAIN;
		2150	XPUSHs (sv);
		2151
		2152	self->incr_pos -= offset - SvPVX (self->incr_text);
		2153	self->incr_nest = 0;
		2154	self->incr_mode = 0;
		2155
		2156	sv_chop (self->incr_text, offset);
		2157	}
		2158	while (GIMME_V == G_ARRAY);
		2159	}
		2160
		2161	SV *incr_text (JSON *self)
		2162	ATTRS: lvalue
1080	CODE:	2163	CODE:
1081	{	2164	{
1082	UV *uv = SvJSON (self);	2165	if (self->incr_pos)
1083	UV log2 = 0;	2166	croak ("incr_text can not be called when the incremental parser already started parsing");
1084		2167
1085	if (max_depth > 0x80000000UL) max_depth = 0x80000000UL;	2168	RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef;
1086
1087	while ((1UL << log2) < max_depth)
1088	++log2;
1089
1090	*uv = *uv & ~F_MAXDEPTH | (log2 << S_MAXDEPTH);
1091
1092	RETVAL = newSVsv (self);
1093	}	2169	}
1094	OUTPUT:	2170	OUTPUT:
1095	RETVAL	2171	RETVAL
1096		2172
		2173	void incr_skip (JSON *self)
		2174	CODE:
		2175	{
		2176	if (self->incr_pos)
		2177	{
		2178	sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos);
		2179	self->incr_pos = 0;
		2180	self->incr_nest = 0;
		2181	self->incr_mode = 0;
		2182	}
		2183	}
		2184
		2185	void incr_reset (JSON *self)
		2186	CODE:
		2187	{
		2188	SvREFCNT_dec (self->incr_text);
		2189	self->incr_text = 0;
		2190	self->incr_pos = 0;
		2191	self->incr_nest = 0;
		2192	self->incr_mode = 0;
		2193	}
		2194
		2195	void DESTROY (JSON *self)
		2196	CODE:
		2197	SvREFCNT_dec (self->cb_sk_object);
		2198	SvREFCNT_dec (self->cb_object);
		2199	SvREFCNT_dec (self->incr_text);
		2200
		2201	PROTOTYPES: ENABLE
		2202
1097	void encode (SV *self, SV *scalar)	2203	void encode_json (SV *scalar)
1098	PPCODE:	2204	PPCODE:
1099	XPUSHs (encode_json (scalar, *SvJSON (self)));	2205	{
		2206	JSON json;
		2207	json_init (&json);
		2208	json.flags |= F_UTF8;
		2209	PUTBACK; scalar = encode_json (scalar, &json); SPAGAIN;
		2210	XPUSHs (scalar);
		2211	}
1100		2212
1101	void decode (SV *self, SV *jsonstr)	2213	void decode_json (SV *jsonstr)
1102	PPCODE:	2214	PPCODE:
1103	XPUSHs (decode_json (jsonstr, *SvJSON (self)));	2215	{
		2216	JSON json;
		2217	json_init (&json);
		2218	json.flags |= F_UTF8;
		2219	PUTBACK; jsonstr = decode_json (jsonstr, &json, 0); SPAGAIN;
		2220	XPUSHs (jsonstr);
		2221	}
1104		2222
1105	PROTOTYPES: ENABLE
1106
1107	void to_json (SV *scalar)
1108	ALIAS:
1109	objToJson = 0
1110	PPCODE:
1111	XPUSHs (encode_json (scalar, F_DEFAULT | F_UTF8));
1112
1113	void from_json (SV *jsonstr)
1114	ALIAS:
1115	jsonToObj = 0
1116	PPCODE:
1117	XPUSHs (decode_json (jsonstr, F_DEFAULT | F_UTF8));
1118

Diff Legend

–	Removed lines
+	Added lines
<	Changed lines
>	Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.24 by root, Tue Apr 3 23:59:04 2007 UTC vs. Revision 1.122 by root, Tue Oct 29 15:55:49 2013 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.24 by root, Tue Apr 3 23:59:04 2007 UTC vs.
Revision 1.122 by root, Tue Oct 29 15:55:49 2013 UTC