[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.18 by root, Sun Mar 25 21:19:13 2007 UTC vs.
Revision 1.35 by root, Wed Jun 6 14:52:49 2007 UTC

…		…
3	#include "XSUB.h"	3	#include "XSUB.h"
4		4
5	#include "assert.h"	5	#include "assert.h"
6	#include "string.h"	6	#include "string.h"
7	#include "stdlib.h"	7	#include "stdlib.h"
		8	#include "stdio.h"
		9
		10	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)
		11	# define snprintf _snprintf // C compilers have this in stdio.h
		12	#endif
8		13
9	#define F_ASCII 0x00000001UL	14	#define F_ASCII 0x00000001UL
		15	#define F_LATIN1 0x00000002UL
10	#define F_UTF8 0x00000002UL	16	#define F_UTF8 0x00000004UL
11	#define F_INDENT 0x00000004UL	17	#define F_INDENT 0x00000008UL
12	#define F_CANONICAL 0x00000008UL	18	#define F_CANONICAL 0x00000010UL
13	#define F_SPACE_BEFORE 0x00000010UL	19	#define F_SPACE_BEFORE 0x00000020UL
14	#define F_SPACE_AFTER 0x00000020UL	20	#define F_SPACE_AFTER 0x00000040UL
15	#define F_ALLOW_NONREF 0x00000080UL	21	#define F_ALLOW_NONREF 0x00000100UL
16	#define F_SHRINK 0x00000100UL	22	#define F_SHRINK 0x00000200UL
17	#define F_MAXDEPTH 0xf8000000UL	23	#define F_MAXDEPTH 0xf8000000UL
18	#define S_MAXDEPTH 27	24	#define S_MAXDEPTH 27
19		25
20	#define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH))	26	#define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH))
21		27
22	// F_SELFCONVERT? <=> to_json/toJson	28	// F_SELFCONVERT? <=> to_json/toJson
23	// F_BLESSED? <=> { $__class__$ => }	29	// F_BLESSED? <=> { $__class__$ => }
24		30
25	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER	31	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER
26	#define F_DEFAULT (13UL << S_MAXDEPTH)	32	#define F_DEFAULT (9UL << S_MAXDEPTH)
27		33
28	#define INIT_SIZE 32 // initial scalar size to be allocated	34	#define INIT_SIZE 32 // initial scalar size to be allocated
29	#define INDENT_STEP 3 // spaces per indentation level	35	#define INDENT_STEP 3 // spaces per indentation level
30		36
31	#define UTF8_MAX_LEN 11 // for perls UTF-X: max. number of octets per character
32	#define SHORT_STRING_LEN 512 // special-case strings of up to this size	37	#define SHORT_STRING_LEN 512 // special-case strings of up to this size
33		38
34	#define SB do {	39	#define SB do {
35	#define SE } while (0)	40	#define SE } while (0)
		41
		42	#if __GNUC__ >= 3
		43	# define expect(expr,value) __builtin_expect ((expr),(value))
		44	# define inline inline
		45	#else
		46	# define expect(expr,value) (expr)
		47	# define inline static
		48	#endif
		49
		50	#define expect_false(expr) expect ((expr) != 0, 0)
		51	#define expect_true(expr) expect ((expr) != 0, 1)
36		52
37	static HV *json_stash; // JSON::XS::	53	static HV *json_stash; // JSON::XS::
38		54
39	/////////////////////////////////////////////////////////////////////////////	55	/////////////////////////////////////////////////////////////////////////////
40	// utility functions	56	// utility functions
…		…
65	// decode an utf-8 character and return it, or (UV)-1 in	81	// decode an utf-8 character and return it, or (UV)-1 in
66	// case of an error.	82	// case of an error.
67	// we special-case "safe" characters from U+80 .. U+7FF,	83	// we special-case "safe" characters from U+80 .. U+7FF,
68	// but use the very good perl function to parse anything else.	84	// but use the very good perl function to parse anything else.
69	// note that we never call this function for a ascii codepoints	85	// note that we never call this function for a ascii codepoints
70	static UV	86	inline UV
71	decode_utf8 (unsigned char s, STRLEN len, STRLEN clen)	87	decode_utf8 (unsigned char s, STRLEN len, STRLEN clen)
72	{	88	{
73	if (s[0] > 0xdf \|\| s[0] < 0xc2)	89	if (expect_false (s[0] > 0xdf \|\| s[0] < 0xc2))
74	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);	90	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
75	else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)	91	else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)
76	{	92	{
77	*clen = 2;	93	*clen = 2;
78	return ((s[0] & 0x1f) << 6) \| (s[1] & 0x3f);	94	return ((s[0] & 0x1f) << 6) \| (s[1] & 0x3f);
79	}	95	}
80	else	96	else
		97	{
		98	*clen = (STRLEN)-1;
81	return (UV)-1;	99	return (UV)-1;
		100	}
82	}	101	}
83		102
84	/////////////////////////////////////////////////////////////////////////////	103	/////////////////////////////////////////////////////////////////////////////
85	// encoder	104	// encoder
86		105
…		…
93	U32 flags; // F_*	112	U32 flags; // F_*
94	U32 indent; // indentation level	113	U32 indent; // indentation level
95	U32 maxdepth; // max. indentation/recursion level	114	U32 maxdepth; // max. indentation/recursion level
96	} enc_t;	115	} enc_t;
97		116
98	static void	117	inline void
99	need (enc_t *enc, STRLEN len)	118	need (enc_t *enc, STRLEN len)
100	{	119	{
101	if (enc->cur + len >= enc->end)	120	if (expect_false (enc->cur + len >= enc->end))
102	{	121	{
103	STRLEN cur = enc->cur - SvPVX (enc->sv);	122	STRLEN cur = enc->cur - SvPVX (enc->sv);
104	SvGROW (enc->sv, cur + len + 1);	123	SvGROW (enc->sv, cur + len + 1);
105	enc->cur = SvPVX (enc->sv) + cur;	124	enc->cur = SvPVX (enc->sv) + cur;
106	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);	125	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1;
107	}	126	}
108	}	127	}
109		128
110	static void	129	inline void
111	encode_ch (enc_t *enc, char ch)	130	encode_ch (enc_t *enc, char ch)
112	{	131	{
113	need (enc, 1);	132	need (enc, 1);
114	*enc->cur++ = ch;	133	*enc->cur++ = ch;
115	}	134	}
…		…
123		142
124	while (str < end)	143	while (str < end)
125	{	144	{
126	unsigned char ch = (unsigned char )str;	145	unsigned char ch = (unsigned char )str;
127		146
128	if (ch >= 0x20 && ch < 0x80) // most common case	147	if (expect_true (ch >= 0x20 && ch < 0x80)) // most common case
129	{	148	{
130	if (ch == '"') // but with slow exceptions	149	if (expect_false (ch == '"')) // but with slow exceptions
131	{	150	{
132	need (enc, len += 1);	151	need (enc, len += 1);
133	*enc->cur++ = '\\';	152	*enc->cur++ = '\\';
134	*enc->cur++ = '"';	153	*enc->cur++ = '"';
135	}	154	}
136	else if (ch == '\\')	155	else if (expect_false (ch == '\\'))
137	{	156	{
138	need (enc, len += 1);	157	need (enc, len += 1);
139	*enc->cur++ = '\\';	158	*enc->cur++ = '\\';
140	*enc->cur++ = '\\';	159	*enc->cur++ = '\\';
141	}	160	}
…		…
173	}	192	}
174		193
175	if (uch > 0x10FFFFUL)	194	if (uch > 0x10FFFFUL)
176	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);	195	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
177		196
178	if (uch < 0x80 \|\| enc->flags & F_ASCII)	197	if (uch < 0x80 \|\| enc->flags & F_ASCII \|\| (enc->flags & F_LATIN1 && uch > 0xFF))
179	{	198	{
180	if (uch > 0xFFFFUL)	199	if (uch > 0xFFFFUL)
181	{	200	{
182	need (enc, len += 11);	201	need (enc, len += 11);
183	sprintf (enc->cur, "\\u%04x\\u%04x",	202	sprintf (enc->cur, "\\u%04x\\u%04x",
…		…
197	*enc->cur++ = hexdigit [(uch >> 0) & 15];	216	*enc->cur++ = hexdigit [(uch >> 0) & 15];
198	}	217	}
199		218
200	str += clen;	219	str += clen;
201	}	220	}
		221	else if (enc->flags & F_LATIN1)
		222	{
		223	*enc->cur++ = uch;
		224	str += clen;
		225	}
202	else if (is_utf8)	226	else if (is_utf8)
203	{	227	{
204	need (enc, len += clen);	228	need (enc, len += clen);
205	do	229	do
206	{	230	{
…		…
208	}	232	}
209	while (--clen);	233	while (--clen);
210	}	234	}
211	else	235	else
212	{	236	{
213	need (enc, len += UTF8_MAX_LEN - 1); // never more than 11 bytes needed	237	need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed
214	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);	238	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
215	++str;	239	++str;
216	}	240	}
217	}	241	}
218	}	242	}
…		…
220		244
221	--len;	245	--len;
222	}	246	}
223	}	247	}
224		248
225	static void	249	inline void
226	encode_indent (enc_t *enc)	250	encode_indent (enc_t *enc)
227	{	251	{
228	if (enc->flags & F_INDENT)	252	if (enc->flags & F_INDENT)
229	{	253	{
230	int spaces = enc->indent * INDENT_STEP;	254	int spaces = enc->indent * INDENT_STEP;
…		…
233	memset (enc->cur, ' ', spaces);	257	memset (enc->cur, ' ', spaces);
234	enc->cur += spaces;	258	enc->cur += spaces;
235	}	259	}
236	}	260	}
237		261
238	static void	262	inline void
239	encode_space (enc_t *enc)	263	encode_space (enc_t *enc)
240	{	264	{
241	need (enc, 1);	265	need (enc, 1);
242	encode_ch (enc, ' ');	266	encode_ch (enc, ' ');
243	}	267	}
244		268
245	static void	269	inline void
246	encode_nl (enc_t *enc)	270	encode_nl (enc_t *enc)
247	{	271	{
248	if (enc->flags & F_INDENT)	272	if (enc->flags & F_INDENT)
249	{	273	{
250	need (enc, 1);	274	need (enc, 1);
251	encode_ch (enc, '\n');	275	encode_ch (enc, '\n');
252	}	276	}
253	}	277	}
254		278
255	static void	279	inline void
256	encode_comma (enc_t *enc)	280	encode_comma (enc_t *enc)
257	{	281	{
258	encode_ch (enc, ',');	282	encode_ch (enc, ',');
259		283
260	if (enc->flags & F_INDENT)	284	if (enc->flags & F_INDENT)
…		…
267		291
268	static void	292	static void
269	encode_av (enc_t enc, AV av)	293	encode_av (enc_t enc, AV av)
270	{	294	{
271	int i, len = av_len (av);	295	int i, len = av_len (av);
		296
		297	if (enc->indent >= enc->maxdepth)
		298	croak ("data structure too deep (hit recursion limit)");
272		299
273	encode_ch (enc, '['); encode_nl (enc);	300	encode_ch (enc, '['); encode_nl (enc);
274	++enc->indent;	301	++enc->indent;
275		302
276	for (i = 0; i <= len; ++i)	303	for (i = 0; i <= len; ++i)
…		…
343	static void	370	static void
344	encode_hv (enc_t enc, HV hv)	371	encode_hv (enc_t enc, HV hv)
345	{	372	{
346	int count, i;	373	int count, i;
347		374
		375	if (enc->indent >= enc->maxdepth)
		376	croak ("data structure too deep (hit recursion limit)");
		377
348	encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;	378	encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;
349		379
350	if ((count = hv_iterinit (hv)))	380	if ((count = hv_iterinit (hv)))
351	{	381	{
352	// for canonical output we have to sort by keys first	382	// for canonical output we have to sort by keys first
353	// actually, this is mostly due to the stupid so-called	383	// actually, this is mostly due to the stupid so-called
354	// security workaround added somewhere in 5.8.x.	384	// security workaround added somewhere in 5.8.x.
355	// that randomises hash orderings	385	// that randomises hash orderings
356	if (enc->flags & F_CANONICAL)	386	if (enc->flags & F_CANONICAL)
357	{	387	{
358	HE he, hes [count]; // if your compiler dies here, you need to enable C99 mode
359	int fast = 1;	388	int fast = 1;
		389	HE *he;
		390	#if defined(__BORLANDC__) \|\| defined(_MSC_VER)
		391	HE *hes = _alloca (count sizeof (HE));
		392	#else
		393	HE *hes [count]; // if your compiler dies here, you need to enable C99 mode
		394	#endif
360		395
361	i = 0;	396	i = 0;
362	while ((he = hv_iternext (hv)))	397	while ((he = hv_iternext (hv)))
363	{	398	{
364	hes [i++] = he;	399	hes [i++] = he;
…		…
399		434
400	encode_nl (enc);	435	encode_nl (enc);
401	}	436	}
402	else	437	else
403	{	438	{
404	SV *sv;
405	HE *he = hv_iternext (hv);	439	HE *he = hv_iternext (hv);
406		440
407	for (;;)	441	for (;;)
408	{	442	{
409	encode_indent (enc);	443	encode_indent (enc);
…		…
418	encode_nl (enc);	452	encode_nl (enc);
419	}	453	}
420	}	454	}
421		455
422	--enc->indent; encode_indent (enc); encode_ch (enc, '}');	456	--enc->indent; encode_indent (enc); encode_ch (enc, '}');
		457	}
		458
		459	// encode objects, arrays and special \0=false and \1=true values.
		460	static void
		461	encode_rv (enc_t enc, SV sv)
		462	{
		463	svtype svt;
		464
		465	SvGETMAGIC (sv);
		466	svt = SvTYPE (sv);
		467
		468	if (svt == SVt_PVHV)
		469	encode_hv (enc, (HV *)sv);
		470	else if (svt == SVt_PVAV)
		471	encode_av (enc, (AV *)sv);
		472	else if (svt < SVt_PVAV)
		473	{
		474	if (SvNIOK (sv) && SvIV (sv) == 0)
		475	encode_str (enc, "false", 5, 0);
		476	else if (SvNIOK (sv) && SvIV (sv) == 1)
		477	encode_str (enc, "true", 4, 0);
		478	else
		479	croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",
		480	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
		481	}
		482	else
		483	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
		484	SvPV_nolen (sv_2mortal (newRV_inc (sv))));
423	}	485	}
424		486
425	static void	487	static void
426	encode_sv (enc_t enc, SV sv)	488	encode_sv (enc_t enc, SV sv)
427	{	489	{
…		…
441	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);	503	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
442	enc->cur += strlen (enc->cur);	504	enc->cur += strlen (enc->cur);
443	}	505	}
444	else if (SvIOKp (sv))	506	else if (SvIOKp (sv))
445	{	507	{
		508	// we assume we can always read an IV as a UV
		509	if (SvUV (sv) & ~(UV)0x7fff)
		510	{
446	need (enc, 64);	511	need (enc, 32);
447	enc->cur +=	512	enc->cur +=
448	SvIsUV(sv)	513	SvIsUV(sv)
449	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))	514	? snprintf (enc->cur, 32, "%"UVuf, (UV)SvUVX (sv))
450	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));	515	: snprintf (enc->cur, 32, "%"IVdf, (IV)SvIVX (sv));
		516	}
		517	else
		518	{
		519	// optimise the "small number case"
		520	// code will likely be branchless and use only a single multiplication
		521	I32 i = SvIV (sv);
		522	U32 u;
		523
		524	need (enc, 6);
		525
		526	*enc->cur = '-'; enc->cur += i < 0 ? 1 : 0;
		527	u = i < 0 ? -i : i;
		528
		529	// convert to 4.28 fixed-point representation
		530	u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits
		531
		532	char digit, nz = 0;
		533
		534	digit = u >> 28; enc->cur = digit + '0'; nz \|= digit; enc->cur += nz ? 1 : 0; u = (u & 0xfffffff) 5;
		535	digit = u >> 27; enc->cur = digit + '0'; nz \|= digit; enc->cur += nz ? 1 : 0; u = (u & 0x7ffffff) 5;
		536	digit = u >> 26; enc->cur = digit + '0'; nz \|= digit; enc->cur += nz ? 1 : 0; u = (u & 0x3ffffff) 5;
		537	digit = u >> 25; enc->cur = digit + '0'; nz \|= digit; enc->cur += nz ? 1 : 0; u = (u & 0x1ffffff) 5;
		538	digit = u >> 24; *enc->cur = digit + '0'; nz \|= digit; enc->cur += 1;
		539	}
451	}	540	}
452	else if (SvROK (sv))	541	else if (SvROK (sv))
453	{	542	encode_rv (enc, SvRV (sv));
454	SV *rv = SvRV (sv);
455
456	if (enc->indent >= enc->maxdepth)
457	croak ("data structure too deep (hit recursion limit)");
458
459	switch (SvTYPE (rv))
460	{
461	case SVt_PVAV: encode_av (enc, (AV *)rv); break;
462	case SVt_PVHV: encode_hv (enc, (HV *)rv); break;
463
464	default:
465	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
466	SvPV_nolen (sv));
467	}
468	}
469	else if (!SvOK (sv))	543	else if (!SvOK (sv))
470	encode_str (enc, "null", 4, 0);	544	encode_str (enc, "null", 4, 0);
471	else	545	else
472	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",	546	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
473	SvPV_nolen (sv), SvFLAGS (sv));	547	SvPV_nolen (sv), SvFLAGS (sv));
474	}	548	}
475		549
476	static SV *	550	static SV *
477	encode_json (SV *scalar, U32 flags)	551	encode_json (SV *scalar, U32 flags)
478	{	552	{
		553	enc_t enc;
		554
479	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))	555	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
480	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");	556	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
481		557
482	enc_t enc;
483	enc.flags = flags;	558	enc.flags = flags;
484	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));	559	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
485	enc.cur = SvPVX (enc.sv);	560	enc.cur = SvPVX (enc.sv);
486	enc.end = SvEND (enc.sv);	561	enc.end = SvEND (enc.sv);
487	enc.indent = 0;	562	enc.indent = 0;
488	enc.maxdepth = DEC_DEPTH (flags);	563	enc.maxdepth = DEC_DEPTH (flags);
489		564
490	SvPOK_only (enc.sv);	565	SvPOK_only (enc.sv);
491	encode_sv (&enc, scalar);	566	encode_sv (&enc, scalar);
492		567
		568	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
		569	*SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
		570
493	if (!(flags & (F_ASCII \| F_UTF8)))	571	if (!(flags & (F_ASCII \| F_LATIN1 \| F_UTF8)))
494	SvUTF8_on (enc.sv);	572	SvUTF8_on (enc.sv);
495
496	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
497		573
498	if (enc.flags & F_SHRINK)	574	if (enc.flags & F_SHRINK)
499	shrink (enc.sv);	575	shrink (enc.sv);
500		576
501	return enc.sv;	577	return enc.sv;
…		…
513	U32 flags; // F_*	589	U32 flags; // F_*
514	U32 depth; // recursion depth	590	U32 depth; // recursion depth
515	U32 maxdepth; // recursion depth limit	591	U32 maxdepth; // recursion depth limit
516	} dec_t;	592	} dec_t;
517		593
518	static void	594	inline void
519	decode_ws (dec_t *dec)	595	decode_ws (dec_t *dec)
520	{	596	{
521	for (;;)	597	for (;;)
522	{	598	{
523	char ch = *dec->cur;	599	char ch = *dec->cur;
…		…
549	decode_4hex (dec_t *dec)	625	decode_4hex (dec_t *dec)
550	{	626	{
551	signed char d1, d2, d3, d4;	627	signed char d1, d2, d3, d4;
552	unsigned char cur = (unsigned char )dec->cur;	628	unsigned char cur = (unsigned char )dec->cur;
553		629
554	d1 = decode_hexdigit [cur [0]]; if (d1 < 0) ERR ("four hexadecimal digits expected");	630	d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("four hexadecimal digits expected");
555	d2 = decode_hexdigit [cur [1]]; if (d2 < 0) ERR ("four hexadecimal digits expected");	631	d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("four hexadecimal digits expected");
556	d3 = decode_hexdigit [cur [2]]; if (d3 < 0) ERR ("four hexadecimal digits expected");	632	d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("four hexadecimal digits expected");
557	d4 = decode_hexdigit [cur [3]]; if (d4 < 0) ERR ("four hexadecimal digits expected");	633	d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("four hexadecimal digits expected");
558		634
559	dec->cur += 4;	635	dec->cur += 4;
560		636
561	return ((UV)d1) << 12	637	return ((UV)d1) << 12
562	\| ((UV)d2) << 8	638	\| ((UV)d2) << 8
…		…
573	SV *sv = 0;	649	SV *sv = 0;
574	int utf8 = 0;	650	int utf8 = 0;
575		651
576	do	652	do
577	{	653	{
578	char buf [SHORT_STRING_LEN + UTF8_MAX_LEN];	654	char buf [SHORT_STRING_LEN + UTF8_MAXBYTES];
579	char *cur = buf;	655	char *cur = buf;
580		656
581	do	657	do
582	{	658	{
583	unsigned char ch = (unsigned char )dec->cur++;	659	unsigned char ch = (unsigned char )dec->cur++;
584		660
585	if (ch == '"')	661	if (expect_false (ch == '"'))
586	{	662	{
587	--dec->cur;	663	--dec->cur;
588	break;	664	break;
589	}	665	}
590	else if (ch == '\\')	666	else if (expect_false (ch == '\\'))
591	{	667	{
592	switch (*dec->cur)	668	switch (*dec->cur)
593	{	669	{
594	case '\\':	670	case '\\':
595	case '/':	671	case '/':
…		…
645	default:	721	default:
646	--dec->cur;	722	--dec->cur;
647	ERR ("illegal backslash escape sequence in string");	723	ERR ("illegal backslash escape sequence in string");
648	}	724	}
649	}	725	}
650	else if (ch >= 0x20 && ch <= 0x7f)	726	else if (expect_true (ch >= 0x20 && ch <= 0x7f))
651	*cur++ = ch;	727	*cur++ = ch;
652	else if (ch >= 0x80)	728	else if (ch >= 0x80)
653	{	729	{
		730	STRLEN clen;
		731	UV uch;
		732
654	--dec->cur;	733	--dec->cur;
655		734
656	STRLEN clen;
657	UV uch = decode_utf8 (dec->cur, dec->end - dec->cur, &clen);	735	uch = decode_utf8 (dec->cur, dec->end - dec->cur, &clen);
658	if (clen == (STRLEN)-1)	736	if (clen == (STRLEN)-1)
659	ERR ("malformed UTF-8 character in JSON string");	737	ERR ("malformed UTF-8 character in JSON string");
660		738
661	do	739	do
662	{
663	cur++ = dec->cur++;	740	cur++ = dec->cur++;
664	}
665	while (--clen);	741	while (--clen);
666		742
667	utf8 = 1;	743	utf8 = 1;
668	}	744	}
669	else if (!ch)
670	ERR ("unexpected end of string while parsing json string");
671	else	745	else
		746	{
		747	--dec->cur;
		748
		749	if (!ch)
		750	ERR ("unexpected end of string while parsing JSON string");
		751	else
672	ERR ("invalid character encountered");	752	ERR ("invalid character encountered while parsing JSON string");
673		753	}
674	}	754	}
675	while (cur < buf + SHORT_STRING_LEN);	755	while (cur < buf + SHORT_STRING_LEN);
676		756
		757	{
677	STRLEN len = cur - buf;	758	STRLEN len = cur - buf;
678		759
679	if (sv)	760	if (sv)
680	{	761	{
681	SvGROW (sv, SvCUR (sv) + len + 1);	762	SvGROW (sv, SvCUR (sv) + len + 1);
682	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);	763	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
683	SvCUR_set (sv, SvCUR (sv) + len);	764	SvCUR_set (sv, SvCUR (sv) + len);
684	}	765	}
685	else	766	else
686	sv = newSVpvn (buf, len);	767	sv = newSVpvn (buf, len);
		768	}
687	}	769	}
688	while (*dec->cur != '"');	770	while (*dec->cur != '"');
689		771
690	++dec->cur;	772	++dec->cur;
691		773
…		…
768	is_nv = 1;	850	is_nv = 1;
769	}	851	}
770		852
771	if (!is_nv)	853	if (!is_nv)
772	{	854	{
773	UV uv;	855	// special case the rather common 1..4-digit-int case, assumes 32 bit ints or so
774	int numtype = grok_number (start, dec->cur - start, &uv);	856	if (*start == '-')
775	if (numtype & IS_NUMBER_IN_UV)	857	switch (dec->cur - start)
776	if (numtype & IS_NUMBER_NEG)
777	{	858	{
778	if (uv < (UV)IV_MIN)	859	case 2: return newSViv (-( start [1] - '0' ));
779	return newSViv (-(IV)uv);	860	case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));
		861	case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
		862	case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
780	}	863	}
		864	else
		865	switch (dec->cur - start)
		866	{
		867	case 1: return newSViv ( start [0] - '0' );
		868	case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
		869	case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
		870	case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
		871	}
		872
		873	{
		874	UV uv;
		875	int numtype = grok_number (start, dec->cur - start, &uv);
		876	if (numtype & IS_NUMBER_IN_UV)
		877	if (numtype & IS_NUMBER_NEG)
		878	{
		879	if (uv < (UV)IV_MIN)
		880	return newSViv (-(IV)uv);
		881	}
781	else	882	else
782	return newSVuv (uv);	883	return newSVuv (uv);
		884	}
783	}	885	}
784		886
785	return newSVnv (Atof (start));	887	return newSVnv (Atof (start));
786		888
787	fail:	889	fail:
…		…
935	ERR ("'null' expected");	1037	ERR ("'null' expected");
936		1038
937	break;	1039	break;
938		1040
939	default:	1041	default:
940	ERR ("malformed json string, neither array, object, number, string or atom");	1042	ERR ("malformed JSON string, neither array, object, number, string or atom");
941	break;	1043	break;
942	}	1044	}
943		1045
944	fail:	1046	fail:
945	return 0;	1047	return 0;
946	}	1048	}
947		1049
948	static SV *	1050	static SV *
949	decode_json (SV *string, U32 flags)	1051	decode_json (SV string, U32 flags, UV offset_return)
950	{	1052	{
		1053	dec_t dec;
		1054	UV offset;
951	SV *sv;	1055	SV *sv;
		1056
		1057	SvGETMAGIC (string);
		1058	SvUPGRADE (string, SVt_PV);
952		1059
953	if (flags & F_UTF8)	1060	if (flags & F_UTF8)
954	sv_utf8_downgrade (string, 0);	1061	sv_utf8_downgrade (string, 0);
955	else	1062	else
956	sv_utf8_upgrade (string);	1063	sv_utf8_upgrade (string);
957		1064
958	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP	1065	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
959		1066
960	dec_t dec;
961	dec.flags = flags;	1067	dec.flags = flags;
962	dec.cur = SvPVX (string);	1068	dec.cur = SvPVX (string);
963	dec.end = SvEND (string);	1069	dec.end = SvEND (string);
964	dec.err = 0;	1070	dec.err = 0;
965	dec.depth = 0;	1071	dec.depth = 0;
966	dec.maxdepth = DEC_DEPTH (dec.flags);	1072	dec.maxdepth = DEC_DEPTH (dec.flags);
967		1073
968	*SvEND (sv) = 0; // this shou[ld basically be a nop, too	1074	*dec.end = 0; // this should basically be a nop, too, but make sure it's there
969	sv = decode_sv (&dec);	1075	sv = decode_sv (&dec);
970		1076
		1077	if (!(offset_return \|\| !sv))
		1078	{
		1079	// check for trailing garbage
		1080	decode_ws (&dec);
		1081
		1082	if (*dec.cur)
		1083	{
		1084	dec.err = "garbage after JSON object";
		1085	SvREFCNT_dec (sv);
		1086	sv = 0;
		1087	}
		1088	}
		1089
		1090	if (offset_return \|\| !sv)
		1091	{
		1092	offset = dec.flags & F_UTF8
		1093	? dec.cur - SvPVX (string)
		1094	: utf8_distance (dec.cur, SvPVX (string));
		1095
		1096	if (offset_return)
		1097	*offset_return = offset;
		1098	}
		1099
971	if (!sv)	1100	if (!sv)
972	{	1101	{
973	IV offset = dec.flags & F_UTF8
974	? dec.cur - SvPVX (string)
975	: utf8_distance (dec.cur, SvPVX (string));
976	SV *uni = sv_newmortal ();	1102	SV *uni = sv_newmortal ();
977		1103
978	// horrible hack to silence warning inside pv_uni_display	1104	// horrible hack to silence warning inside pv_uni_display
979	COP cop = *PL_curcop;	1105	COP cop = *PL_curcop;
980	cop.cop_warnings = pWARN_NONE;	1106	cop.cop_warnings = pWARN_NONE;
…		…
982	SAVEVPTR (PL_curcop);	1108	SAVEVPTR (PL_curcop);
983	PL_curcop = &cop;	1109	PL_curcop = &cop;
984	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);	1110	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
985	LEAVE;	1111	LEAVE;
986		1112
987	croak ("%s, at character offset %d (%s)",	1113	croak ("%s, at character offset %d [\"%s\"]",
988	dec.err,	1114	dec.err,
989	(int)offset,	1115	(int)offset,
990	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");	1116	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
991	}	1117	}
992		1118
…		…
1028	RETVAL	1154	RETVAL
1029		1155
1030	SV ascii (SV self, int enable = 1)	1156	SV ascii (SV self, int enable = 1)
1031	ALIAS:	1157	ALIAS:
1032	ascii = F_ASCII	1158	ascii = F_ASCII
		1159	latin1 = F_LATIN1
1033	utf8 = F_UTF8	1160	utf8 = F_UTF8
1034	indent = F_INDENT	1161	indent = F_INDENT
1035	canonical = F_CANONICAL	1162	canonical = F_CANONICAL
1036	space_before = F_SPACE_BEFORE	1163	space_before = F_SPACE_BEFORE
1037	space_after = F_SPACE_AFTER	1164	space_after = F_SPACE_AFTER
…		…
1049	RETVAL = newSVsv (self);	1176	RETVAL = newSVsv (self);
1050	}	1177	}
1051	OUTPUT:	1178	OUTPUT:
1052	RETVAL	1179	RETVAL
1053		1180
1054	SV max_depth (SV self, int max_depth = 0x80000000UL)	1181	SV max_depth (SV self, UV max_depth = 0x80000000UL)
1055	CODE:	1182	CODE:
1056	{	1183	{
1057	UV *uv = SvJSON (self);	1184	UV *uv = SvJSON (self);
1058	UV log2 = 0;	1185	UV log2 = 0;
1059		1186
…		…
1073	PPCODE:	1200	PPCODE:
1074	XPUSHs (encode_json (scalar, *SvJSON (self)));	1201	XPUSHs (encode_json (scalar, *SvJSON (self)));
1075		1202
1076	void decode (SV self, SV jsonstr)	1203	void decode (SV self, SV jsonstr)
1077	PPCODE:	1204	PPCODE:
1078	XPUSHs (decode_json (jsonstr, *SvJSON (self)));	1205	XPUSHs (decode_json (jsonstr, *SvJSON (self), 0));
		1206
		1207	void decode_prefix (SV self, SV jsonstr)
		1208	PPCODE:
		1209	{
		1210	UV offset;
		1211	EXTEND (SP, 2);
		1212	PUSHs (decode_json (jsonstr, *SvJSON (self), &offset));
		1213	PUSHs (sv_2mortal (newSVuv (offset)));
		1214	}
1079		1215
1080	PROTOTYPES: ENABLE	1216	PROTOTYPES: ENABLE
1081		1217
1082	void to_json (SV *scalar)	1218	void to_json (SV *scalar)
1083	ALIAS:	1219	ALIAS:
…		…
1087		1223
1088	void from_json (SV *jsonstr)	1224	void from_json (SV *jsonstr)
1089	ALIAS:	1225	ALIAS:
1090	jsonToObj = 0	1226	jsonToObj = 0
1091	PPCODE:	1227	PPCODE:
1092	XPUSHs (decode_json (jsonstr, F_DEFAULT \| F_UTF8));	1228	XPUSHs (decode_json (jsonstr, F_DEFAULT \| F_UTF8, 0));
1093		1229

Diff Legend

– Removed lines
+ Added lines
< Changed lines
> Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.18 by root, Sun Mar 25 21:19:13 2007 UTC vs. Revision 1.35 by root, Wed Jun 6 14:52:49 2007 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.18 by root, Sun Mar 25 21:19:13 2007 UTC vs.
Revision 1.35 by root, Wed Jun 6 14:52:49 2007 UTC