[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.8 by root, Fri Mar 23 16:13:59 2007 UTC vs.
Revision 1.13 by root, Sat Mar 24 22:55:16 2007 UTC

…		…
10	#define F_UTF8 0x00000002	10	#define F_UTF8 0x00000002
11	#define F_INDENT 0x00000004	11	#define F_INDENT 0x00000004
12	#define F_CANONICAL 0x00000008	12	#define F_CANONICAL 0x00000008
13	#define F_SPACE_BEFORE 0x00000010	13	#define F_SPACE_BEFORE 0x00000010
14	#define F_SPACE_AFTER 0x00000020	14	#define F_SPACE_AFTER 0x00000020
15	#define F_JSON_RPC 0x00000040
16	#define F_ALLOW_NONREF 0x00000080	15	#define F_ALLOW_NONREF 0x00000080
17	#define F_SHRINK 0x00000100	16	#define F_SHRINK 0x00000100
18		17
19	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER	18	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER
20	#define F_DEFAULT 0	19	#define F_DEFAULT 0
21		20
22	#define INIT_SIZE 32 // initial scalar size to be allocated	21	#define INIT_SIZE 32 // initial scalar size to be allocated
		22	#define INDENT_STEP 3 // spaces per indentation level
		23
		24	#define UTF8_MAX_LEN 11 // for perls UTF-X: max. number of octets per character
		25	#define SHORT_STRING_LEN 256 // special-case strings of up to this size
23		26
24	#define SB do {	27	#define SB do {
25	#define SE } while (0)	28	#define SE } while (0)
26		29
27	static HV *json_stash;	30	static HV *json_stash; // JSON::XS::
		31
		32	/////////////////////////////////////////////////////////////////////////////
		33	// utility functions
		34
		35	static UV *
		36	SvJSON (SV *sv)
		37	{
		38	if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
		39	croak ("object is not of type JSON::XS");
		40
		41	return &SvUVX (SvRV (sv));
		42	}
		43
		44	static void
		45	shrink (SV *sv)
		46	{
		47	sv_utf8_downgrade (sv, 1);
		48	if (SvLEN (sv) > SvCUR (sv) + 1)
		49	{
		50	#ifdef SvPV_shrink_to_cur
		51	SvPV_shrink_to_cur (sv);
		52	#elif defined (SvPV_renew)
		53	SvPV_renew (sv, SvCUR (sv) + 1);
		54	#endif
		55	}
		56	}
		57
		58	// decode an utf-8 character and return it, or (UV)-1 in
		59	// case of an error.
		60	// we special-case "safe" characters from U+80 .. U+7FF,
		61	// but use the very good perl function to parse anything else.
		62	// note that we never call this function for a ascii codepoints
		63	static UV
		64	decode_utf8 (unsigned char s, STRLEN len, STRLEN clen)
		65	{
		66	if (s[0] > 0xdf \|\| s[0] < 0xc2)
		67	return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
		68	else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)
		69	{
		70	*clen = 2;
		71	return ((s[0] & 0x1f) << 6) \| (s[1] & 0x3f);
		72	}
		73	else
		74	return (UV)-1;
		75	}
		76
		77	/////////////////////////////////////////////////////////////////////////////
		78	// encoder
28		79
29	// structure used for encoding JSON	80	// structure used for encoding JSON
30	typedef struct	81	typedef struct
31	{	82	{
32	char *cur;	83	char *cur; // SvPVX (sv) + current output position
33	STRLEN len; // SvLEN (sv)
34	char *end; // SvEND (sv)	84	char *end; // SvEND (sv)
35	SV *sv;	85	SV *sv; // result scalar
36	UV flags;	86	UV flags; // F_*
37	int max_recurse;	87	int indent; // indentation level
38	int indent;	88	int max_depth; // max. recursion level
39	} enc_t;	89	} enc_t;
40
41	// structure used for decoding JSON
42	typedef struct
43	{
44	char *cur;
45	char *end;
46	const char *err;
47	UV flags;
48	} dec_t;
49
50	static UV *
51	SvJSON (SV *sv)
52	{
53	if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
54	croak ("object is not of type JSON::XS");
55
56	return &SvUVX (SvRV (sv));
57	}
58
59	static void
60	shrink (SV *sv)
61	{
62	sv_utf8_downgrade (sv, 1);
63	#ifdef SvPV_shrink_to_cur
64	SvPV_shrink_to_cur (sv);
65	#endif
66	}
67
68	/////////////////////////////////////////////////////////////////////////////
69		90
70	static void	91	static void
71	need (enc_t *enc, STRLEN len)	92	need (enc_t *enc, STRLEN len)
72	{	93	{
73	if (enc->cur + len >= enc->end)	94	if (enc->cur + len >= enc->end)
…		…
131	STRLEN clen;	152	STRLEN clen;
132	UV uch;	153	UV uch;
133		154
134	if (is_utf8)	155	if (is_utf8)
135	{	156	{
136	uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);	157	//uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
		158	uch = decode_utf8 (str, end - str, &clen);
137	if (clen == (STRLEN)-1)	159	if (clen == (STRLEN)-1)
138	croak ("malformed UTF-8 character in string, cannot convert to JSON");	160	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
139	}	161	}
140	else	162	else
141	{	163	{
142	uch = ch;	164	uch = ch;
143	clen = 1;	165	clen = 1;
144	}	166	}
145		167
		168	if (uch > 0x10FFFFUL)
		169	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
		170
146	if (uch < 0x80 \|\| enc->flags & F_ASCII)	171	if (uch < 0x80 \|\| enc->flags & F_ASCII)
147	{	172	{
148	if (uch > 0xFFFFUL)	173	if (uch > 0xFFFFUL)
149	{	174	{
150	need (enc, len += 11);	175	need (enc, len += 11);
151	sprintf (enc->cur, "\\u%04x\\u%04x",	176	sprintf (enc->cur, "\\u%04x\\u%04x",
152	(uch - 0x10000) / 0x400 + 0xD800,	177	(int)((uch - 0x10000) / 0x400 + 0xD800),
153	(uch - 0x10000) % 0x400 + 0xDC00);	178	(int)((uch - 0x10000) % 0x400 + 0xDC00));
154	enc->cur += 12;	179	enc->cur += 12;
155	}	180	}
156	else	181	else
157	{	182	{
158	static char hexdigit [16] = "0123456789abcdef";	183	static char hexdigit [16] = "0123456789abcdef";
…		…
176	}	201	}
177	while (--clen);	202	while (--clen);
178	}	203	}
179	else	204	else
180	{	205	{
181	need (enc, len += 10); // never more than 11 bytes needed	206	need (enc, len += UTF8_MAX_LEN - 1); // never more than 11 bytes needed
182	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);	207	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
183	++str;	208	++str;
184	}	209	}
185	}	210	}
186	}	211	}
…		…
188		213
189	--len;	214	--len;
190	}	215	}
191	}	216	}
192		217
193	#define INDENT SB \	218	static void
		219	encode_indent (enc_t *enc)
		220	{
194	if (enc->flags & F_INDENT) \	221	if (enc->flags & F_INDENT)
195	{ \	222	{
196	int i_; \	223	int spaces = enc->indent * INDENT_STEP;
197	need (enc, enc->indent); \
198	for (i_ = enc->indent * 3; i_--; )\
199	encode_ch (enc, ' '); \
200	} \
201	SE
202		224
203	#define SPACE SB need (enc, 1); encode_ch (enc, ' '); SE	225	need (enc, spaces);
204	#define NL SB if (enc->flags & F_INDENT) { need (enc, 1); encode_ch (enc, '\n'); } SE	226	memset (enc->cur, ' ', spaces);
205	#define COMMA SB \	227	enc->cur += spaces;
		228	}
		229	}
		230
		231	static void
		232	encode_space (enc_t *enc)
		233	{
		234	need (enc, 1);
206	encode_ch (enc, ','); \	235	encode_ch (enc, ' ');
		236	}
		237
		238	static void
		239	encode_nl (enc_t *enc)
		240	{
207	if (enc->flags & F_INDENT) \	241	if (enc->flags & F_INDENT)
208	NL; \	242	{
		243	need (enc, 1);
		244	encode_ch (enc, '\n');
		245	}
		246	}
		247
		248	static void
		249	encode_comma (enc_t *enc)
		250	{
		251	encode_ch (enc, ',');
		252
		253	if (enc->flags & F_INDENT)
		254	encode_nl (enc);
209	else if (enc->flags & F_SPACE_AFTER) \	255	else if (enc->flags & F_SPACE_AFTER)
210	SPACE; \	256	encode_space (enc);
211	SE	257	}
212		258
213	static void encode_sv (enc_t enc, SV sv);	259	static void encode_sv (enc_t enc, SV sv);
214		260
215	static void	261	static void
216	encode_av (enc_t enc, AV av)	262	encode_av (enc_t enc, AV av)
217	{	263	{
218	int i, len = av_len (av);	264	int i, len = av_len (av);
219		265
220	encode_ch (enc, '['); NL;	266	encode_ch (enc, '['); encode_nl (enc);
221	++enc->indent;	267	++enc->indent;
222		268
223	for (i = 0; i <= len; ++i)	269	for (i = 0; i <= len; ++i)
224	{	270	{
225	INDENT;	271	encode_indent (enc);
226	encode_sv (enc, *av_fetch (av, i, 0));	272	encode_sv (enc, *av_fetch (av, i, 0));
227		273
228	if (i < len)	274	if (i < len)
229	COMMA;	275	encode_comma (enc);
230	}	276	}
231		277
232	NL;	278	encode_nl (enc);
233		279
234	--enc->indent;	280	--enc->indent;
235	INDENT; encode_ch (enc, ']');	281	encode_indent (enc); encode_ch (enc, ']');
236	}	282	}
237		283
238	static void	284	static void
239	encode_he (enc_t enc, HE he)	285	encode_he (enc_t enc, HE he)
240	{	286	{
…		…
254	else	300	else
255	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));	301	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
256		302
257	encode_ch (enc, '"');	303	encode_ch (enc, '"');
258		304
259	if (enc->flags & F_SPACE_BEFORE) SPACE;	305	if (enc->flags & F_SPACE_BEFORE) encode_space (enc);
260	encode_ch (enc, ':');	306	encode_ch (enc, ':');
261	if (enc->flags & F_SPACE_AFTER ) SPACE;	307	if (enc->flags & F_SPACE_AFTER ) encode_space (enc);
262	encode_sv (enc, HeVAL (he));	308	encode_sv (enc, HeVAL (he));
263	}	309	}
264		310
265	// compare hash entries, used when all keys are bytestrings	311	// compare hash entries, used when all keys are bytestrings
266	static int	312	static int
…		…
272	HE b = (HE **)b_;	318	HE b = (HE **)b_;
273		319
274	STRLEN la = HeKLEN (a);	320	STRLEN la = HeKLEN (a);
275	STRLEN lb = HeKLEN (b);	321	STRLEN lb = HeKLEN (b);
276		322
277	if (!(cmp == memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))	323	if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
278	cmp = la < lb ? -1 : la == lb ? 0 : 1;	324	cmp = la - lb;
279		325
280	return cmp;	326	return cmp;
281	}	327	}
282		328
283	// compare hash entries, used when some keys are sv's or utf-x	329	// compare hash entries, used when some keys are sv's or utf-x
…		…
290	static void	336	static void
291	encode_hv (enc_t enc, HV hv)	337	encode_hv (enc_t enc, HV hv)
292	{	338	{
293	int count, i;	339	int count, i;
294		340
295	encode_ch (enc, '{'); NL; ++enc->indent;	341	encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;
296		342
297	if ((count = hv_iterinit (hv)))	343	if ((count = hv_iterinit (hv)))
298	{	344	{
299	// for canonical output we have to sort by keys first	345	// for canonical output we have to sort by keys first
300	// actually, this is mostly due to the stupid so-called	346	// actually, this is mostly due to the stupid so-called
301	// security workaround added somewhere in 5.8.x.	347	// security workaround added somewhere in 5.8.x.
302	// that randomises hash orderings	348	// that randomises hash orderings
303	if (enc->flags & F_CANONICAL)	349	if (enc->flags & F_CANONICAL)
304	{	350	{
305	HE he, hes [count];	351	HE he, hes [count]; // if your compiler dies here, you need to enable C99 mode
306	int fast = 1;	352	int fast = 1;
307		353
308	i = 0;	354	i = 0;
309	while ((he = hv_iternext (hv)))	355	while ((he = hv_iternext (hv)))
310	{	356	{
…		…
335	LEAVE;	381	LEAVE;
336	}	382	}
337		383
338	for (i = 0; i < count; ++i)	384	for (i = 0; i < count; ++i)
339	{	385	{
340	INDENT;	386	encode_indent (enc);
341	encode_he (enc, hes [i]);	387	encode_he (enc, hes [i]);
342		388
343	if (i < count - 1)	389	if (i < count - 1)
344	COMMA;	390	encode_comma (enc);
345	}	391	}
346		392
347	NL;	393	encode_nl (enc);
348	}	394	}
349	else	395	else
350	{	396	{
351	SV *sv;	397	SV *sv;
352	HE *he = hv_iternext (hv);	398	HE *he = hv_iternext (hv);
353		399
354	for (;;)	400	for (;;)
355	{	401	{
356	INDENT;	402	encode_indent (enc);
357	encode_he (enc, he);	403	encode_he (enc, he);
358		404
359	if (!(he = hv_iternext (hv)))	405	if (!(he = hv_iternext (hv)))
360	break;	406	break;
361		407
362	COMMA;	408	encode_comma (enc);
363	}	409	}
364		410
365	NL;	411	encode_nl (enc);
366	}	412	}
367	}	413	}
368		414
369	--enc->indent; INDENT; encode_ch (enc, '}');	415	--enc->indent; encode_indent (enc); encode_ch (enc, '}');
370	}	416	}
371		417
372	static void	418	static void
373	encode_sv (enc_t enc, SV sv)	419	encode_sv (enc_t enc, SV sv)
374	{	420	{
…		…
396	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))	442	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))
397	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));	443	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));
398	}	444	}
399	else if (SvROK (sv))	445	else if (SvROK (sv))
400	{	446	{
401	if (!--enc->max_recurse)	447	SV *rv = SvRV (sv);
		448
		449	if (enc->indent >= enc->max_depth)
402	croak ("data structure too deep (hit recursion limit)");	450	croak ("data structure too deep (hit recursion limit)");
403		451
404	sv = SvRV (sv);
405
406	switch (SvTYPE (sv))	452	switch (SvTYPE (rv))
407	{	453	{
408	case SVt_PVAV: encode_av (enc, (AV *)sv); break;	454	case SVt_PVAV: encode_av (enc, (AV *)rv); break;
409	case SVt_PVHV: encode_hv (enc, (HV *)sv); break;	455	case SVt_PVHV: encode_hv (enc, (HV *)rv); break;
410		456
411	default:	457	default:
412	croak ("JSON can only represent references to arrays or hashes");	458	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
		459	SvPV_nolen (sv));
413	}	460	}
414	}	461	}
415	else if (!SvOK (sv))	462	else if (!SvOK (sv))
416	encode_str (enc, "null", 4, 0);	463	encode_str (enc, "null", 4, 0);
417	else	464	else
418	croak ("encountered perl type that JSON cannot handle");	465	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
		466	SvPV_nolen (sv), SvFLAGS (sv));
419	}	467	}
420		468
421	static SV *	469	static SV *
422	encode_json (SV *scalar, UV flags)	470	encode_json (SV *scalar, UV flags)
423	{	471	{
424	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))	472	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
425	croak ("hash- or arraref required (not a simple scalar, use allow_nonref to allow this)");	473	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
426		474
427	enc_t enc;	475	enc_t enc;
428	enc.flags = flags;	476	enc.flags = flags;
429	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));	477	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
430	enc.cur = SvPVX (enc.sv);	478	enc.cur = SvPVX (enc.sv);
431	enc.end = SvEND (enc.sv);	479	enc.end = SvEND (enc.sv);
432	enc.max_recurse = 0;
433	enc.indent = 0;	480	enc.indent = 0;
		481	enc.max_depth = 0x7fffffffUL;
434		482
435	SvPOK_only (enc.sv);	483	SvPOK_only (enc.sv);
436	encode_sv (&enc, scalar);	484	encode_sv (&enc, scalar);
437		485
438	if (!(flags & (F_ASCII \| F_UTF8)))	486	if (!(flags & (F_ASCII \| F_UTF8)))
…		…
445		493
446	return enc.sv;	494	return enc.sv;
447	}	495	}
448		496
449	/////////////////////////////////////////////////////////////////////////////	497	/////////////////////////////////////////////////////////////////////////////
		498	// decoder
450		499
451	#define WS \	500	// structure used for decoding JSON
		501	typedef struct
		502	{
		503	char *cur; // current parser pointer
		504	char *end; // end of input string
		505	const char *err; // parse error, if != 0
		506	UV flags; // F_*
		507	} dec_t;
		508
		509	static void
		510	decode_ws (dec_t *dec)
		511	{
452	for (;;) \	512	for (;;)
453	{ \	513	{
454	char ch = *dec->cur; \	514	char ch = *dec->cur;
		515
455	if (ch > 0x20 \	516	if (ch > 0x20
456	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)) \	517	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))
457	break; \	518	break;
		519
458	++dec->cur; \	520	++dec->cur;
459	}	521	}
		522	}
460		523
461	#define ERR(reason) SB dec->err = reason; goto fail; SE	524	#define ERR(reason) SB dec->err = reason; goto fail; SE
462	#define EXPECT_CH(ch) SB \	525	#define EXPECT_CH(ch) SB \
463	if (*dec->cur != ch) \	526	if (*dec->cur != ch) \
464	ERR (# ch " expected"); \	527	ERR (# ch " expected"); \
…		…
471		534
472	static UV	535	static UV
473	decode_4hex (dec_t *dec)	536	decode_4hex (dec_t *dec)
474	{	537	{
475	signed char d1, d2, d3, d4;	538	signed char d1, d2, d3, d4;
		539	unsigned char cur = (unsigned char )dec->cur;
476		540
477	d1 = decode_hexdigit [((unsigned char *)dec->cur) [0]];
478	if (d1 < 0) ERR ("four hexadecimal digits expected");	541	d1 = decode_hexdigit [cur [0]]; if (d1 < 0) ERR ("four hexadecimal digits expected");
479	d2 = decode_hexdigit [((unsigned char *)dec->cur) [1]];
480	if (d2 < 0) ERR ("four hexadecimal digits expected");	542	d2 = decode_hexdigit [cur [1]]; if (d2 < 0) ERR ("four hexadecimal digits expected");
481	d3 = decode_hexdigit [((unsigned char *)dec->cur) [2]];
482	if (d3 < 0) ERR ("four hexadecimal digits expected");	543	d3 = decode_hexdigit [cur [2]]; if (d3 < 0) ERR ("four hexadecimal digits expected");
483	d4 = decode_hexdigit [((unsigned char *)dec->cur) [3]];
484	if (d4 < 0) ERR ("four hexadecimal digits expected");	544	d4 = decode_hexdigit [cur [3]]; if (d4 < 0) ERR ("four hexadecimal digits expected");
485		545
486	dec->cur += 4;	546	dec->cur += 4;
487		547
488	return ((UV)d1) << 12	548	return ((UV)d1) << 12
489	\| ((UV)d2) << 8	549	\| ((UV)d2) << 8
…		…
492		552
493	fail:	553	fail:
494	return (UV)-1;	554	return (UV)-1;
495	}	555	}
496		556
497	#define APPEND_GROW(n) SB \
498	if (cur + (n) >= end) \
499	{ \
500	STRLEN ofs = cur - SvPVX (sv); \
501	SvGROW (sv, ofs + (n) + 1); \
502	cur = SvPVX (sv) + ofs; \
503	end = SvEND (sv); \
504	} \
505	SE
506
507	#define APPEND_CH(ch) SB \
508	APPEND_GROW (1); \
509	*cur++ = (ch); \
510	SE
511
512	static SV *	557	static SV *
513	decode_str (dec_t *dec)	558	decode_str (dec_t *dec)
514	{	559	{
515	SV *sv = NEWSV (0,2);	560	SV *sv = 0;
516	int utf8 = 0;	561	int utf8 = 0;
517	char *cur = SvPVX (sv);
518	char *end = SvEND (sv);
519		562
520	for (;;)	563	do
521	{	564	{
522	unsigned char ch = (unsigned char )dec->cur;	565	char buf [SHORT_STRING_LEN + UTF8_MAX_LEN];
		566	char *cur = buf;
523		567
524	if (ch == '"')	568	do
525	break;
526	else if (ch == '\\')
527	{	569	{
528	switch (*++dec->cur)	570	unsigned char ch = (unsigned char )dec->cur++;
		571
		572	if (ch == '"')
529	{	573	{
530	case '\\':	574	--dec->cur;
531	case '/':	575	break;
532	case '"': APPEND_CH (*dec->cur++); break;	576	}
533		577	else if (ch == '\\')
534	case 'b': APPEND_CH ('\010'); ++dec->cur; break;	578	{
535	case 't': APPEND_CH ('\011'); ++dec->cur; break;	579	switch (*dec->cur)
536	case 'n': APPEND_CH ('\012'); ++dec->cur; break;
537	case 'f': APPEND_CH ('\014'); ++dec->cur; break;
538	case 'r': APPEND_CH ('\015'); ++dec->cur; break;
539
540	case 'u':
541	{	580	{
542	UV lo, hi;	581	case '\\':
543	++dec->cur;	582	case '/':
		583	case '"': cur++ = dec->cur++; break;
544		584
545	hi = decode_4hex (dec);	585	case 'b': ++dec->cur; *cur++ = '\010'; break;
546	if (hi == (UV)-1)	586	case 't': ++dec->cur; *cur++ = '\011'; break;
547	goto fail;	587	case 'n': ++dec->cur; *cur++ = '\012'; break;
		588	case 'f': ++dec->cur; *cur++ = '\014'; break;
		589	case 'r': ++dec->cur; *cur++ = '\015'; break;
548		590
549	// possibly a surrogate pair	591	case 'u':
550	if (hi >= 0xd800 && hi < 0xdc00)
551	{	592	{
552	if (dec->cur [0] != '\\' \|\| dec->cur [1] != 'u')	593	UV lo, hi;
553	ERR ("missing low surrogate character in surrogate pair");
554
555	dec->cur += 2;	594	++dec->cur;
556		595
557	lo = decode_4hex (dec);	596	hi = decode_4hex (dec);
558	if (lo == (UV)-1)	597	if (hi == (UV)-1)
559	goto fail;	598	goto fail;
560		599
		600	// possibly a surrogate pair
		601	if (hi >= 0xd800)
		602	if (hi < 0xdc00)
		603	{
		604	if (dec->cur [0] != '\\' \|\| dec->cur [1] != 'u')
		605	ERR ("missing low surrogate character in surrogate pair");
		606
		607	dec->cur += 2;
		608
		609	lo = decode_4hex (dec);
		610	if (lo == (UV)-1)
		611	goto fail;
		612
561	if (lo < 0xdc00 \|\| lo >= 0xe000)	613	if (lo < 0xdc00 \|\| lo >= 0xe000)
562	ERR ("surrogate pair expected");	614	ERR ("surrogate pair expected");
563		615
564	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;	616	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
		617	}
		618	else if (hi < 0xe000)
		619	ERR ("missing high surrogate character in surrogate pair");
		620
		621	if (hi >= 0x80)
		622	{
		623	utf8 = 1;
		624
		625	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
		626	}
		627	else
		628	*cur++ = hi;
565	}	629	}
566	else if (hi >= 0xdc00 && hi < 0xe000)
567	ERR ("missing high surrogate character in surrogate pair");
568
569	if (hi >= 0x80)
570	{	630	break;
571	utf8 = 1;
572		631
573	APPEND_GROW (4); // at most 4 bytes for 21 bits
574	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
575	}
576	else	632	default:
577	APPEND_CH (hi);	633	--dec->cur;
		634	ERR ("illegal backslash escape sequence in string");
578	}	635	}
579	break;
580
581	default:
582	--dec->cur;
583	ERR ("illegal backslash escape sequence in string");
584	}	636	}
		637	else if (ch >= 0x20 && ch <= 0x7f)
		638	*cur++ = ch;
		639	else if (ch >= 0x80)
		640	{
		641	--dec->cur;
		642
		643	STRLEN clen;
		644	UV uch = decode_utf8 (dec->cur, dec->end - dec->cur, &clen);
		645	if (clen == (STRLEN)-1)
		646	ERR ("malformed UTF-8 character in JSON string");
		647
		648	do
		649	{
		650	cur++ = dec->cur++;
		651	}
		652	while (--clen);
		653
		654	utf8 = 1;
		655	}
		656	else if (!ch)
		657	ERR ("unexpected end of string while parsing json string");
		658	else
		659	ERR ("invalid character encountered");
		660
585	}	661	}
586	else if (ch >= 0x20 && ch <= 0x7f)	662	while (cur < buf + SHORT_STRING_LEN);
587	APPEND_CH (*dec->cur++);	663
588	else if (ch >= 0x80)	664	STRLEN len = cur - buf;
		665
		666	if (sv)
589	{	667	{
590	STRLEN clen;	668	SvGROW (sv, SvCUR (sv) + len + 1);
591	UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);	669	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
592	if (clen == (STRLEN)-1)	670	SvCUR_set (sv, SvCUR (sv) + len);
593	ERR ("malformed UTF-8 character in JSON string");
594
595	APPEND_GROW (clen);
596	do
597	{
598	cur++ = dec->cur++;
599	}
600	while (--clen);
601
602	utf8 = 1;
603	}	671	}
604	else if (dec->cur == dec->end)
605	ERR ("unexpected end of string while parsing json string");
606	else	672	else
607	ERR ("invalid character encountered");	673	sv = newSVpvn (buf, len);
608	}	674	}
		675	while (*dec->cur != '"');
609		676
610	++dec->cur;	677	++dec->cur;
611		678
612	SvCUR_set (sv, cur - SvPVX (sv));	679	if (sv)
613		680	{
614	SvPOK_only (sv);	681	SvPOK_only (sv);
615	*SvEND (sv) = 0;	682	*SvEND (sv) = 0;
616		683
617	if (utf8)	684	if (utf8)
618	SvUTF8_on (sv);	685	SvUTF8_on (sv);
619		686	}
620	if (dec->flags & F_SHRINK)	687	else
621	shrink (sv);	688	sv = newSVpvn ("", 0);
622		689
623	return sv;	690	return sv;
624		691
625	fail:	692	fail:
626	SvREFCNT_dec (sv);
627	return 0;	693	return 0;
628	}	694	}
629		695
630	static SV *	696	static SV *
631	decode_num (dec_t *dec)	697	decode_num (dec_t *dec)
…		…
712	static SV *	778	static SV *
713	decode_av (dec_t *dec)	779	decode_av (dec_t *dec)
714	{	780	{
715	AV *av = newAV ();	781	AV *av = newAV ();
716		782
717	WS;	783	decode_ws (dec);
718	if (*dec->cur == ']')	784	if (*dec->cur == ']')
719	++dec->cur;	785	++dec->cur;
720	else	786	else
721	for (;;)	787	for (;;)
722	{	788	{
…		…
726	if (!value)	792	if (!value)
727	goto fail;	793	goto fail;
728		794
729	av_push (av, value);	795	av_push (av, value);
730		796
731	WS;	797	decode_ws (dec);
732		798
733	if (*dec->cur == ']')	799	if (*dec->cur == ']')
734	{	800	{
735	++dec->cur;	801	++dec->cur;
736	break;	802	break;
…		…
752	static SV *	818	static SV *
753	decode_hv (dec_t *dec)	819	decode_hv (dec_t *dec)
754	{	820	{
755	HV *hv = newHV ();	821	HV *hv = newHV ();
756		822
757	WS;	823	decode_ws (dec);
758	if (*dec->cur == '}')	824	if (*dec->cur == '}')
759	++dec->cur;	825	++dec->cur;
760	else	826	else
761	for (;;)	827	for (;;)
762	{	828	{
763	SV key, value;	829	SV key, value;
764		830
765	WS; EXPECT_CH ('"');	831	decode_ws (dec); EXPECT_CH ('"');
766		832
767	key = decode_str (dec);	833	key = decode_str (dec);
768	if (!key)	834	if (!key)
769	goto fail;	835	goto fail;
770		836
771	WS; EXPECT_CH (':');	837	decode_ws (dec); EXPECT_CH (':');
772		838
773	value = decode_sv (dec);	839	value = decode_sv (dec);
774	if (!value)	840	if (!value)
775	{	841	{
776	SvREFCNT_dec (key);	842	SvREFCNT_dec (key);
…		…
778	}	844	}
779		845
780	//TODO: optimise	846	//TODO: optimise
781	hv_store_ent (hv, key, value, 0);	847	hv_store_ent (hv, key, value, 0);
782		848
783	WS;	849	decode_ws (dec);
784		850
785	if (*dec->cur == '}')	851	if (*dec->cur == '}')
786	{	852	{
787	++dec->cur;	853	++dec->cur;
788	break;	854	break;
…		…
802	}	868	}
803		869
804	static SV *	870	static SV *
805	decode_sv (dec_t *dec)	871	decode_sv (dec_t *dec)
806	{	872	{
807	WS;	873	decode_ws (dec);
808	switch (*dec->cur)	874	switch (*dec->cur)
809	{	875	{
810	case '"': ++dec->cur; return decode_str (dec);	876	case '"': ++dec->cur; return decode_str (dec);
811	case '[': ++dec->cur; return decode_av (dec);	877	case '[': ++dec->cur; return decode_av (dec);
812	case '{': ++dec->cur; return decode_hv (dec);	878	case '{': ++dec->cur; return decode_hv (dec);
…		…
901	}	967	}
902		968
903	sv = sv_2mortal (sv);	969	sv = sv_2mortal (sv);
904		970
905	if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))	971	if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
906	croak ("JSON object or array expected (but number, string, true, false or null found, use allow_nonref to allow this)");	972	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
907		973
908	return sv;	974	return sv;
909	}	975	}
		976
		977	/////////////////////////////////////////////////////////////////////////////
		978	// XS interface functions
910		979
911	MODULE = JSON::XS PACKAGE = JSON::XS	980	MODULE = JSON::XS PACKAGE = JSON::XS
912		981
913	BOOT:	982	BOOT:
914	{	983	{
…		…
941	utf8 = F_UTF8	1010	utf8 = F_UTF8
942	indent = F_INDENT	1011	indent = F_INDENT
943	canonical = F_CANONICAL	1012	canonical = F_CANONICAL
944	space_before = F_SPACE_BEFORE	1013	space_before = F_SPACE_BEFORE
945	space_after = F_SPACE_AFTER	1014	space_after = F_SPACE_AFTER
946	json_rpc = F_JSON_RPC
947	pretty = F_PRETTY	1015	pretty = F_PRETTY
948	allow_nonref = F_ALLOW_NONREF	1016	allow_nonref = F_ALLOW_NONREF
949	shrink = F_SHRINK	1017	shrink = F_SHRINK
950	CODE:	1018	CODE:
951	{	1019	{

Diff Legend

– Removed lines
+ Added lines
< Changed lines
> Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.8 by root, Fri Mar 23 16:13:59 2007 UTC vs. Revision 1.13 by root, Sat Mar 24 22:55:16 2007 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.8 by root, Fri Mar 23 16:13:59 2007 UTC vs.
Revision 1.13 by root, Sat Mar 24 22:55:16 2007 UTC