[ViewVC] Diff of: cvs/JSON-XS/XS.xs

Comparing JSON-XS/XS.xs (file contents):
Revision 1.7 by root, Fri Mar 23 15:57:18 2007 UTC vs.
Revision 1.12 by root, Sat Mar 24 22:10:08 2007 UTC

…		…
10	#define F_UTF8 0x00000002	10	#define F_UTF8 0x00000002
11	#define F_INDENT 0x00000004	11	#define F_INDENT 0x00000004
12	#define F_CANONICAL 0x00000008	12	#define F_CANONICAL 0x00000008
13	#define F_SPACE_BEFORE 0x00000010	13	#define F_SPACE_BEFORE 0x00000010
14	#define F_SPACE_AFTER 0x00000020	14	#define F_SPACE_AFTER 0x00000020
15	#define F_JSON_RPC 0x00000040
16	#define F_ALLOW_NONREF 0x00000080	15	#define F_ALLOW_NONREF 0x00000080
17	#define F_SHRINK 0x00000100	16	#define F_SHRINK 0x00000100
18		17
19	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER	18	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER
20	#define F_DEFAULT 0	19	#define F_DEFAULT 0
21		20
22	#define INIT_SIZE 32 // initial scalar size to be allocated	21	#define INIT_SIZE 32 // initial scalar size to be allocated
		22	#define INDENT_STEP 3 // spaces per indentation level
		23
		24	#define UTF8_MAX_LEN 11 // for perls UTF-X: max. number of octets per character
		25	#define SHORT_STRING_LEN 256 // special-case strings of up to this size
23		26
24	#define SB do {	27	#define SB do {
25	#define SE } while (0)	28	#define SE } while (0)
26		29
27	static HV *json_stash;	30	static HV *json_stash; // JSON::XS::
		31
		32	/////////////////////////////////////////////////////////////////////////////
		33	// utility functions
		34
		35	static UV *
		36	SvJSON (SV *sv)
		37	{
		38	if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
		39	croak ("object is not of type JSON::XS");
		40
		41	return &SvUVX (SvRV (sv));
		42	}
		43
		44	static void
		45	shrink (SV *sv)
		46	{
		47	sv_utf8_downgrade (sv, 1);
		48	if (SvLEN (sv) > SvCUR (sv) + 1)
		49	{
		50	#ifdef SvPV_shrink_to_cur
		51	SvPV_shrink_to_cur (sv);
		52	#elif defined (SvPV_renew)
		53	SvPV_renew (sv, SvCUR (sv) + 1);
		54	#endif
		55	}
		56	}
		57
		58	/////////////////////////////////////////////////////////////////////////////
		59	// encoder
28		60
29	// structure used for encoding JSON	61	// structure used for encoding JSON
30	typedef struct	62	typedef struct
31	{	63	{
32	char *cur;	64	char *cur; // SvPVX (sv) + current output position
33	STRLEN len; // SvLEN (sv)
34	char *end; // SvEND (sv)	65	char *end; // SvEND (sv)
35	SV *sv;	66	SV *sv; // result scalar
36	UV flags;	67	UV flags; // F_*
37	int max_recurse;	68	int indent; // indentation level
38	int indent;	69	int max_depth; // max. recursion level
39	} enc_t;	70	} enc_t;
40
41	// structure used for decoding JSON
42	typedef struct
43	{
44	char *cur;
45	char *end;
46	const char *err;
47	UV flags;
48	} dec_t;
49
50	static UV *
51	SvJSON (SV *sv)
52	{
53	if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
54	croak ("object is not of type JSON::XS");
55
56	return &SvUVX (SvRV (sv));
57	}
58
59	static void
60	shrink (SV *sv)
61	{
62	sv_utf8_downgrade (sv, 1);
63	#ifdef SvPV_shrink_to_cur
64	SvPV_shrink_to_cur (sv);
65	#endif
66	}
67
68	/////////////////////////////////////////////////////////////////////////////
69		71
70	static void	72	static void
71	need (enc_t *enc, STRLEN len)	73	need (enc_t *enc, STRLEN len)
72	{	74	{
73	if (enc->cur + len >= enc->end)	75	if (enc->cur + len >= enc->end)
…		…
133		135
134	if (is_utf8)	136	if (is_utf8)
135	{	137	{
136	uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);	138	uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
137	if (clen == (STRLEN)-1)	139	if (clen == (STRLEN)-1)
138	croak ("malformed UTF-8 character in string, cannot convert to JSON");	140	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
139	}	141	}
140	else	142	else
141	{	143	{
142	uch = ch;	144	uch = ch;
143	clen = 1;	145	clen = 1;
144	}	146	}
145		147
		148	if (uch > 0x10FFFFUL)
		149	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
		150
146	if (uch < 0x80 \|\| enc->flags & F_ASCII)	151	if (uch < 0x80 \|\| enc->flags & F_ASCII)
147	{	152	{
148	if (uch > 0xFFFFUL)	153	if (uch > 0xFFFFUL)
149	{	154	{
150	need (enc, len += 11);	155	need (enc, len += 11);
151	sprintf (enc->cur, "\\u%04x\\u%04x",	156	sprintf (enc->cur, "\\u%04x\\u%04x",
152	(uch - 0x10000) / 0x400 + 0xD800,	157	(int)((uch - 0x10000) / 0x400 + 0xD800),
153	(uch - 0x10000) % 0x400 + 0xDC00);	158	(int)((uch - 0x10000) % 0x400 + 0xDC00));
154	enc->cur += 12;	159	enc->cur += 12;
155	}	160	}
156	else	161	else
157	{	162	{
158	static char hexdigit [16] = "0123456789abcdef";	163	static char hexdigit [16] = "0123456789abcdef";
…		…
176	}	181	}
177	while (--clen);	182	while (--clen);
178	}	183	}
179	else	184	else
180	{	185	{
181	need (enc, len += 10); // never more than 11 bytes needed	186	need (enc, len += UTF8_MAX_LEN - 1); // never more than 11 bytes needed
182	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);	187	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
183	++str;	188	++str;
184	}	189	}
185	}	190	}
186	}	191	}
…		…
188		193
189	--len;	194	--len;
190	}	195	}
191	}	196	}
192		197
193	#define INDENT SB \	198	static void
		199	encode_indent (enc_t *enc)
		200	{
194	if (enc->flags & F_INDENT) \	201	if (enc->flags & F_INDENT)
195	{ \	202	{
196	int i_; \	203	int spaces = enc->indent * INDENT_STEP;
197	need (enc, enc->indent); \
198	for (i_ = enc->indent * 3; i_--; )\
199	encode_ch (enc, ' '); \
200	} \
201	SE
202		204
203	#define SPACE SB need (enc, 1); encode_ch (enc, ' '); SE	205	need (enc, spaces);
204	#define NL SB if (enc->flags & F_INDENT) { need (enc, 1); encode_ch (enc, '\n'); } SE	206	memset (enc->cur, ' ', spaces);
205	#define COMMA SB \	207	enc->cur += spaces;
		208	}
		209	}
		210
		211	static void
		212	encode_space (enc_t *enc)
		213	{
		214	need (enc, 1);
206	encode_ch (enc, ','); \	215	encode_ch (enc, ' ');
		216	}
		217
		218	static void
		219	encode_nl (enc_t *enc)
		220	{
207	if (enc->flags & F_INDENT) \	221	if (enc->flags & F_INDENT)
208	NL; \	222	{
		223	need (enc, 1);
		224	encode_ch (enc, '\n');
		225	}
		226	}
		227
		228	static void
		229	encode_comma (enc_t *enc)
		230	{
		231	encode_ch (enc, ',');
		232
		233	if (enc->flags & F_INDENT)
		234	encode_nl (enc);
209	else if (enc->flags & F_SPACE_AFTER) \	235	else if (enc->flags & F_SPACE_AFTER)
210	SPACE; \	236	encode_space (enc);
211	SE	237	}
212		238
213	static void encode_sv (enc_t enc, SV sv);	239	static void encode_sv (enc_t enc, SV sv);
214		240
215	static void	241	static void
216	encode_av (enc_t enc, AV av)	242	encode_av (enc_t enc, AV av)
217	{	243	{
218	int i, len = av_len (av);	244	int i, len = av_len (av);
219		245
220	encode_ch (enc, '['); NL;	246	encode_ch (enc, '['); encode_nl (enc);
221	++enc->indent;	247	++enc->indent;
222		248
223	for (i = 0; i <= len; ++i)	249	for (i = 0; i <= len; ++i)
224	{	250	{
225	INDENT;	251	encode_indent (enc);
226	encode_sv (enc, *av_fetch (av, i, 0));	252	encode_sv (enc, *av_fetch (av, i, 0));
227		253
228	if (i < len)	254	if (i < len)
229	COMMA;	255	encode_comma (enc);
230	}	256	}
231		257
232	NL;	258	encode_nl (enc);
233		259
234	--enc->indent;	260	--enc->indent;
235	INDENT; encode_ch (enc, ']');	261	encode_indent (enc); encode_ch (enc, ']');
236	}	262	}
237		263
238	static void	264	static void
239	encode_he (enc_t enc, HE he)	265	encode_he (enc_t enc, HE he)
240	{	266	{
…		…
254	else	280	else
255	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));	281	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
256		282
257	encode_ch (enc, '"');	283	encode_ch (enc, '"');
258		284
259	if (enc->flags & F_SPACE_BEFORE) SPACE;	285	if (enc->flags & F_SPACE_BEFORE) encode_space (enc);
260	encode_ch (enc, ':');	286	encode_ch (enc, ':');
261	if (enc->flags & F_SPACE_AFTER ) SPACE;	287	if (enc->flags & F_SPACE_AFTER ) encode_space (enc);
262	encode_sv (enc, HeVAL (he));	288	encode_sv (enc, HeVAL (he));
263	}	289	}
264		290
265	// compare hash entries, used when all keys are bytestrings	291	// compare hash entries, used when all keys are bytestrings
266	static int	292	static int
…		…
272	HE b = (HE **)b_;	298	HE b = (HE **)b_;
273		299
274	STRLEN la = HeKLEN (a);	300	STRLEN la = HeKLEN (a);
275	STRLEN lb = HeKLEN (b);	301	STRLEN lb = HeKLEN (b);
276		302
277	if (!(cmp == memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))	303	if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
278	cmp = la < lb ? -1 : la == lb ? 0 : 1;	304	cmp = la - lb;
279		305
280	return cmp;	306	return cmp;
281	}	307	}
282		308
283	// compare hash entries, used when some keys are sv's or utf-x	309	// compare hash entries, used when some keys are sv's or utf-x
…		…
290	static void	316	static void
291	encode_hv (enc_t enc, HV hv)	317	encode_hv (enc_t enc, HV hv)
292	{	318	{
293	int count, i;	319	int count, i;
294		320
295	encode_ch (enc, '{'); NL; ++enc->indent;	321	encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;
296		322
297	if ((count = hv_iterinit (hv)))	323	if ((count = hv_iterinit (hv)))
298	{	324	{
299	// for canonical output we have to sort by keys first	325	// for canonical output we have to sort by keys first
300	// actually, this is mostly due to the stupid so-called	326	// actually, this is mostly due to the stupid so-called
301	// security workaround added somewhere in 5.8.x.	327	// security workaround added somewhere in 5.8.x.
302	// that randomises hash orderings	328	// that randomises hash orderings
303	if (enc->flags & F_CANONICAL)	329	if (enc->flags & F_CANONICAL)
304	{	330	{
305	HE he, hes [count];	331	HE he, hes [count]; // if your compiler dies here, you need to enable C99 mode
306	int fast = 1;	332	int fast = 1;
307		333
308	i = 0;	334	i = 0;
309	while ((he = hv_iternext (hv)))	335	while ((he = hv_iternext (hv)))
310	{	336	{
…		…
317		343
318	if (fast)	344	if (fast)
319	qsort (hes, count, sizeof (HE *), he_cmp_fast);	345	qsort (hes, count, sizeof (HE *), he_cmp_fast);
320	else	346	else
321	{	347	{
322	// hack to disable "use bytes"	348	// hack to forcefully disable "use bytes"
323	COP *oldcop = PL_curcop, cop;	349	COP cop = *PL_curcop;
324	cop.op_private = 0;	350	cop.op_private = 0;
		351
		352	ENTER;
		353	SAVETMPS;
		354
		355	SAVEVPTR (PL_curcop);
325	PL_curcop = &cop;	356	PL_curcop = &cop;
326		357
327	SAVETMPS;
328	qsort (hes, count, sizeof (HE *), he_cmp_slow);	358	qsort (hes, count, sizeof (HE *), he_cmp_slow);
		359
329	FREETMPS;	360	FREETMPS;
330		361	LEAVE;
331	PL_curcop = oldcop;
332	}	362	}
333		363
334	for (i = 0; i < count; ++i)	364	for (i = 0; i < count; ++i)
335	{	365	{
336	INDENT;	366	encode_indent (enc);
337	encode_he (enc, hes [i]);	367	encode_he (enc, hes [i]);
338		368
339	if (i < count - 1)	369	if (i < count - 1)
340	COMMA;	370	encode_comma (enc);
341	}	371	}
342		372
343	NL;	373	encode_nl (enc);
344	}	374	}
345	else	375	else
346	{	376	{
347	SV *sv;	377	SV *sv;
348	HE *he = hv_iternext (hv);	378	HE *he = hv_iternext (hv);
349		379
350	for (;;)	380	for (;;)
351	{	381	{
352	INDENT;	382	encode_indent (enc);
353	encode_he (enc, he);	383	encode_he (enc, he);
354		384
355	if (!(he = hv_iternext (hv)))	385	if (!(he = hv_iternext (hv)))
356	break;	386	break;
357		387
358	COMMA;	388	encode_comma (enc);
359	}	389	}
360		390
361	NL;	391	encode_nl (enc);
362	}	392	}
363	}	393	}
364		394
365	--enc->indent; INDENT; encode_ch (enc, '}');	395	--enc->indent; encode_indent (enc); encode_ch (enc, '}');
366	}	396	}
367		397
368	static void	398	static void
369	encode_sv (enc_t enc, SV sv)	399	encode_sv (enc_t enc, SV sv)
370	{	400	{
…		…
392	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))	422	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))
393	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));	423	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));
394	}	424	}
395	else if (SvROK (sv))	425	else if (SvROK (sv))
396	{	426	{
397	if (!--enc->max_recurse)	427	SV *rv = SvRV (sv);
		428
		429	if (enc->indent >= enc->max_depth)
398	croak ("data structure too deep (hit recursion limit)");	430	croak ("data structure too deep (hit recursion limit)");
399		431
400	sv = SvRV (sv);
401
402	switch (SvTYPE (sv))	432	switch (SvTYPE (rv))
403	{	433	{
404	case SVt_PVAV: encode_av (enc, (AV *)sv); break;	434	case SVt_PVAV: encode_av (enc, (AV *)rv); break;
405	case SVt_PVHV: encode_hv (enc, (HV *)sv); break;	435	case SVt_PVHV: encode_hv (enc, (HV *)rv); break;
406		436
407	default:	437	default:
408	croak ("JSON can only represent references to arrays or hashes");	438	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
		439	SvPV_nolen (sv));
409	}	440	}
410	}	441	}
411	else if (!SvOK (sv))	442	else if (!SvOK (sv))
412	encode_str (enc, "null", 4, 0);	443	encode_str (enc, "null", 4, 0);
413	else	444	else
414	croak ("encountered perl type that JSON cannot handle");	445	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
		446	SvPV_nolen (sv), SvFLAGS (sv));
415	}	447	}
416		448
417	static SV *	449	static SV *
418	encode_json (SV *scalar, UV flags)	450	encode_json (SV *scalar, UV flags)
419	{	451	{
420	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))	452	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
421	croak ("hash- or arraref required (not a simple scalar, use allow_nonref to allow this)");	453	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
422		454
423	enc_t enc;	455	enc_t enc;
424	enc.flags = flags;	456	enc.flags = flags;
425	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));	457	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
426	enc.cur = SvPVX (enc.sv);	458	enc.cur = SvPVX (enc.sv);
427	enc.end = SvEND (enc.sv);	459	enc.end = SvEND (enc.sv);
428	enc.max_recurse = 0;
429	enc.indent = 0;	460	enc.indent = 0;
		461	enc.max_depth = 0x7fffffffUL;
430		462
431	SvPOK_only (enc.sv);	463	SvPOK_only (enc.sv);
432	encode_sv (&enc, scalar);	464	encode_sv (&enc, scalar);
433		465
434	if (!(flags & (F_ASCII \| F_UTF8)))	466	if (!(flags & (F_ASCII \| F_UTF8)))
…		…
441		473
442	return enc.sv;	474	return enc.sv;
443	}	475	}
444		476
445	/////////////////////////////////////////////////////////////////////////////	477	/////////////////////////////////////////////////////////////////////////////
		478	// decoder
446		479
447	#define WS \	480	// structure used for decoding JSON
		481	typedef struct
		482	{
		483	char *cur; // current parser pointer
		484	char *end; // end of input string
		485	const char *err; // parse error, if != 0
		486	UV flags; // F_*
		487	} dec_t;
		488
		489	static void
		490	decode_ws (dec_t *dec)
		491	{
448	for (;;) \	492	for (;;)
449	{ \	493	{
450	char ch = *dec->cur; \	494	char ch = *dec->cur;
		495
451	if (ch > 0x20 \	496	if (ch > 0x20
452	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)) \	497	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))
453	break; \	498	break;
		499
454	++dec->cur; \	500	++dec->cur;
455	}	501	}
		502	}
456		503
457	#define ERR(reason) SB dec->err = reason; goto fail; SE	504	#define ERR(reason) SB dec->err = reason; goto fail; SE
458	#define EXPECT_CH(ch) SB \	505	#define EXPECT_CH(ch) SB \
459	if (*dec->cur != ch) \	506	if (*dec->cur != ch) \
460	ERR (# ch " expected"); \	507	ERR (# ch " expected"); \
…		…
467		514
468	static UV	515	static UV
469	decode_4hex (dec_t *dec)	516	decode_4hex (dec_t *dec)
470	{	517	{
471	signed char d1, d2, d3, d4;	518	signed char d1, d2, d3, d4;
		519	unsigned char cur = (unsigned char )dec->cur;
472		520
473	d1 = decode_hexdigit [((unsigned char *)dec->cur) [0]];
474	if (d1 < 0) ERR ("four hexadecimal digits expected");	521	d1 = decode_hexdigit [cur [0]]; if (d1 < 0) ERR ("four hexadecimal digits expected");
475	d2 = decode_hexdigit [((unsigned char *)dec->cur) [1]];
476	if (d2 < 0) ERR ("four hexadecimal digits expected");	522	d2 = decode_hexdigit [cur [1]]; if (d2 < 0) ERR ("four hexadecimal digits expected");
477	d3 = decode_hexdigit [((unsigned char *)dec->cur) [2]];
478	if (d3 < 0) ERR ("four hexadecimal digits expected");	523	d3 = decode_hexdigit [cur [2]]; if (d3 < 0) ERR ("four hexadecimal digits expected");
479	d4 = decode_hexdigit [((unsigned char *)dec->cur) [3]];
480	if (d4 < 0) ERR ("four hexadecimal digits expected");	524	d4 = decode_hexdigit [cur [3]]; if (d4 < 0) ERR ("four hexadecimal digits expected");
481		525
482	dec->cur += 4;	526	dec->cur += 4;
483		527
484	return ((UV)d1) << 12	528	return ((UV)d1) << 12
485	\| ((UV)d2) << 8	529	\| ((UV)d2) << 8
…		…
488		532
489	fail:	533	fail:
490	return (UV)-1;	534	return (UV)-1;
491	}	535	}
492		536
493	#define APPEND_GROW(n) SB \
494	if (cur + (n) >= end) \
495	{ \
496	STRLEN ofs = cur - SvPVX (sv); \
497	SvGROW (sv, ofs + (n) + 1); \
498	cur = SvPVX (sv) + ofs; \
499	end = SvEND (sv); \
500	} \
501	SE
502
503	#define APPEND_CH(ch) SB \
504	APPEND_GROW (1); \
505	*cur++ = (ch); \
506	SE
507
508	static SV *	537	static SV *
509	decode_str (dec_t *dec)	538	decode_str (dec_t *dec)
510	{	539	{
511	SV *sv = NEWSV (0,2);	540	SV *sv = 0;
512	int utf8 = 0;	541	int utf8 = 0;
513	char *cur = SvPVX (sv);
514	char *end = SvEND (sv);
515		542
516	for (;;)	543	do
517	{	544	{
518	unsigned char ch = (unsigned char )dec->cur;	545	char buf [SHORT_STRING_LEN + UTF8_MAX_LEN];
		546	char *cur = buf;
519		547
520	if (ch == '"')	548	do
521	break;
522	else if (ch == '\\')
523	{	549	{
524	switch (*++dec->cur)	550	unsigned char ch = (unsigned char )dec->cur++;
		551
		552	if (ch == '"')
525	{	553	{
526	case '\\':	554	--dec->cur;
527	case '/':	555	break;
528	case '"': APPEND_CH (*dec->cur++); break;	556	}
529		557	else if (ch == '\\')
530	case 'b': APPEND_CH ('\010'); ++dec->cur; break;	558	{
531	case 't': APPEND_CH ('\011'); ++dec->cur; break;	559	switch (*dec->cur)
532	case 'n': APPEND_CH ('\012'); ++dec->cur; break;
533	case 'f': APPEND_CH ('\014'); ++dec->cur; break;
534	case 'r': APPEND_CH ('\015'); ++dec->cur; break;
535
536	case 'u':
537	{	560	{
538	UV lo, hi;	561	case '\\':
539	++dec->cur;	562	case '/':
		563	case '"': cur++ = dec->cur++; break;
540		564
541	hi = decode_4hex (dec);	565	case 'b': ++dec->cur; *cur++ = '\010'; break;
542	if (hi == (UV)-1)	566	case 't': ++dec->cur; *cur++ = '\011'; break;
543	goto fail;	567	case 'n': ++dec->cur; *cur++ = '\012'; break;
		568	case 'f': ++dec->cur; *cur++ = '\014'; break;
		569	case 'r': ++dec->cur; *cur++ = '\015'; break;
544		570
545	// possibly a surrogate pair	571	case 'u':
546	if (hi >= 0xd800 && hi < 0xdc00)
547	{	572	{
548	if (dec->cur [0] != '\\' \|\| dec->cur [1] != 'u')	573	UV lo, hi;
549	ERR ("missing low surrogate character in surrogate pair");
550
551	dec->cur += 2;	574	++dec->cur;
552		575
553	lo = decode_4hex (dec);	576	hi = decode_4hex (dec);
554	if (lo == (UV)-1)	577	if (hi == (UV)-1)
555	goto fail;	578	goto fail;
556		579
		580	// possibly a surrogate pair
		581	if (hi >= 0xd800)
		582	if (hi < 0xdc00)
		583	{
		584	if (dec->cur [0] != '\\' \|\| dec->cur [1] != 'u')
		585	ERR ("missing low surrogate character in surrogate pair");
		586
		587	dec->cur += 2;
		588
		589	lo = decode_4hex (dec);
		590	if (lo == (UV)-1)
		591	goto fail;
		592
557	if (lo < 0xdc00 \|\| lo >= 0xe000)	593	if (lo < 0xdc00 \|\| lo >= 0xe000)
558	ERR ("surrogate pair expected");	594	ERR ("surrogate pair expected");
559		595
560	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;	596	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
		597	}
		598	else if (hi < 0xe000)
		599	ERR ("missing high surrogate character in surrogate pair");
		600
		601	if (hi >= 0x80)
		602	{
		603	utf8 = 1;
		604
		605	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
		606	}
		607	else
		608	*cur++ = hi;
561	}	609	}
562	else if (hi >= 0xdc00 && hi < 0xe000)
563	ERR ("missing high surrogate character in surrogate pair");
564
565	if (hi >= 0x80)
566	{	610	break;
567	utf8 = 1;
568		611
569	APPEND_GROW (4); // at most 4 bytes for 21 bits
570	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
571	}
572	else	612	default:
573	APPEND_CH (hi);	613	--dec->cur;
		614	ERR ("illegal backslash escape sequence in string");
574	}	615	}
575	break;
576
577	default:
578	--dec->cur;
579	ERR ("illegal backslash escape sequence in string");
580	}	616	}
		617	else if (ch >= 0x20 && ch <= 0x7f)
		618	*cur++ = ch;
		619	else if (ch >= 0x80)
		620	{
		621	--dec->cur;
		622
		623	STRLEN clen;
		624	UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
		625	if (clen == (STRLEN)-1)
		626	ERR ("malformed UTF-8 character in JSON string");
		627
		628	do
		629	{
		630	cur++ = dec->cur++;
		631	}
		632	while (--clen);
		633
		634	utf8 = 1;
		635	}
		636	else if (!ch)
		637	ERR ("unexpected end of string while parsing json string");
		638	else
		639	ERR ("invalid character encountered");
		640
581	}	641	}
582	else if (ch >= 0x20 && ch <= 0x7f)	642	while (cur < buf + SHORT_STRING_LEN);
583	APPEND_CH (*dec->cur++);	643
584	else if (ch >= 0x80)	644	STRLEN len = cur - buf;
		645
		646	if (sv)
585	{	647	{
586	STRLEN clen;	648	SvGROW (sv, SvCUR (sv) + len + 1);
587	UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);	649	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
588	if (clen == (STRLEN)-1)	650	SvCUR_set (sv, SvCUR (sv) + len);
589	ERR ("malformed UTF-8 character in JSON string");
590
591	APPEND_GROW (clen);
592	do
593	{
594	cur++ = dec->cur++;
595	}
596	while (--clen);
597
598	utf8 = 1;
599	}	651	}
600	else if (dec->cur == dec->end)
601	ERR ("unexpected end of string while parsing json string");
602	else	652	else
603	ERR ("invalid character encountered");	653	sv = newSVpvn (buf, len);
604	}	654	}
		655	while (*dec->cur != '"');
605		656
606	++dec->cur;	657	++dec->cur;
607		658
608	SvCUR_set (sv, cur - SvPVX (sv));	659	if (sv)
609		660	{
610	SvPOK_only (sv);	661	SvPOK_only (sv);
611	*SvEND (sv) = 0;	662	*SvEND (sv) = 0;
612		663
613	if (utf8)	664	if (utf8)
614	SvUTF8_on (sv);	665	SvUTF8_on (sv);
615		666	}
616	if (dec->flags & F_SHRINK)	667	else
617	shrink (sv);	668	sv = newSVpvn ("", 0);
618		669
619	return sv;	670	return sv;
620		671
621	fail:	672	fail:
622	SvREFCNT_dec (sv);
623	return 0;	673	return 0;
624	}	674	}
625		675
626	static SV *	676	static SV *
627	decode_num (dec_t *dec)	677	decode_num (dec_t *dec)
…		…
708	static SV *	758	static SV *
709	decode_av (dec_t *dec)	759	decode_av (dec_t *dec)
710	{	760	{
711	AV *av = newAV ();	761	AV *av = newAV ();
712		762
713	WS;	763	decode_ws (dec);
714	if (*dec->cur == ']')	764	if (*dec->cur == ']')
715	++dec->cur;	765	++dec->cur;
716	else	766	else
717	for (;;)	767	for (;;)
718	{	768	{
…		…
722	if (!value)	772	if (!value)
723	goto fail;	773	goto fail;
724		774
725	av_push (av, value);	775	av_push (av, value);
726		776
727	WS;	777	decode_ws (dec);
728		778
729	if (*dec->cur == ']')	779	if (*dec->cur == ']')
730	{	780	{
731	++dec->cur;	781	++dec->cur;
732	break;	782	break;
…		…
748	static SV *	798	static SV *
749	decode_hv (dec_t *dec)	799	decode_hv (dec_t *dec)
750	{	800	{
751	HV *hv = newHV ();	801	HV *hv = newHV ();
752		802
753	WS;	803	decode_ws (dec);
754	if (*dec->cur == '}')	804	if (*dec->cur == '}')
755	++dec->cur;	805	++dec->cur;
756	else	806	else
757	for (;;)	807	for (;;)
758	{	808	{
759	SV key, value;	809	SV key, value;
760		810
761	WS; EXPECT_CH ('"');	811	decode_ws (dec); EXPECT_CH ('"');
762		812
763	key = decode_str (dec);	813	key = decode_str (dec);
764	if (!key)	814	if (!key)
765	goto fail;	815	goto fail;
766		816
767	WS; EXPECT_CH (':');	817	decode_ws (dec); EXPECT_CH (':');
768		818
769	value = decode_sv (dec);	819	value = decode_sv (dec);
770	if (!value)	820	if (!value)
771	{	821	{
772	SvREFCNT_dec (key);	822	SvREFCNT_dec (key);
…		…
774	}	824	}
775		825
776	//TODO: optimise	826	//TODO: optimise
777	hv_store_ent (hv, key, value, 0);	827	hv_store_ent (hv, key, value, 0);
778		828
779	WS;	829	decode_ws (dec);
780		830
781	if (*dec->cur == '}')	831	if (*dec->cur == '}')
782	{	832	{
783	++dec->cur;	833	++dec->cur;
784	break;	834	break;
…		…
798	}	848	}
799		849
800	static SV *	850	static SV *
801	decode_sv (dec_t *dec)	851	decode_sv (dec_t *dec)
802	{	852	{
803	WS;	853	decode_ws (dec);
804	switch (*dec->cur)	854	switch (*dec->cur)
805	{	855	{
806	case '"': ++dec->cur; return decode_str (dec);	856	case '"': ++dec->cur; return decode_str (dec);
807	case '[': ++dec->cur; return decode_av (dec);	857	case '[': ++dec->cur; return decode_av (dec);
808	case '{': ++dec->cur; return decode_hv (dec);	858	case '{': ++dec->cur; return decode_hv (dec);
…		…
878	{	928	{
879	IV offset = dec.flags & F_UTF8	929	IV offset = dec.flags & F_UTF8
880	? dec.cur - SvPVX (string)	930	? dec.cur - SvPVX (string)
881	: utf8_distance (dec.cur, SvPVX (string));	931	: utf8_distance (dec.cur, SvPVX (string));
882	SV *uni = sv_newmortal ();	932	SV *uni = sv_newmortal ();
		933
883	// horrible hack to silence warning inside pv_uni_display	934	// horrible hack to silence warning inside pv_uni_display
884	COP cop;	935	COP cop = *PL_curcop;
885	memset (&cop, 0, sizeof (cop));
886	cop.cop_warnings = pWARN_NONE;	936	cop.cop_warnings = pWARN_NONE;
		937	ENTER;
887	SAVEVPTR (PL_curcop);	938	SAVEVPTR (PL_curcop);
888	PL_curcop = &cop;	939	PL_curcop = &cop;
889
890	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);	940	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
		941	LEAVE;
		942
891	croak ("%s, at character offset %d (%s)",	943	croak ("%s, at character offset %d (%s)",
892	dec.err,	944	dec.err,
893	(int)offset,	945	(int)offset,
894	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");	946	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
895	}	947	}
896		948
897	sv = sv_2mortal (sv);	949	sv = sv_2mortal (sv);
898		950
899	if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))	951	if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
900	croak ("JSON object or array expected (but number, string, true, false or null found, use allow_nonref to allow this)");	952	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
901		953
902	return sv;	954	return sv;
903	}	955	}
		956
		957	/////////////////////////////////////////////////////////////////////////////
		958	// XS interface functions
904		959
905	MODULE = JSON::XS PACKAGE = JSON::XS	960	MODULE = JSON::XS PACKAGE = JSON::XS
906		961
907	BOOT:	962	BOOT:
908	{	963	{
…		…
935	utf8 = F_UTF8	990	utf8 = F_UTF8
936	indent = F_INDENT	991	indent = F_INDENT
937	canonical = F_CANONICAL	992	canonical = F_CANONICAL
938	space_before = F_SPACE_BEFORE	993	space_before = F_SPACE_BEFORE
939	space_after = F_SPACE_AFTER	994	space_after = F_SPACE_AFTER
940	json_rpc = F_JSON_RPC
941	pretty = F_PRETTY	995	pretty = F_PRETTY
942	allow_nonref = F_ALLOW_NONREF	996	allow_nonref = F_ALLOW_NONREF
943	shrink = F_SHRINK	997	shrink = F_SHRINK
944	CODE:	998	CODE:
945	{	999	{

Diff Legend

–	Removed lines
+	Added lines
<	Changed lines
>	Changed lines

Comparing JSON-XS/XS.xs (file contents): Revision 1.7 by root, Fri Mar 23 15:57:18 2007 UTC vs. Revision 1.12 by root, Sat Mar 24 22:10:08 2007 UTC

Diff Legend

Comparing JSON-XS/XS.xs (file contents):
Revision 1.7 by root, Fri Mar 23 15:57:18 2007 UTC vs.
Revision 1.12 by root, Sat Mar 24 22:10:08 2007 UTC