ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
Revision: 1.17
Committed: Sun Mar 25 02:46:41 2007 UTC (17 years, 1 month ago) by root
Branch: MAIN
Changes since 1.16: +1 -1 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4
5 #include "assert.h"
6 #include "string.h"
7 #include "stdlib.h"
8
// option flags; a combination of these is stored in the UV behind every
// JSON::XS object and handed to the encoder/decoder
#define F_ASCII        0x00000001 // escape everything >= U+80 as \uXXXX when encoding
#define F_UTF8         0x00000002 // JSON text is UTF-8 encoded octets
#define F_INDENT       0x00000004 // emit newlines and indentation
#define F_CANONICAL    0x00000008 // sort hash keys when encoding
#define F_SPACE_BEFORE 0x00000010 // emit a space before the ':' of a pair
#define F_SPACE_AFTER  0x00000020 // emit a space after ':' (and after ',' when not indenting)
#define F_ALLOW_NONREF 0x00000080 // allow non-reference values at the top level
#define F_SHRINK       0x00000100 // shrink the encoded result scalar

// F_SKIPINVALID?
// F_EXECCODEREF?
// F_SELFCONVERT? <=> { &__class__ => }

// parenthesised so that the macro behaves as one value inside larger
// expressions such as ~F_PRETTY or (flags & F_PRETTY)
#define F_PRETTY  (F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER)
#define F_DEFAULT 0

#define INIT_SIZE   32 // initial scalar size to be allocated
#define INDENT_STEP 3  // spaces per indentation level

#define UTF8_MAX_LEN     11  // for perls UTF-X: max. number of octets per character
#define SHORT_STRING_LEN 512 // special-case strings of up to this size

// open/close a multi-statement macro body
#define SB do {
#define SE } while (0)
33
34 static HV *json_stash; // JSON::XS::
35
36 /////////////////////////////////////////////////////////////////////////////
37 // utility functions
38
39 static UV *
40 SvJSON (SV *sv)
41 {
42 if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
43 croak ("object is not of type JSON::XS");
44
45 return &SvUVX (SvRV (sv));
46 }
47
48 static void
49 shrink (SV *sv)
50 {
51 sv_utf8_downgrade (sv, 1);
52 if (SvLEN (sv) > SvCUR (sv) + 1)
53 {
54 #ifdef SvPV_shrink_to_cur
55 SvPV_shrink_to_cur (sv);
56 #elif defined (SvPV_renew)
57 SvPV_renew (sv, SvCUR (sv) + 1);
58 #endif
59 }
60 }
61
62 // decode an utf-8 character and return it, or (UV)-1 in
63 // case of an error.
64 // we special-case "safe" characters from U+80 .. U+7FF,
65 // but use the very good perl function to parse anything else.
66 // note that we never call this function for a ascii codepoints
67 static UV
68 decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen)
69 {
70 if (s[0] > 0xdf || s[0] < 0xc2)
71 return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
72 else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf)
73 {
74 *clen = 2;
75 return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
76 }
77 else
78 return (UV)-1;
79 }
80
81 /////////////////////////////////////////////////////////////////////////////
82 // encoder
83
84 // structure used for encoding JSON
85 typedef struct
86 {
87 char *cur; // SvPVX (sv) + current output position
88 char *end; // SvEND (sv)
89 SV *sv; // result scalar
90 UV flags; // F_*
91 int indent; // indentation level
92 int max_depth; // max. recursion level
93 } enc_t;
94
95 static void
96 need (enc_t *enc, STRLEN len)
97 {
98 if (enc->cur + len >= enc->end)
99 {
100 STRLEN cur = enc->cur - SvPVX (enc->sv);
101 SvGROW (enc->sv, cur + len + 1);
102 enc->cur = SvPVX (enc->sv) + cur;
103 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);
104 }
105 }
106
107 static void
108 encode_ch (enc_t *enc, char ch)
109 {
110 need (enc, 1);
111 *enc->cur++ = ch;
112 }
113
114 static void
115 encode_str (enc_t *enc, char *str, STRLEN len, int is_utf8)
116 {
117 char *end = str + len;
118
119 need (enc, len);
120
121 while (str < end)
122 {
123 unsigned char ch = *(unsigned char *)str;
124
125 if (ch >= 0x20 && ch < 0x80) // most common case
126 {
127 if (ch == '"') // but with slow exceptions
128 {
129 need (enc, len += 1);
130 *enc->cur++ = '\\';
131 *enc->cur++ = '"';
132 }
133 else if (ch == '\\')
134 {
135 need (enc, len += 1);
136 *enc->cur++ = '\\';
137 *enc->cur++ = '\\';
138 }
139 else
140 *enc->cur++ = ch;
141
142 ++str;
143 }
144 else
145 {
146 switch (ch)
147 {
148 case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
149 case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
150 case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
151 case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
152 case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;
153
154 default:
155 {
156 STRLEN clen;
157 UV uch;
158
159 if (is_utf8)
160 {
161 //uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
162 uch = decode_utf8 (str, end - str, &clen);
163 if (clen == (STRLEN)-1)
164 croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
165 }
166 else
167 {
168 uch = ch;
169 clen = 1;
170 }
171
172 if (uch > 0x10FFFFUL)
173 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
174
175 if (uch < 0x80 || enc->flags & F_ASCII)
176 {
177 if (uch > 0xFFFFUL)
178 {
179 need (enc, len += 11);
180 sprintf (enc->cur, "\\u%04x\\u%04x",
181 (int)((uch - 0x10000) / 0x400 + 0xD800),
182 (int)((uch - 0x10000) % 0x400 + 0xDC00));
183 enc->cur += 12;
184 }
185 else
186 {
187 static char hexdigit [16] = "0123456789abcdef";
188 need (enc, len += 5);
189 *enc->cur++ = '\\';
190 *enc->cur++ = 'u';
191 *enc->cur++ = hexdigit [ uch >> 12 ];
192 *enc->cur++ = hexdigit [(uch >> 8) & 15];
193 *enc->cur++ = hexdigit [(uch >> 4) & 15];
194 *enc->cur++ = hexdigit [(uch >> 0) & 15];
195 }
196
197 str += clen;
198 }
199 else if (is_utf8)
200 {
201 need (enc, len += clen);
202 do
203 {
204 *enc->cur++ = *str++;
205 }
206 while (--clen);
207 }
208 else
209 {
210 need (enc, len += UTF8_MAX_LEN - 1); // never more than 11 bytes needed
211 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
212 ++str;
213 }
214 }
215 }
216 }
217
218 --len;
219 }
220 }
221
222 static void
223 encode_indent (enc_t *enc)
224 {
225 if (enc->flags & F_INDENT)
226 {
227 int spaces = enc->indent * INDENT_STEP;
228
229 need (enc, spaces);
230 memset (enc->cur, ' ', spaces);
231 enc->cur += spaces;
232 }
233 }
234
235 static void
236 encode_space (enc_t *enc)
237 {
238 need (enc, 1);
239 encode_ch (enc, ' ');
240 }
241
242 static void
243 encode_nl (enc_t *enc)
244 {
245 if (enc->flags & F_INDENT)
246 {
247 need (enc, 1);
248 encode_ch (enc, '\n');
249 }
250 }
251
252 static void
253 encode_comma (enc_t *enc)
254 {
255 encode_ch (enc, ',');
256
257 if (enc->flags & F_INDENT)
258 encode_nl (enc);
259 else if (enc->flags & F_SPACE_AFTER)
260 encode_space (enc);
261 }
262
263 static void encode_sv (enc_t *enc, SV *sv);
264
265 static void
266 encode_av (enc_t *enc, AV *av)
267 {
268 int i, len = av_len (av);
269
270 encode_ch (enc, '['); encode_nl (enc);
271 ++enc->indent;
272
273 for (i = 0; i <= len; ++i)
274 {
275 encode_indent (enc);
276 encode_sv (enc, *av_fetch (av, i, 0));
277
278 if (i < len)
279 encode_comma (enc);
280 }
281
282 encode_nl (enc);
283
284 --enc->indent;
285 encode_indent (enc); encode_ch (enc, ']');
286 }
287
288 static void
289 encode_he (enc_t *enc, HE *he)
290 {
291 encode_ch (enc, '"');
292
293 if (HeKLEN (he) == HEf_SVKEY)
294 {
295 SV *sv = HeSVKEY (he);
296 STRLEN len;
297 char *str;
298
299 SvGETMAGIC (sv);
300 str = SvPV (sv, len);
301
302 encode_str (enc, str, len, SvUTF8 (sv));
303 }
304 else
305 encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
306
307 encode_ch (enc, '"');
308
309 if (enc->flags & F_SPACE_BEFORE) encode_space (enc);
310 encode_ch (enc, ':');
311 if (enc->flags & F_SPACE_AFTER ) encode_space (enc);
312 encode_sv (enc, HeVAL (he));
313 }
314
315 // compare hash entries, used when all keys are bytestrings
316 static int
317 he_cmp_fast (const void *a_, const void *b_)
318 {
319 int cmp;
320
321 HE *a = *(HE **)a_;
322 HE *b = *(HE **)b_;
323
324 STRLEN la = HeKLEN (a);
325 STRLEN lb = HeKLEN (b);
326
327 if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
328 cmp = la - lb;
329
330 return cmp;
331 }
332
333 // compare hash entries, used when some keys are sv's or utf-x
334 static int
335 he_cmp_slow (const void *a, const void *b)
336 {
337 return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b));
338 }
339
340 static void
341 encode_hv (enc_t *enc, HV *hv)
342 {
343 int count, i;
344
345 encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;
346
347 if ((count = hv_iterinit (hv)))
348 {
349 // for canonical output we have to sort by keys first
350 // actually, this is mostly due to the stupid so-called
351 // security workaround added somewhere in 5.8.x.
352 // that randomises hash orderings
353 if (enc->flags & F_CANONICAL)
354 {
355 HE *he, *hes [count]; // if your compiler dies here, you need to enable C99 mode
356 int fast = 1;
357
358 i = 0;
359 while ((he = hv_iternext (hv)))
360 {
361 hes [i++] = he;
362 if (HeKLEN (he) < 0 || HeKUTF8 (he))
363 fast = 0;
364 }
365
366 assert (i == count);
367
368 if (fast)
369 qsort (hes, count, sizeof (HE *), he_cmp_fast);
370 else
371 {
372 // hack to forcefully disable "use bytes"
373 COP cop = *PL_curcop;
374 cop.op_private = 0;
375
376 ENTER;
377 SAVETMPS;
378
379 SAVEVPTR (PL_curcop);
380 PL_curcop = &cop;
381
382 qsort (hes, count, sizeof (HE *), he_cmp_slow);
383
384 FREETMPS;
385 LEAVE;
386 }
387
388 for (i = 0; i < count; ++i)
389 {
390 encode_indent (enc);
391 encode_he (enc, hes [i]);
392
393 if (i < count - 1)
394 encode_comma (enc);
395 }
396
397 encode_nl (enc);
398 }
399 else
400 {
401 SV *sv;
402 HE *he = hv_iternext (hv);
403
404 for (;;)
405 {
406 encode_indent (enc);
407 encode_he (enc, he);
408
409 if (!(he = hv_iternext (hv)))
410 break;
411
412 encode_comma (enc);
413 }
414
415 encode_nl (enc);
416 }
417 }
418
419 --enc->indent; encode_indent (enc); encode_ch (enc, '}');
420 }
421
422 static void
423 encode_sv (enc_t *enc, SV *sv)
424 {
425 SvGETMAGIC (sv);
426
427 if (SvPOKp (sv))
428 {
429 STRLEN len;
430 char *str = SvPV (sv, len);
431 encode_ch (enc, '"');
432 encode_str (enc, str, len, SvUTF8 (sv));
433 encode_ch (enc, '"');
434 }
435 else if (SvNOKp (sv))
436 {
437 need (enc, NV_DIG + 32);
438 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
439 enc->cur += strlen (enc->cur);
440 }
441 else if (SvIOKp (sv))
442 {
443 need (enc, 64);
444 enc->cur +=
445 SvIsUV(sv)
446 ? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))
447 : snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));
448 }
449 else if (SvROK (sv))
450 {
451 SV *rv = SvRV (sv);
452
453 if (enc->indent >= enc->max_depth)
454 croak ("data structure too deep (hit recursion limit)");
455
456 switch (SvTYPE (rv))
457 {
458 case SVt_PVAV: encode_av (enc, (AV *)rv); break;
459 case SVt_PVHV: encode_hv (enc, (HV *)rv); break;
460
461 default:
462 croak ("encountered %s, but JSON can only represent references to arrays or hashes",
463 SvPV_nolen (sv));
464 }
465 }
466 else if (!SvOK (sv))
467 encode_str (enc, "null", 4, 0);
468 else
469 croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
470 SvPV_nolen (sv), SvFLAGS (sv));
471 }
472
473 static SV *
474 encode_json (SV *scalar, UV flags)
475 {
476 if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
477 croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
478
479 enc_t enc;
480 enc.flags = flags;
481 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
482 enc.cur = SvPVX (enc.sv);
483 enc.end = SvEND (enc.sv);
484 enc.indent = 0;
485 enc.max_depth = 0x7fffffffUL;
486
487 SvPOK_only (enc.sv);
488 encode_sv (&enc, scalar);
489
490 if (!(flags & (F_ASCII | F_UTF8)))
491 SvUTF8_on (enc.sv);
492
493 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
494
495 if (enc.flags & F_SHRINK)
496 shrink (enc.sv);
497
498 return enc.sv;
499 }
500
501 /////////////////////////////////////////////////////////////////////////////
502 // decoder
503
504 // structure used for decoding JSON
505 typedef struct
506 {
507 char *cur; // current parser pointer
508 char *end; // end of input string
509 const char *err; // parse error, if != 0
510 UV flags; // F_*
511 } dec_t;
512
513 static void
514 decode_ws (dec_t *dec)
515 {
516 for (;;)
517 {
518 char ch = *dec->cur;
519
520 if (ch > 0x20
521 || (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))
522 break;
523
524 ++dec->cur;
525 }
526 }
527
// record a parse error and jump to the fail: label of the enclosing
// function, which must have a dec_t *dec in scope
#define ERR(reason) SB dec->err = reason; goto fail; SE

// consume the character ch, or fail with "<ch> expected"
#define EXPECT_CH(ch) SB \
  if (*dec->cur == ch) \
    ++dec->cur; \
  else \
    ERR (# ch " expected"); \
  SE
534
535 static SV *decode_sv (dec_t *dec);
536
537 static signed char decode_hexdigit[256];
538
539 static UV
540 decode_4hex (dec_t *dec)
541 {
542 signed char d1, d2, d3, d4;
543 unsigned char *cur = (unsigned char *)dec->cur;
544
545 d1 = decode_hexdigit [cur [0]]; if (d1 < 0) ERR ("four hexadecimal digits expected");
546 d2 = decode_hexdigit [cur [1]]; if (d2 < 0) ERR ("four hexadecimal digits expected");
547 d3 = decode_hexdigit [cur [2]]; if (d3 < 0) ERR ("four hexadecimal digits expected");
548 d4 = decode_hexdigit [cur [3]]; if (d4 < 0) ERR ("four hexadecimal digits expected");
549
550 dec->cur += 4;
551
552 return ((UV)d1) << 12
553 | ((UV)d2) << 8
554 | ((UV)d3) << 4
555 | ((UV)d4);
556
557 fail:
558 return (UV)-1;
559 }
560
561 static SV *
562 decode_str (dec_t *dec)
563 {
564 SV *sv = 0;
565 int utf8 = 0;
566
567 do
568 {
569 char buf [SHORT_STRING_LEN + UTF8_MAX_LEN];
570 char *cur = buf;
571
572 do
573 {
574 unsigned char ch = *(unsigned char *)dec->cur++;
575
576 if (ch == '"')
577 {
578 --dec->cur;
579 break;
580 }
581 else if (ch == '\\')
582 {
583 switch (*dec->cur)
584 {
585 case '\\':
586 case '/':
587 case '"': *cur++ = *dec->cur++; break;
588
589 case 'b': ++dec->cur; *cur++ = '\010'; break;
590 case 't': ++dec->cur; *cur++ = '\011'; break;
591 case 'n': ++dec->cur; *cur++ = '\012'; break;
592 case 'f': ++dec->cur; *cur++ = '\014'; break;
593 case 'r': ++dec->cur; *cur++ = '\015'; break;
594
595 case 'u':
596 {
597 UV lo, hi;
598 ++dec->cur;
599
600 hi = decode_4hex (dec);
601 if (hi == (UV)-1)
602 goto fail;
603
604 // possibly a surrogate pair
605 if (hi >= 0xd800)
606 if (hi < 0xdc00)
607 {
608 if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
609 ERR ("missing low surrogate character in surrogate pair");
610
611 dec->cur += 2;
612
613 lo = decode_4hex (dec);
614 if (lo == (UV)-1)
615 goto fail;
616
617 if (lo < 0xdc00 || lo >= 0xe000)
618 ERR ("surrogate pair expected");
619
620 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
621 }
622 else if (hi < 0xe000)
623 ERR ("missing high surrogate character in surrogate pair");
624
625 if (hi >= 0x80)
626 {
627 utf8 = 1;
628
629 cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
630 }
631 else
632 *cur++ = hi;
633 }
634 break;
635
636 default:
637 --dec->cur;
638 ERR ("illegal backslash escape sequence in string");
639 }
640 }
641 else if (ch >= 0x20 && ch <= 0x7f)
642 *cur++ = ch;
643 else if (ch >= 0x80)
644 {
645 --dec->cur;
646
647 STRLEN clen;
648 UV uch = decode_utf8 (dec->cur, dec->end - dec->cur, &clen);
649 if (clen == (STRLEN)-1)
650 ERR ("malformed UTF-8 character in JSON string");
651
652 do
653 {
654 *cur++ = *dec->cur++;
655 }
656 while (--clen);
657
658 utf8 = 1;
659 }
660 else if (!ch)
661 ERR ("unexpected end of string while parsing json string");
662 else
663 ERR ("invalid character encountered");
664
665 }
666 while (cur < buf + SHORT_STRING_LEN);
667
668 STRLEN len = cur - buf;
669
670 if (sv)
671 {
672 SvGROW (sv, SvCUR (sv) + len + 1);
673 memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
674 SvCUR_set (sv, SvCUR (sv) + len);
675 }
676 else
677 sv = newSVpvn (buf, len);
678 }
679 while (*dec->cur != '"');
680
681 ++dec->cur;
682
683 if (sv)
684 {
685 SvPOK_only (sv);
686 *SvEND (sv) = 0;
687
688 if (utf8)
689 SvUTF8_on (sv);
690 }
691 else
692 sv = newSVpvn ("", 0);
693
694 return sv;
695
696 fail:
697 return 0;
698 }
699
700 static SV *
701 decode_num (dec_t *dec)
702 {
703 int is_nv = 0;
704 char *start = dec->cur;
705
706 // [minus]
707 if (*dec->cur == '-')
708 ++dec->cur;
709
710 if (*dec->cur == '0')
711 {
712 ++dec->cur;
713 if (*dec->cur >= '0' && *dec->cur <= '9')
714 ERR ("malformed number (leading zero must not be followed by another digit)");
715 }
716 else if (*dec->cur < '0' || *dec->cur > '9')
717 ERR ("malformed number (no digits after initial minus)");
718 else
719 do
720 {
721 ++dec->cur;
722 }
723 while (*dec->cur >= '0' && *dec->cur <= '9');
724
725 // [frac]
726 if (*dec->cur == '.')
727 {
728 ++dec->cur;
729
730 if (*dec->cur < '0' || *dec->cur > '9')
731 ERR ("malformed number (no digits after decimal point)");
732
733 do
734 {
735 ++dec->cur;
736 }
737 while (*dec->cur >= '0' && *dec->cur <= '9');
738
739 is_nv = 1;
740 }
741
742 // [exp]
743 if (*dec->cur == 'e' || *dec->cur == 'E')
744 {
745 ++dec->cur;
746
747 if (*dec->cur == '-' || *dec->cur == '+')
748 ++dec->cur;
749
750 if (*dec->cur < '0' || *dec->cur > '9')
751 ERR ("malformed number (no digits after exp sign)");
752
753 do
754 {
755 ++dec->cur;
756 }
757 while (*dec->cur >= '0' && *dec->cur <= '9');
758
759 is_nv = 1;
760 }
761
762 if (!is_nv)
763 {
764 UV uv;
765 int numtype = grok_number (start, dec->cur - start, &uv);
766 if (numtype & IS_NUMBER_IN_UV)
767 if (numtype & IS_NUMBER_NEG)
768 {
769 if (uv < (UV)IV_MIN)
770 return newSViv (-(IV)uv);
771 }
772 else
773 return newSVuv (uv);
774 }
775
776 return newSVnv (Atof (start));
777
778 fail:
779 return 0;
780 }
781
782 static SV *
783 decode_av (dec_t *dec)
784 {
785 AV *av = newAV ();
786
787 decode_ws (dec);
788 if (*dec->cur == ']')
789 ++dec->cur;
790 else
791 for (;;)
792 {
793 SV *value;
794
795 value = decode_sv (dec);
796 if (!value)
797 goto fail;
798
799 av_push (av, value);
800
801 decode_ws (dec);
802
803 if (*dec->cur == ']')
804 {
805 ++dec->cur;
806 break;
807 }
808
809 if (*dec->cur != ',')
810 ERR (", or ] expected while parsing array");
811
812 ++dec->cur;
813 }
814
815 return newRV_noinc ((SV *)av);
816
817 fail:
818 SvREFCNT_dec (av);
819 return 0;
820 }
821
822 static SV *
823 decode_hv (dec_t *dec)
824 {
825 HV *hv = newHV ();
826
827 decode_ws (dec);
828 if (*dec->cur == '}')
829 ++dec->cur;
830 else
831 for (;;)
832 {
833 SV *key, *value;
834
835 decode_ws (dec); EXPECT_CH ('"');
836
837 key = decode_str (dec);
838 if (!key)
839 goto fail;
840
841 decode_ws (dec); EXPECT_CH (':');
842
843 value = decode_sv (dec);
844 if (!value)
845 {
846 SvREFCNT_dec (key);
847 goto fail;
848 }
849
850 //TODO: optimise
851 hv_store_ent (hv, key, value, 0);
852
853 decode_ws (dec);
854
855 if (*dec->cur == '}')
856 {
857 ++dec->cur;
858 break;
859 }
860
861 if (*dec->cur != ',')
862 ERR (", or } expected while parsing object/hash");
863
864 ++dec->cur;
865 }
866
867 return newRV_noinc ((SV *)hv);
868
869 fail:
870 SvREFCNT_dec (hv);
871 return 0;
872 }
873
874 static SV *
875 decode_sv (dec_t *dec)
876 {
877 decode_ws (dec);
878 switch (*dec->cur)
879 {
880 case '"': ++dec->cur; return decode_str (dec);
881 case '[': ++dec->cur; return decode_av (dec);
882 case '{': ++dec->cur; return decode_hv (dec);
883
884 case '-':
885 case '0': case '1': case '2': case '3': case '4':
886 case '5': case '6': case '7': case '8': case '9':
887 return decode_num (dec);
888
889 case 't':
890 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
891 {
892 dec->cur += 4;
893 return newSViv (1);
894 }
895 else
896 ERR ("'true' expected");
897
898 break;
899
900 case 'f':
901 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
902 {
903 dec->cur += 5;
904 return newSViv (0);
905 }
906 else
907 ERR ("'false' expected");
908
909 break;
910
911 case 'n':
912 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
913 {
914 dec->cur += 4;
915 return newSVsv (&PL_sv_undef);
916 }
917 else
918 ERR ("'null' expected");
919
920 break;
921
922 default:
923 ERR ("malformed json string, neither array, object, number, string or atom");
924 break;
925 }
926
927 fail:
928 return 0;
929 }
930
931 static SV *
932 decode_json (SV *string, UV flags)
933 {
934 SV *sv;
935
936 if (flags & F_UTF8)
937 sv_utf8_downgrade (string, 0);
938 else
939 sv_utf8_upgrade (string);
940
941 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
942
943 dec_t dec;
944 dec.flags = flags;
945 dec.cur = SvPVX (string);
946 dec.end = SvEND (string);
947 dec.err = 0;
948
949 sv = decode_sv (&dec);
950
951 if (!sv)
952 {
953 IV offset = dec.flags & F_UTF8
954 ? dec.cur - SvPVX (string)
955 : utf8_distance (dec.cur, SvPVX (string));
956 SV *uni = sv_newmortal ();
957
958 // horrible hack to silence warning inside pv_uni_display
959 COP cop = *PL_curcop;
960 cop.cop_warnings = pWARN_NONE;
961 ENTER;
962 SAVEVPTR (PL_curcop);
963 PL_curcop = &cop;
964 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
965 LEAVE;
966
967 croak ("%s, at character offset %d (%s)",
968 dec.err,
969 (int)offset,
970 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
971 }
972
973 sv = sv_2mortal (sv);
974
975 if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
976 croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
977
978 return sv;
979 }
980
981 /////////////////////////////////////////////////////////////////////////////
982 // XS interface functions
983
984 MODULE = JSON::XS PACKAGE = JSON::XS
985
BOOT:
{
	int i;

	// build the hex digit lookup table, -1 marks octets that are no hex digits
        memset (decode_hexdigit, 0xff, 256);

        for (i = 10; i--; )
          decode_hexdigit ['0' + i] = i;

        // exactly six letters, a-f and A-F: counting from 7 would also
        // accept 'g' and 'G' with the value 16
        for (i = 6; i--; )
          {
            decode_hexdigit ['a' + i] = 10 + i;
            decode_hexdigit ['A' + i] = 10 + i;
          }

	json_stash = gv_stashpv ("JSON::XS", 1);
}
1002
1003 PROTOTYPES: DISABLE
1004
SV *new (char *dummy)
	CODE:
{
        // the object is a blessed reference to an UV holding the F_* flags
        SV *flags = newSVuv (F_DEFAULT);

        RETVAL = sv_bless (newRV_noinc (flags), json_stash);
}
	OUTPUT:
        RETVAL
1010
SV *ascii (SV *self, int enable = 1)
	ALIAS:
        ascii        = F_ASCII
        utf8         = F_UTF8
        indent       = F_INDENT
        canonical    = F_CANONICAL
        space_before = F_SPACE_BEFORE
        space_after  = F_SPACE_AFTER
        pretty       = F_PRETTY
        allow_nonref = F_ALLOW_NONREF
        shrink       = F_SHRINK
	CODE:
{
        // ix is the flag mask of the alias that was called:
        // set or clear it, then return a copy of the object reference
        UV *flags = SvJSON (self);

        *flags = enable ? *flags | ix : *flags & ~ix;

        RETVAL = newSVsv (self);
}
	OUTPUT:
        RETVAL
1034
void encode (SV *self, SV *scalar)
	PPCODE:
{
        UV flags = *SvJSON (self);

        // the encoder can run perl code (e.g. via SvGETMAGIC), which may
        // reallocate the perl stack: publish our stack pointer before the
        // call and reload it afterwards, then push the result
        PUTBACK; scalar = encode_json (scalar, flags); SPAGAIN;
        XPUSHs (scalar);
}
1038
void decode (SV *self, SV *jsonstr)
	PPCODE:
{
        UV flags = *SvJSON (self);

        // the decoder calls into perl's string handling, which might run
        // perl code and reallocate the perl stack: publish our stack pointer
        // before the call and reload it afterwards, then push the result
        PUTBACK; jsonstr = decode_json (jsonstr, flags); SPAGAIN;
        XPUSHs (jsonstr);
}
1042
1043 PROTOTYPES: ENABLE
1044
void to_json (SV *scalar)
	ALIAS:
        objToJson = 0
	PPCODE:
        // functional interface, always encodes to UTF-8.
        // the encoder can run perl code (e.g. via SvGETMAGIC), which may
        // reallocate the perl stack: publish our stack pointer before the
        // call and reload it afterwards, then push the result
        PUTBACK; scalar = encode_json (scalar, F_UTF8); SPAGAIN;
        XPUSHs (scalar);
1050
void from_json (SV *jsonstr)
	ALIAS:
        jsonToObj = 0
	PPCODE:
        // functional interface, always expects UTF-8 encoded text.
        // the decoder calls into perl's string handling, which might run
        // perl code and reallocate the perl stack: publish our stack pointer
        // before the call and reload it afterwards, then push the result
        PUTBACK; jsonstr = decode_json (jsonstr, F_UTF8); SPAGAIN;
        XPUSHs (jsonstr);
1056