ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
Revision: 1.12
Committed: Sat Mar 24 22:10:08 2007 UTC (17 years, 1 month ago) by root
Branch: MAIN
Changes since 1.11: +226 -181 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include "assert.h"
6     #include "string.h"
7     #include "stdlib.h"
8    
// option flags, combined into the bitmask kept in a JSON::XS object
#define F_ASCII        0x00000001
#define F_UTF8         0x00000002
#define F_INDENT       0x00000004
#define F_CANONICAL    0x00000008
#define F_SPACE_BEFORE 0x00000010
#define F_SPACE_AFTER  0x00000020
#define F_ALLOW_NONREF 0x00000080
#define F_SHRINK       0x00000100

// parenthesised so that the expansion stays a single operand in
// expressions such as ~F_PRETTY or (flags & F_PRETTY)
#define F_PRETTY  (F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER)
#define F_DEFAULT 0

#define INIT_SIZE   32 // initial scalar size to be allocated
#define INDENT_STEP 3  // spaces per indentation level

#define UTF8_MAX_LEN     11  // for perls UTF-X: max. number of octets per character
#define SHORT_STRING_LEN 256 // special-case strings of up to this size

// statement brackets for multi-statement macros: SB ... SE is do { ... } while (0)
#define SB do {
#define SE } while (0)
29    
30 root 1.12 static HV *json_stash; // JSON::XS::
31 root 1.1
32 root 1.12 /////////////////////////////////////////////////////////////////////////////
33     // utility functions
34 root 1.1
35     static UV *
36     SvJSON (SV *sv)
37     {
38     if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
39     croak ("object is not of type JSON::XS");
40    
41     return &SvUVX (SvRV (sv));
42     }
43    
44 root 1.7 static void
45     shrink (SV *sv)
46     {
47     sv_utf8_downgrade (sv, 1);
48 root 1.12 if (SvLEN (sv) > SvCUR (sv) + 1)
49     {
50 root 1.7 #ifdef SvPV_shrink_to_cur
51 root 1.12 SvPV_shrink_to_cur (sv);
52     #elif defined (SvPV_renew)
53     SvPV_renew (sv, SvCUR (sv) + 1);
54 root 1.7 #endif
55 root 1.12 }
56 root 1.7 }
57    
58 root 1.1 /////////////////////////////////////////////////////////////////////////////
59 root 1.12 // encoder
60    
61     // structure used for encoding JSON
62     typedef struct
63     {
64     char *cur; // SvPVX (sv) + current output position
65     char *end; // SvEND (sv)
66     SV *sv; // result scalar
67     UV flags; // F_*
68     int indent; // indentation level
69     int max_depth; // max. recursion level
70     } enc_t;
71 root 1.1
72     static void
73     need (enc_t *enc, STRLEN len)
74     {
75     if (enc->cur + len >= enc->end)
76     {
77     STRLEN cur = enc->cur - SvPVX (enc->sv);
78     SvGROW (enc->sv, cur + len + 1);
79     enc->cur = SvPVX (enc->sv) + cur;
80 root 1.4 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);
81 root 1.1 }
82     }
83    
84     static void
85     encode_ch (enc_t *enc, char ch)
86     {
87     need (enc, 1);
88     *enc->cur++ = ch;
89     }
90    
91     static void
92     encode_str (enc_t *enc, char *str, STRLEN len, int is_utf8)
93     {
94     char *end = str + len;
95    
96 root 1.4 need (enc, len);
97    
98 root 1.1 while (str < end)
99     {
100     unsigned char ch = *(unsigned char *)str;
101 root 1.4
102 root 1.6 if (ch >= 0x20 && ch < 0x80) // most common case
103 root 1.4 {
104 root 1.6 if (ch == '"') // but with slow exceptions
105     {
106     need (enc, len += 1);
107     *enc->cur++ = '\\';
108     *enc->cur++ = '"';
109     }
110     else if (ch == '\\')
111     {
112     need (enc, len += 1);
113     *enc->cur++ = '\\';
114     *enc->cur++ = '\\';
115     }
116     else
117     *enc->cur++ = ch;
118    
119 root 1.4 ++str;
120 root 1.1 }
121     else
122     {
123 root 1.6 switch (ch)
124 root 1.1 {
125 root 1.6 case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
126     case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
127     case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
128     case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
129     case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;
130 root 1.1
131 root 1.6 default:
132 root 1.1 {
133 root 1.6 STRLEN clen;
134     UV uch;
135    
136     if (is_utf8)
137     {
138     uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
139     if (clen == (STRLEN)-1)
140 root 1.9 croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
141 root 1.6 }
142     else
143     {
144     uch = ch;
145     clen = 1;
146     }
147    
148 root 1.9 if (uch > 0x10FFFFUL)
149     croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
150    
151 root 1.6 if (uch < 0x80 || enc->flags & F_ASCII)
152     {
153     if (uch > 0xFFFFUL)
154     {
155     need (enc, len += 11);
156     sprintf (enc->cur, "\\u%04x\\u%04x",
157 root 1.10 (int)((uch - 0x10000) / 0x400 + 0xD800),
158     (int)((uch - 0x10000) % 0x400 + 0xDC00));
159 root 1.6 enc->cur += 12;
160     }
161     else
162     {
163     static char hexdigit [16] = "0123456789abcdef";
164     need (enc, len += 5);
165     *enc->cur++ = '\\';
166     *enc->cur++ = 'u';
167     *enc->cur++ = hexdigit [ uch >> 12 ];
168     *enc->cur++ = hexdigit [(uch >> 8) & 15];
169     *enc->cur++ = hexdigit [(uch >> 4) & 15];
170     *enc->cur++ = hexdigit [(uch >> 0) & 15];
171     }
172 root 1.4
173 root 1.6 str += clen;
174     }
175     else if (is_utf8)
176     {
177     need (enc, len += clen);
178     do
179     {
180     *enc->cur++ = *str++;
181     }
182     while (--clen);
183     }
184     else
185     {
186 root 1.12 need (enc, len += UTF8_MAX_LEN - 1); // never more than 11 bytes needed
187 root 1.6 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
188     ++str;
189     }
190 root 1.5 }
191 root 1.4 }
192 root 1.1 }
193    
194     --len;
195     }
196     }
197    
198 root 1.12 static void
199     encode_indent (enc_t *enc)
200     {
201     if (enc->flags & F_INDENT)
202     {
203     int spaces = enc->indent * INDENT_STEP;
204    
205     need (enc, spaces);
206     memset (enc->cur, ' ', spaces);
207     enc->cur += spaces;
208     }
209     }
210    
211     static void
212     encode_space (enc_t *enc)
213     {
214     need (enc, 1);
215     encode_ch (enc, ' ');
216     }
217    
218     static void
219     encode_nl (enc_t *enc)
220     {
221     if (enc->flags & F_INDENT)
222     {
223     need (enc, 1);
224     encode_ch (enc, '\n');
225     }
226     }
227    
228     static void
229     encode_comma (enc_t *enc)
230     {
231     encode_ch (enc, ',');
232 root 1.1
233 root 1.12 if (enc->flags & F_INDENT)
234     encode_nl (enc);
235     else if (enc->flags & F_SPACE_AFTER)
236     encode_space (enc);
237     }
238 root 1.1
239     static void encode_sv (enc_t *enc, SV *sv);
240    
241     static void
242     encode_av (enc_t *enc, AV *av)
243     {
244     int i, len = av_len (av);
245    
246 root 1.12 encode_ch (enc, '['); encode_nl (enc);
247 root 1.1 ++enc->indent;
248    
249     for (i = 0; i <= len; ++i)
250     {
251 root 1.12 encode_indent (enc);
252 root 1.1 encode_sv (enc, *av_fetch (av, i, 0));
253    
254     if (i < len)
255 root 1.12 encode_comma (enc);
256 root 1.1 }
257    
258 root 1.12 encode_nl (enc);
259 root 1.1
260     --enc->indent;
261 root 1.12 encode_indent (enc); encode_ch (enc, ']');
262 root 1.1 }
263    
264     static void
265     encode_he (enc_t *enc, HE *he)
266     {
267     encode_ch (enc, '"');
268    
269     if (HeKLEN (he) == HEf_SVKEY)
270     {
271     SV *sv = HeSVKEY (he);
272     STRLEN len;
273 root 1.4 char *str;
274    
275     SvGETMAGIC (sv);
276     str = SvPV (sv, len);
277 root 1.1
278     encode_str (enc, str, len, SvUTF8 (sv));
279     }
280     else
281     encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
282    
283     encode_ch (enc, '"');
284    
285 root 1.12 if (enc->flags & F_SPACE_BEFORE) encode_space (enc);
286 root 1.1 encode_ch (enc, ':');
287 root 1.12 if (enc->flags & F_SPACE_AFTER ) encode_space (enc);
288 root 1.1 encode_sv (enc, HeVAL (he));
289     }
290    
291     // compare hash entries, used when all keys are bytestrings
292     static int
293     he_cmp_fast (const void *a_, const void *b_)
294     {
295     int cmp;
296    
297     HE *a = *(HE **)a_;
298     HE *b = *(HE **)b_;
299    
300     STRLEN la = HeKLEN (a);
301     STRLEN lb = HeKLEN (b);
302    
303 root 1.11 if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
304     cmp = la - lb;
305 root 1.1
306     return cmp;
307     }
308    
309     // compare hash entries, used when some keys are sv's or utf-x
310     static int
311     he_cmp_slow (const void *a, const void *b)
312     {
313     return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b));
314     }
315    
316     static void
317     encode_hv (enc_t *enc, HV *hv)
318     {
319     int count, i;
320    
321 root 1.12 encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;
322 root 1.1
323     if ((count = hv_iterinit (hv)))
324     {
325     // for canonical output we have to sort by keys first
326     // actually, this is mostly due to the stupid so-called
327     // security workaround added somewhere in 5.8.x.
328     // that randomises hash orderings
329     if (enc->flags & F_CANONICAL)
330     {
331 root 1.12 HE *he, *hes [count]; // if your compiler dies here, you need to enable C99 mode
332 root 1.1 int fast = 1;
333    
334     i = 0;
335     while ((he = hv_iternext (hv)))
336     {
337     hes [i++] = he;
338     if (HeKLEN (he) < 0 || HeKUTF8 (he))
339     fast = 0;
340     }
341    
342     assert (i == count);
343    
344     if (fast)
345     qsort (hes, count, sizeof (HE *), he_cmp_fast);
346     else
347     {
348 root 1.8 // hack to forcefully disable "use bytes"
349     COP cop = *PL_curcop;
350 root 1.1 cop.op_private = 0;
351 root 1.8
352     ENTER;
353     SAVETMPS;
354    
355     SAVEVPTR (PL_curcop);
356 root 1.1 PL_curcop = &cop;
357    
358     qsort (hes, count, sizeof (HE *), he_cmp_slow);
359 root 1.8
360 root 1.1 FREETMPS;
361 root 1.8 LEAVE;
362 root 1.1 }
363    
364     for (i = 0; i < count; ++i)
365     {
366 root 1.12 encode_indent (enc);
367 root 1.1 encode_he (enc, hes [i]);
368    
369     if (i < count - 1)
370 root 1.12 encode_comma (enc);
371 root 1.1 }
372    
373 root 1.12 encode_nl (enc);
374 root 1.1 }
375     else
376     {
377     SV *sv;
378     HE *he = hv_iternext (hv);
379    
380     for (;;)
381     {
382 root 1.12 encode_indent (enc);
383 root 1.1 encode_he (enc, he);
384    
385     if (!(he = hv_iternext (hv)))
386     break;
387    
388 root 1.12 encode_comma (enc);
389 root 1.1 }
390    
391 root 1.12 encode_nl (enc);
392 root 1.1 }
393     }
394    
395 root 1.12 --enc->indent; encode_indent (enc); encode_ch (enc, '}');
396 root 1.1 }
397    
398     static void
399     encode_sv (enc_t *enc, SV *sv)
400     {
401 root 1.4 SvGETMAGIC (sv);
402    
403 root 1.1 if (SvPOKp (sv))
404     {
405     STRLEN len;
406     char *str = SvPV (sv, len);
407     encode_ch (enc, '"');
408     encode_str (enc, str, len, SvUTF8 (sv));
409     encode_ch (enc, '"');
410     }
411     else if (SvNOKp (sv))
412     {
413     need (enc, NV_DIG + 32);
414     Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
415     enc->cur += strlen (enc->cur);
416     }
417     else if (SvIOKp (sv))
418     {
419     need (enc, 64);
420     enc->cur +=
421     SvIsUV(sv)
422     ? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))
423     : snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));
424     }
425     else if (SvROK (sv))
426     {
427 root 1.9 SV *rv = SvRV (sv);
428    
429 root 1.12 if (enc->indent >= enc->max_depth)
430 root 1.1 croak ("data structure too deep (hit recursion limit)");
431    
432 root 1.9 switch (SvTYPE (rv))
433 root 1.1 {
434 root 1.9 case SVt_PVAV: encode_av (enc, (AV *)rv); break;
435     case SVt_PVHV: encode_hv (enc, (HV *)rv); break;
436 root 1.1
437     default:
438 root 1.9 croak ("encountered %s, but JSON can only represent references to arrays or hashes",
439     SvPV_nolen (sv));
440 root 1.1 }
441     }
442     else if (!SvOK (sv))
443     encode_str (enc, "null", 4, 0);
444     else
445 root 1.9 croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
446     SvPV_nolen (sv), SvFLAGS (sv));
447 root 1.1 }
448    
449     static SV *
450     encode_json (SV *scalar, UV flags)
451     {
452 root 1.3 if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
453 root 1.9 croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
454 root 1.3
455 root 1.1 enc_t enc;
456 root 1.12 enc.flags = flags;
457     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
458     enc.cur = SvPVX (enc.sv);
459     enc.end = SvEND (enc.sv);
460     enc.indent = 0;
461     enc.max_depth = 0x7fffffffUL;
462 root 1.1
463     SvPOK_only (enc.sv);
464     encode_sv (&enc, scalar);
465    
466     if (!(flags & (F_ASCII | F_UTF8)))
467     SvUTF8_on (enc.sv);
468    
469     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
470 root 1.6
471     if (enc.flags & F_SHRINK)
472 root 1.7 shrink (enc.sv);
473    
474 root 1.1 return enc.sv;
475     }
476    
477     /////////////////////////////////////////////////////////////////////////////
478 root 1.12 // decoder
479 root 1.1
480 root 1.12 // structure used for decoding JSON
481     typedef struct
482     {
483     char *cur; // current parser pointer
484     char *end; // end of input string
485     const char *err; // parse error, if != 0
486     UV flags; // F_*
487     } dec_t;
488    
489     static void
490     decode_ws (dec_t *dec)
491     {
492     for (;;)
493     {
494     char ch = *dec->cur;
495    
496     if (ch > 0x20
497     || (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))
498     break;
499    
500     ++dec->cur;
501 root 1.1 }
502 root 1.12 }
503 root 1.1
504     #define ERR(reason) SB dec->err = reason; goto fail; SE
505     #define EXPECT_CH(ch) SB \
506     if (*dec->cur != ch) \
507     ERR (# ch " expected"); \
508     ++dec->cur; \
509     SE
510    
511     static SV *decode_sv (dec_t *dec);
512    
513     static signed char decode_hexdigit[256];
514    
515     static UV
516     decode_4hex (dec_t *dec)
517     {
518     signed char d1, d2, d3, d4;
519 root 1.12 unsigned char *cur = (unsigned char *)dec->cur;
520 root 1.1
521 root 1.12 d1 = decode_hexdigit [cur [0]]; if (d1 < 0) ERR ("four hexadecimal digits expected");
522     d2 = decode_hexdigit [cur [1]]; if (d2 < 0) ERR ("four hexadecimal digits expected");
523     d3 = decode_hexdigit [cur [2]]; if (d3 < 0) ERR ("four hexadecimal digits expected");
524     d4 = decode_hexdigit [cur [3]]; if (d4 < 0) ERR ("four hexadecimal digits expected");
525 root 1.1
526     dec->cur += 4;
527    
528     return ((UV)d1) << 12
529     | ((UV)d2) << 8
530     | ((UV)d3) << 4
531     | ((UV)d4);
532    
533     fail:
534     return (UV)-1;
535     }
536    
537     static SV *
538     decode_str (dec_t *dec)
539     {
540 root 1.12 SV *sv = 0;
541 root 1.1 int utf8 = 0;
542    
543 root 1.12 do
544 root 1.1 {
545 root 1.12 char buf [SHORT_STRING_LEN + UTF8_MAX_LEN];
546     char *cur = buf;
547 root 1.1
548 root 1.12 do
549 root 1.1 {
550 root 1.12 unsigned char ch = *(unsigned char *)dec->cur++;
551    
552     if (ch == '"')
553     {
554     --dec->cur;
555     break;
556     }
557     else if (ch == '\\')
558 root 1.1 {
559 root 1.12 switch (*dec->cur)
560 root 1.1 {
561 root 1.12 case '\\':
562     case '/':
563     case '"': *cur++ = *dec->cur++; break;
564    
565     case 'b': ++dec->cur; *cur++ = '\010'; break;
566     case 't': ++dec->cur; *cur++ = '\011'; break;
567     case 'n': ++dec->cur; *cur++ = '\012'; break;
568     case 'f': ++dec->cur; *cur++ = '\014'; break;
569     case 'r': ++dec->cur; *cur++ = '\015'; break;
570 root 1.1
571 root 1.12 case 'u':
572 root 1.1 {
573 root 1.12 UV lo, hi;
574     ++dec->cur;
575 root 1.1
576 root 1.12 hi = decode_4hex (dec);
577     if (hi == (UV)-1)
578     goto fail;
579 root 1.1
580 root 1.12 // possibly a surrogate pair
581     if (hi >= 0xd800)
582     if (hi < 0xdc00)
583     {
584     if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
585     ERR ("missing low surrogate character in surrogate pair");
586    
587     dec->cur += 2;
588    
589     lo = decode_4hex (dec);
590     if (lo == (UV)-1)
591     goto fail;
592    
593     if (lo < 0xdc00 || lo >= 0xe000)
594     ERR ("surrogate pair expected");
595    
596     hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
597     }
598     else if (hi < 0xe000)
599     ERR ("missing high surrogate character in surrogate pair");
600 root 1.1
601 root 1.12 if (hi >= 0x80)
602     {
603     utf8 = 1;
604 root 1.1
605 root 1.12 cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
606     }
607     else
608     *cur++ = hi;
609 root 1.1 }
610 root 1.12 break;
611    
612     default:
613     --dec->cur;
614     ERR ("illegal backslash escape sequence in string");
615     }
616     }
617     else if (ch >= 0x20 && ch <= 0x7f)
618     *cur++ = ch;
619     else if (ch >= 0x80)
620     {
621     --dec->cur;
622 root 1.1
623 root 1.12 STRLEN clen;
624     UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
625     if (clen == (STRLEN)-1)
626     ERR ("malformed UTF-8 character in JSON string");
627 root 1.1
628 root 1.12 do
629     {
630     *cur++ = *dec->cur++;
631 root 1.1 }
632 root 1.12 while (--clen);
633 root 1.5
634 root 1.12 utf8 = 1;
635 root 1.1 }
636 root 1.12 else if (!ch)
637     ERR ("unexpected end of string while parsing json string");
638     else
639     ERR ("invalid character encountered");
640    
641 root 1.1 }
642 root 1.12 while (cur < buf + SHORT_STRING_LEN);
643 root 1.1
644 root 1.12 STRLEN len = cur - buf;
645 root 1.5
646 root 1.12 if (sv)
647     {
648     SvGROW (sv, SvCUR (sv) + len + 1);
649     memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
650     SvCUR_set (sv, SvCUR (sv) + len);
651 root 1.1 }
652     else
653 root 1.12 sv = newSVpvn (buf, len);
654 root 1.1 }
655 root 1.12 while (*dec->cur != '"');
656 root 1.1
657     ++dec->cur;
658    
659 root 1.12 if (sv)
660     {
661     SvPOK_only (sv);
662     *SvEND (sv) = 0;
663 root 1.4
664 root 1.12 if (utf8)
665     SvUTF8_on (sv);
666     }
667     else
668     sv = newSVpvn ("", 0);
669 root 1.6
670 root 1.1 return sv;
671    
672     fail:
673     return 0;
674     }
675    
676     static SV *
677     decode_num (dec_t *dec)
678     {
679     int is_nv = 0;
680     char *start = dec->cur;
681    
682     // [minus]
683     if (*dec->cur == '-')
684     ++dec->cur;
685    
686     if (*dec->cur == '0')
687     {
688     ++dec->cur;
689     if (*dec->cur >= '0' && *dec->cur <= '9')
690     ERR ("malformed number (leading zero must not be followed by another digit)");
691     }
692 root 1.5 else if (*dec->cur < '0' || *dec->cur > '9')
693     ERR ("malformed number (no digits after initial minus)");
694     else
695     do
696     {
697     ++dec->cur;
698     }
699     while (*dec->cur >= '0' && *dec->cur <= '9');
700 root 1.1
701     // [frac]
702     if (*dec->cur == '.')
703     {
704 root 1.5 ++dec->cur;
705    
706     if (*dec->cur < '0' || *dec->cur > '9')
707     ERR ("malformed number (no digits after decimal point)");
708 root 1.1
709     do
710     {
711     ++dec->cur;
712     }
713     while (*dec->cur >= '0' && *dec->cur <= '9');
714 root 1.5
715     is_nv = 1;
716 root 1.1 }
717    
718     // [exp]
719     if (*dec->cur == 'e' || *dec->cur == 'E')
720     {
721 root 1.5 ++dec->cur;
722 root 1.1
723     if (*dec->cur == '-' || *dec->cur == '+')
724     ++dec->cur;
725    
726 root 1.5 if (*dec->cur < '0' || *dec->cur > '9')
727     ERR ("malformed number (no digits after exp sign)");
728    
729     do
730     {
731     ++dec->cur;
732     }
733     while (*dec->cur >= '0' && *dec->cur <= '9');
734    
735     is_nv = 1;
736 root 1.1 }
737    
738     if (!is_nv)
739     {
740     UV uv;
741     int numtype = grok_number (start, dec->cur - start, &uv);
742     if (numtype & IS_NUMBER_IN_UV)
743     if (numtype & IS_NUMBER_NEG)
744     {
745     if (uv < (UV)IV_MIN)
746     return newSViv (-(IV)uv);
747     }
748     else
749     return newSVuv (uv);
750     }
751    
752     return newSVnv (Atof (start));
753    
754     fail:
755     return 0;
756     }
757    
758     static SV *
759     decode_av (dec_t *dec)
760     {
761     AV *av = newAV ();
762    
763 root 1.12 decode_ws (dec);
764 root 1.5 if (*dec->cur == ']')
765     ++dec->cur;
766     else
767     for (;;)
768     {
769     SV *value;
770 root 1.1
771 root 1.5 value = decode_sv (dec);
772     if (!value)
773     goto fail;
774 root 1.1
775 root 1.5 av_push (av, value);
776 root 1.1
777 root 1.12 decode_ws (dec);
778 root 1.1
779 root 1.5 if (*dec->cur == ']')
780     {
781     ++dec->cur;
782     break;
783     }
784    
785     if (*dec->cur != ',')
786     ERR (", or ] expected while parsing array");
787 root 1.1
788 root 1.5 ++dec->cur;
789     }
790 root 1.1
791     return newRV_noinc ((SV *)av);
792    
793     fail:
794     SvREFCNT_dec (av);
795     return 0;
796     }
797    
798     static SV *
799     decode_hv (dec_t *dec)
800     {
801     HV *hv = newHV ();
802    
803 root 1.12 decode_ws (dec);
804 root 1.5 if (*dec->cur == '}')
805     ++dec->cur;
806     else
807     for (;;)
808     {
809     SV *key, *value;
810 root 1.1
811 root 1.12 decode_ws (dec); EXPECT_CH ('"');
812 root 1.1
813 root 1.5 key = decode_str (dec);
814     if (!key)
815     goto fail;
816 root 1.1
817 root 1.12 decode_ws (dec); EXPECT_CH (':');
818 root 1.1
819 root 1.5 value = decode_sv (dec);
820     if (!value)
821     {
822     SvREFCNT_dec (key);
823     goto fail;
824     }
825 root 1.1
826 root 1.5 //TODO: optimise
827     hv_store_ent (hv, key, value, 0);
828 root 1.1
829 root 1.12 decode_ws (dec);
830 root 1.1
831 root 1.5 if (*dec->cur == '}')
832     {
833     ++dec->cur;
834     break;
835     }
836 root 1.1
837 root 1.5 if (*dec->cur != ',')
838     ERR (", or } expected while parsing object/hash");
839 root 1.1
840 root 1.5 ++dec->cur;
841     }
842 root 1.1
843     return newRV_noinc ((SV *)hv);
844    
845     fail:
846     SvREFCNT_dec (hv);
847     return 0;
848     }
849    
850     static SV *
851     decode_sv (dec_t *dec)
852     {
853 root 1.12 decode_ws (dec);
854 root 1.1 switch (*dec->cur)
855     {
856     case '"': ++dec->cur; return decode_str (dec);
857     case '[': ++dec->cur; return decode_av (dec);
858     case '{': ++dec->cur; return decode_hv (dec);
859    
860     case '-':
861     case '0': case '1': case '2': case '3': case '4':
862     case '5': case '6': case '7': case '8': case '9':
863     return decode_num (dec);
864    
865     case 't':
866     if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
867     {
868     dec->cur += 4;
869     return newSViv (1);
870     }
871     else
872     ERR ("'true' expected");
873    
874     break;
875    
876     case 'f':
877     if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
878     {
879     dec->cur += 5;
880     return newSViv (0);
881     }
882     else
883     ERR ("'false' expected");
884    
885     break;
886    
887     case 'n':
888     if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
889     {
890     dec->cur += 4;
891 root 1.5 return newSVsv (&PL_sv_undef);
892 root 1.1 }
893     else
894     ERR ("'null' expected");
895    
896     break;
897    
898     default:
899 root 1.7 ERR ("malformed json string, neither array, object, number, string or atom");
900 root 1.1 break;
901     }
902    
903     fail:
904     return 0;
905     }
906    
907     static SV *
908     decode_json (SV *string, UV flags)
909     {
910     SV *sv;
911    
912 root 1.5 if (flags & F_UTF8)
913     sv_utf8_downgrade (string, 0);
914     else
915 root 1.1 sv_utf8_upgrade (string);
916    
917     SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
918    
919     dec_t dec;
920     dec.flags = flags;
921     dec.cur = SvPVX (string);
922     dec.end = SvEND (string);
923     dec.err = 0;
924    
925     sv = decode_sv (&dec);
926    
927     if (!sv)
928     {
929 root 1.7 IV offset = dec.flags & F_UTF8
930     ? dec.cur - SvPVX (string)
931     : utf8_distance (dec.cur, SvPVX (string));
932 root 1.1 SV *uni = sv_newmortal ();
933 root 1.8
934 root 1.5 // horrible hack to silence warning inside pv_uni_display
935 root 1.8 COP cop = *PL_curcop;
936 root 1.5 cop.cop_warnings = pWARN_NONE;
937 root 1.8 ENTER;
938 root 1.5 SAVEVPTR (PL_curcop);
939     PL_curcop = &cop;
940 root 1.8 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
941     LEAVE;
942 root 1.1
943 root 1.5 croak ("%s, at character offset %d (%s)",
944 root 1.1 dec.err,
945     (int)offset,
946     dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
947     }
948    
949 root 1.3 sv = sv_2mortal (sv);
950    
951     if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
952 root 1.9 croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
953 root 1.3
954     return sv;
955 root 1.1 }
956    
957 root 1.12 /////////////////////////////////////////////////////////////////////////////
958     // XS interface functions
959    
960 root 1.1 MODULE = JSON::XS PACKAGE = JSON::XS
961    
BOOT:
{
	int i;

	// mark every byte as "not a hex digit" first (0xff reads back as -1)
	memset (decode_hexdigit, 0xff, 256);
	for (i = 10; i--; )
	  decode_hexdigit ['0' + i] = i;

	// six letters only, a..f and A..F: a count of 7 would also map g/G to 16
	for (i = 6; i--; )
	  {
	    decode_hexdigit ['a' + i] = 10 + i;
	    decode_hexdigit ['A' + i] = 10 + i;
	  }

	json_stash = gv_stashpv ("JSON::XS", 1);
}
978    
979 root 1.4 PROTOTYPES: DISABLE
980    
SV *new (char *dummy)
	CODE:
{
	// the object is a blessed reference to an unsigned integer holding the F_* flags
	SV *state = newSVuv (F_DEFAULT);

	RETVAL = sv_bless (newRV_noinc (state), json_stash);
}
	OUTPUT:
	RETVAL
986    
SV *ascii (SV *self, int enable = 1)
	ALIAS:
	ascii        = F_ASCII
	utf8         = F_UTF8
	indent       = F_INDENT
	canonical    = F_CANONICAL
	space_before = F_SPACE_BEFORE
	space_after  = F_SPACE_AFTER
	pretty       = F_PRETTY
	allow_nonref = F_ALLOW_NONREF
	shrink       = F_SHRINK
	CODE:
{
	// ix is the flag mask of the alias this method was called through:
	// set or clear those bits, then hand back a copy of the object reference
	UV *flags = SvJSON (self);

	*flags = enable ? *flags | ix : *flags & ~ix;

	RETVAL = newSVsv (self);
}
	OUTPUT:
	RETVAL
1010    
void encode (SV *self, SV *scalar)
	PPCODE:
	// encoding can run perl code (magic) that reallocates the perl stack:
	// publish SP before the call and reload it afterwards
	PUTBACK; scalar = encode_json (scalar, *SvJSON (self)); SPAGAIN;
	XPUSHs (scalar);
1014    
void decode (SV *self, SV *jsonstr)
	PPCODE:
	// decoding calls back into perl, which may reallocate the perl stack:
	// publish SP before the call and reload it afterwards
	PUTBACK; jsonstr = decode_json (jsonstr, *SvJSON (self)); SPAGAIN;
	XPUSHs (jsonstr);
1018    
1019 root 1.4 PROTOTYPES: ENABLE
1020    
void to_json (SV *scalar)
	PPCODE:
	// encoding can run perl code (magic) that reallocates the perl stack:
	// publish SP before the call and reload it afterwards
	PUTBACK; scalar = encode_json (scalar, F_UTF8); SPAGAIN;
	XPUSHs (scalar);
1024    
void from_json (SV *jsonstr)
	PPCODE:
	// decoding calls back into perl, which may reallocate the perl stack:
	// publish SP before the call and reload it afterwards
	PUTBACK; jsonstr = decode_json (jsonstr, F_UTF8); SPAGAIN;
	XPUSHs (jsonstr);
1028 root 1.1