ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
Revision: 1.9
Committed: Fri Mar 23 17:40:29 2007 UTC (17 years, 2 months ago) by root
Branch: MAIN
CVS Tags: rel-0_3
Changes since 1.8: +15 -10 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #include "EXTERN.h"
2     #include "perl.h"
3     #include "XSUB.h"
4    
5     #include "assert.h"
6     #include "string.h"
7     #include "stdlib.h"
8    
// option bits; an object's current options are the OR of these values
#define F_ASCII        0x00000001
#define F_UTF8         0x00000002
#define F_INDENT       0x00000004
#define F_CANONICAL    0x00000008
#define F_SPACE_BEFORE 0x00000010
#define F_SPACE_AFTER  0x00000020
#define F_JSON_RPC     0x00000040
#define F_ALLOW_NONREF 0x00000080
#define F_SHRINK       0x00000100

// parenthesised so the expansion stays a single operand when combined
// with ~, & or other operators at the point of use
#define F_PRETTY  (F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER)
#define F_DEFAULT 0

#define INIT_SIZE 32 // initial scalar size to be allocated

// brackets for multi-statement macros: SB ... SE is do { ... } while (0)
#define SB do {
#define SE } while (0)
26    
27     static HV *json_stash;
28    
29     // structure used for encoding JSON
30     typedef struct
31     {
32     char *cur;
33     STRLEN len; // SvLEN (sv)
34     char *end; // SvEND (sv)
35     SV *sv;
36     UV flags;
37     int max_recurse;
38     int indent;
39     } enc_t;
40    
41     // structure used for decoding JSON
42     typedef struct
43     {
44     char *cur;
45     char *end;
46 root 1.4 const char *err;
47 root 1.1 UV flags;
48     } dec_t;
49    
50     static UV *
51     SvJSON (SV *sv)
52     {
53     if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
54     croak ("object is not of type JSON::XS");
55    
56     return &SvUVX (SvRV (sv));
57     }
58    
59 root 1.7 static void
60     shrink (SV *sv)
61     {
62     sv_utf8_downgrade (sv, 1);
63     #ifdef SvPV_shrink_to_cur
64     SvPV_shrink_to_cur (sv);
65     #endif
66     }
67    
68 root 1.1 /////////////////////////////////////////////////////////////////////////////
69    
70     static void
71     need (enc_t *enc, STRLEN len)
72     {
73     if (enc->cur + len >= enc->end)
74     {
75     STRLEN cur = enc->cur - SvPVX (enc->sv);
76     SvGROW (enc->sv, cur + len + 1);
77     enc->cur = SvPVX (enc->sv) + cur;
78 root 1.4 enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);
79 root 1.1 }
80     }
81    
82     static void
83     encode_ch (enc_t *enc, char ch)
84     {
85     need (enc, 1);
86     *enc->cur++ = ch;
87     }
88    
89     static void
90     encode_str (enc_t *enc, char *str, STRLEN len, int is_utf8)
91     {
92     char *end = str + len;
93    
94 root 1.4 need (enc, len);
95    
96 root 1.1 while (str < end)
97     {
98     unsigned char ch = *(unsigned char *)str;
99 root 1.4
100 root 1.6 if (ch >= 0x20 && ch < 0x80) // most common case
101 root 1.4 {
102 root 1.6 if (ch == '"') // but with slow exceptions
103     {
104     need (enc, len += 1);
105     *enc->cur++ = '\\';
106     *enc->cur++ = '"';
107     }
108     else if (ch == '\\')
109     {
110     need (enc, len += 1);
111     *enc->cur++ = '\\';
112     *enc->cur++ = '\\';
113     }
114     else
115     *enc->cur++ = ch;
116    
117 root 1.4 ++str;
118 root 1.1 }
119     else
120     {
121 root 1.6 switch (ch)
122 root 1.1 {
123 root 1.6 case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
124     case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
125     case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
126     case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
127     case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;
128 root 1.1
129 root 1.6 default:
130 root 1.1 {
131 root 1.6 STRLEN clen;
132     UV uch;
133    
134     if (is_utf8)
135     {
136     uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
137     if (clen == (STRLEN)-1)
138 root 1.9 croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
139 root 1.6 }
140     else
141     {
142     uch = ch;
143     clen = 1;
144     }
145    
146 root 1.9 if (uch > 0x10FFFFUL)
147     croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
148    
149 root 1.6 if (uch < 0x80 || enc->flags & F_ASCII)
150     {
151     if (uch > 0xFFFFUL)
152     {
153     need (enc, len += 11);
154     sprintf (enc->cur, "\\u%04x\\u%04x",
155     (uch - 0x10000) / 0x400 + 0xD800,
156     (uch - 0x10000) % 0x400 + 0xDC00);
157     enc->cur += 12;
158     }
159     else
160     {
161     static char hexdigit [16] = "0123456789abcdef";
162     need (enc, len += 5);
163     *enc->cur++ = '\\';
164     *enc->cur++ = 'u';
165     *enc->cur++ = hexdigit [ uch >> 12 ];
166     *enc->cur++ = hexdigit [(uch >> 8) & 15];
167     *enc->cur++ = hexdigit [(uch >> 4) & 15];
168     *enc->cur++ = hexdigit [(uch >> 0) & 15];
169     }
170 root 1.4
171 root 1.6 str += clen;
172     }
173     else if (is_utf8)
174     {
175     need (enc, len += clen);
176     do
177     {
178     *enc->cur++ = *str++;
179     }
180     while (--clen);
181     }
182     else
183     {
184 root 1.7 need (enc, len += 10); // never more than 11 bytes needed
185 root 1.6 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
186     ++str;
187     }
188 root 1.5 }
189 root 1.4 }
190 root 1.1 }
191    
192     --len;
193     }
194     }
195    
// Formatting helpers for the encoder. All of them expect a variable
// named enc (enc_t *) to be in scope.

// INDENT: with F_INDENT set, emits three spaces per nesting level. The
// reservation now matches the number of bytes actually written.
#define INDENT SB \
  if (enc->flags & F_INDENT) \
    { \
      int i_; \
      need (enc, enc->indent * 3); \
      for (i_ = enc->indent * 3; i_--; ) \
        encode_ch (enc, ' '); \
    } \
  SE

// SPACE: emits one space
#define SPACE SB need (enc, 1); encode_ch (enc, ' '); SE

// NL: emits a newline, but only with F_INDENT set
#define NL SB if (enc->flags & F_INDENT) { need (enc, 1); encode_ch (enc, '\n'); } SE

// COMMA: emits the element separator, followed by a newline (F_INDENT)
// or a space (F_SPACE_AFTER without F_INDENT)
#define COMMA SB \
  encode_ch (enc, ','); \
  if (enc->flags & F_INDENT) \
    NL; \
  else if (enc->flags & F_SPACE_AFTER) \
    SPACE; \
  SE
215    
216     static void encode_sv (enc_t *enc, SV *sv);
217    
218     static void
219     encode_av (enc_t *enc, AV *av)
220     {
221     int i, len = av_len (av);
222    
223     encode_ch (enc, '['); NL;
224     ++enc->indent;
225    
226     for (i = 0; i <= len; ++i)
227     {
228     INDENT;
229     encode_sv (enc, *av_fetch (av, i, 0));
230    
231     if (i < len)
232     COMMA;
233     }
234    
235     NL;
236    
237     --enc->indent;
238     INDENT; encode_ch (enc, ']');
239     }
240    
241     static void
242     encode_he (enc_t *enc, HE *he)
243     {
244     encode_ch (enc, '"');
245    
246     if (HeKLEN (he) == HEf_SVKEY)
247     {
248     SV *sv = HeSVKEY (he);
249     STRLEN len;
250 root 1.4 char *str;
251    
252     SvGETMAGIC (sv);
253     str = SvPV (sv, len);
254 root 1.1
255     encode_str (enc, str, len, SvUTF8 (sv));
256     }
257     else
258     encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
259    
260     encode_ch (enc, '"');
261    
262     if (enc->flags & F_SPACE_BEFORE) SPACE;
263     encode_ch (enc, ':');
264     if (enc->flags & F_SPACE_AFTER ) SPACE;
265     encode_sv (enc, HeVAL (he));
266     }
267    
268     // compare hash entries, used when all keys are bytestrings
269     static int
270     he_cmp_fast (const void *a_, const void *b_)
271     {
272     int cmp;
273    
274     HE *a = *(HE **)a_;
275     HE *b = *(HE **)b_;
276    
277     STRLEN la = HeKLEN (a);
278     STRLEN lb = HeKLEN (b);
279    
280     if (!(cmp == memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
281     cmp = la < lb ? -1 : la == lb ? 0 : 1;
282    
283     return cmp;
284     }
285    
286     // compare hash entries, used when some keys are sv's or utf-x
287     static int
288     he_cmp_slow (const void *a, const void *b)
289     {
290     return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b));
291     }
292    
293     static void
294     encode_hv (enc_t *enc, HV *hv)
295     {
296     int count, i;
297    
298     encode_ch (enc, '{'); NL; ++enc->indent;
299    
300     if ((count = hv_iterinit (hv)))
301     {
302     // for canonical output we have to sort by keys first
303     // actually, this is mostly due to the stupid so-called
304     // security workaround added somewhere in 5.8.x.
305     // that randomises hash orderings
306     if (enc->flags & F_CANONICAL)
307     {
308     HE *he, *hes [count];
309     int fast = 1;
310    
311     i = 0;
312     while ((he = hv_iternext (hv)))
313     {
314     hes [i++] = he;
315     if (HeKLEN (he) < 0 || HeKUTF8 (he))
316     fast = 0;
317     }
318    
319     assert (i == count);
320    
321     if (fast)
322     qsort (hes, count, sizeof (HE *), he_cmp_fast);
323     else
324     {
325 root 1.8 // hack to forcefully disable "use bytes"
326     COP cop = *PL_curcop;
327 root 1.1 cop.op_private = 0;
328 root 1.8
329     ENTER;
330     SAVETMPS;
331    
332     SAVEVPTR (PL_curcop);
333 root 1.1 PL_curcop = &cop;
334    
335     qsort (hes, count, sizeof (HE *), he_cmp_slow);
336 root 1.8
337 root 1.1 FREETMPS;
338 root 1.8 LEAVE;
339 root 1.1 }
340    
341     for (i = 0; i < count; ++i)
342     {
343     INDENT;
344     encode_he (enc, hes [i]);
345    
346     if (i < count - 1)
347     COMMA;
348     }
349    
350     NL;
351     }
352     else
353     {
354     SV *sv;
355     HE *he = hv_iternext (hv);
356    
357     for (;;)
358     {
359     INDENT;
360     encode_he (enc, he);
361    
362     if (!(he = hv_iternext (hv)))
363     break;
364    
365     COMMA;
366     }
367    
368     NL;
369     }
370     }
371    
372     --enc->indent; INDENT; encode_ch (enc, '}');
373     }
374    
375     static void
376     encode_sv (enc_t *enc, SV *sv)
377     {
378 root 1.4 SvGETMAGIC (sv);
379    
380 root 1.1 if (SvPOKp (sv))
381     {
382     STRLEN len;
383     char *str = SvPV (sv, len);
384     encode_ch (enc, '"');
385     encode_str (enc, str, len, SvUTF8 (sv));
386     encode_ch (enc, '"');
387     }
388     else if (SvNOKp (sv))
389     {
390     need (enc, NV_DIG + 32);
391     Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
392     enc->cur += strlen (enc->cur);
393     }
394     else if (SvIOKp (sv))
395     {
396     need (enc, 64);
397     enc->cur +=
398     SvIsUV(sv)
399     ? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))
400     : snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));
401     }
402     else if (SvROK (sv))
403     {
404 root 1.9 SV *rv = SvRV (sv);
405    
406 root 1.1 if (!--enc->max_recurse)
407     croak ("data structure too deep (hit recursion limit)");
408    
409 root 1.9 switch (SvTYPE (rv))
410 root 1.1 {
411 root 1.9 case SVt_PVAV: encode_av (enc, (AV *)rv); break;
412     case SVt_PVHV: encode_hv (enc, (HV *)rv); break;
413 root 1.1
414     default:
415 root 1.9 croak ("encountered %s, but JSON can only represent references to arrays or hashes",
416     SvPV_nolen (sv));
417 root 1.1 }
418     }
419     else if (!SvOK (sv))
420     encode_str (enc, "null", 4, 0);
421     else
422 root 1.9 croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
423     SvPV_nolen (sv), SvFLAGS (sv));
424 root 1.1 }
425    
426     static SV *
427     encode_json (SV *scalar, UV flags)
428     {
429 root 1.3 if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
430 root 1.9 croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
431 root 1.3
432 root 1.1 enc_t enc;
433     enc.flags = flags;
434     enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
435     enc.cur = SvPVX (enc.sv);
436     enc.end = SvEND (enc.sv);
437     enc.max_recurse = 0;
438     enc.indent = 0;
439    
440     SvPOK_only (enc.sv);
441     encode_sv (&enc, scalar);
442    
443     if (!(flags & (F_ASCII | F_UTF8)))
444     SvUTF8_on (enc.sv);
445    
446     SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
447 root 1.6
448     if (enc.flags & F_SHRINK)
449 root 1.7 shrink (enc.sv);
450    
451 root 1.1 return enc.sv;
452     }
453    
454     /////////////////////////////////////////////////////////////////////////////
455    
// Parser helpers. All of them expect a variable named dec (dec_t *) in
// scope; ERR and EXPECT_CH additionally need a fail: label in the
// enclosing function.

// WS: skips spaces, newlines, carriage returns and tabs
#define WS \
  for (;;) \
    { \
      char ch = *dec->cur; \
      if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09) \
        break; \
      ++dec->cur; \
    }

// ERR: records the error message and jumps to fail
#define ERR(reason) SB dec->err = reason; goto fail; SE

// EXPECT_CH: consumes ch or fails with "<ch> expected"
#define EXPECT_CH(ch) SB \
  if (*dec->cur == ch) \
    ++dec->cur; \
  else \
    ERR (# ch " expected"); \
  SE
472    
473     static SV *decode_sv (dec_t *dec);
474    
475     static signed char decode_hexdigit[256];
476    
477     static UV
478     decode_4hex (dec_t *dec)
479     {
480     signed char d1, d2, d3, d4;
481    
482     d1 = decode_hexdigit [((unsigned char *)dec->cur) [0]];
483     if (d1 < 0) ERR ("four hexadecimal digits expected");
484     d2 = decode_hexdigit [((unsigned char *)dec->cur) [1]];
485     if (d2 < 0) ERR ("four hexadecimal digits expected");
486     d3 = decode_hexdigit [((unsigned char *)dec->cur) [2]];
487     if (d3 < 0) ERR ("four hexadecimal digits expected");
488     d4 = decode_hexdigit [((unsigned char *)dec->cur) [3]];
489     if (d4 < 0) ERR ("four hexadecimal digits expected");
490    
491     dec->cur += 4;
492    
493     return ((UV)d1) << 12
494     | ((UV)d2) << 8
495     | ((UV)d3) << 4
496     | ((UV)d4);
497    
498     fail:
499     return (UV)-1;
500     }
501    
// Output helpers for decode_str. They expect the locals sv (target
// scalar), cur (write position) and end (buffer limit) in scope.

// APPEND_GROW: ensures more than n bytes are available at cur.
// end is derived from the allocated length (SvLEN), like need () does
// for the encoder. SvCUR is not maintained while the string is being
// built, so deriving end from SvEND made the capacity check always true.
#define APPEND_GROW(n) SB \
  if (cur + (n) >= end) \
    { \
      STRLEN ofs = cur - SvPVX (sv); \
      SvGROW (sv, ofs + (n) + 1); \
      cur = SvPVX (sv) + ofs; \
      end = SvPVX (sv) + SvLEN (sv); \
    } \
  SE

// APPEND_CH: appends one byte
#define APPEND_CH(ch) SB \
  APPEND_GROW (1); \
  *cur++ = (ch); \
  SE
516    
517 root 1.1 static SV *
518     decode_str (dec_t *dec)
519     {
520     SV *sv = NEWSV (0,2);
521     int utf8 = 0;
522 root 1.4 char *cur = SvPVX (sv);
523     char *end = SvEND (sv);
524 root 1.1
525     for (;;)
526     {
527     unsigned char ch = *(unsigned char *)dec->cur;
528    
529     if (ch == '"')
530     break;
531     else if (ch == '\\')
532     {
533     switch (*++dec->cur)
534     {
535     case '\\':
536     case '/':
537     case '"': APPEND_CH (*dec->cur++); break;
538    
539     case 'b': APPEND_CH ('\010'); ++dec->cur; break;
540     case 't': APPEND_CH ('\011'); ++dec->cur; break;
541     case 'n': APPEND_CH ('\012'); ++dec->cur; break;
542     case 'f': APPEND_CH ('\014'); ++dec->cur; break;
543     case 'r': APPEND_CH ('\015'); ++dec->cur; break;
544    
545     case 'u':
546     {
547     UV lo, hi;
548     ++dec->cur;
549    
550     hi = decode_4hex (dec);
551     if (hi == (UV)-1)
552     goto fail;
553    
554     // possibly a surrogate pair
555     if (hi >= 0xd800 && hi < 0xdc00)
556     {
557     if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
558 root 1.5 ERR ("missing low surrogate character in surrogate pair");
559 root 1.1
560     dec->cur += 2;
561    
562     lo = decode_4hex (dec);
563     if (lo == (UV)-1)
564     goto fail;
565    
566     if (lo < 0xdc00 || lo >= 0xe000)
567     ERR ("surrogate pair expected");
568    
569     hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
570     }
571 root 1.5 else if (hi >= 0xdc00 && hi < 0xe000)
572     ERR ("missing high surrogate character in surrogate pair");
573 root 1.1
574     if (hi >= 0x80)
575     {
576     utf8 = 1;
577    
578 root 1.4 APPEND_GROW (4); // at most 4 bytes for 21 bits
579     cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
580 root 1.1 }
581     else
582     APPEND_CH (hi);
583     }
584     break;
585 root 1.5
586     default:
587     --dec->cur;
588     ERR ("illegal backslash escape sequence in string");
589 root 1.1 }
590     }
591     else if (ch >= 0x20 && ch <= 0x7f)
592     APPEND_CH (*dec->cur++);
593     else if (ch >= 0x80)
594     {
595     STRLEN clen;
596     UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
597 root 1.5 if (clen == (STRLEN)-1)
598 root 1.7 ERR ("malformed UTF-8 character in JSON string");
599 root 1.1
600 root 1.4 APPEND_GROW (clen);
601 root 1.5 do
602     {
603     *cur++ = *dec->cur++;
604     }
605     while (--clen);
606    
607     utf8 = 1;
608 root 1.1 }
609 root 1.5 else if (dec->cur == dec->end)
610     ERR ("unexpected end of string while parsing json string");
611 root 1.1 else
612     ERR ("invalid character encountered");
613     }
614    
615     ++dec->cur;
616    
617 root 1.4 SvCUR_set (sv, cur - SvPVX (sv));
618    
619 root 1.1 SvPOK_only (sv);
620     *SvEND (sv) = 0;
621    
622     if (utf8)
623     SvUTF8_on (sv);
624    
625 root 1.6 if (dec->flags & F_SHRINK)
626 root 1.7 shrink (sv);
627 root 1.6
628 root 1.1 return sv;
629    
630     fail:
631     SvREFCNT_dec (sv);
632     return 0;
633     }
634    
635     static SV *
636     decode_num (dec_t *dec)
637     {
638     int is_nv = 0;
639     char *start = dec->cur;
640    
641     // [minus]
642     if (*dec->cur == '-')
643     ++dec->cur;
644    
645     if (*dec->cur == '0')
646     {
647     ++dec->cur;
648     if (*dec->cur >= '0' && *dec->cur <= '9')
649     ERR ("malformed number (leading zero must not be followed by another digit)");
650     }
651 root 1.5 else if (*dec->cur < '0' || *dec->cur > '9')
652     ERR ("malformed number (no digits after initial minus)");
653     else
654     do
655     {
656     ++dec->cur;
657     }
658     while (*dec->cur >= '0' && *dec->cur <= '9');
659 root 1.1
660     // [frac]
661     if (*dec->cur == '.')
662     {
663 root 1.5 ++dec->cur;
664    
665     if (*dec->cur < '0' || *dec->cur > '9')
666     ERR ("malformed number (no digits after decimal point)");
667 root 1.1
668     do
669     {
670     ++dec->cur;
671     }
672     while (*dec->cur >= '0' && *dec->cur <= '9');
673 root 1.5
674     is_nv = 1;
675 root 1.1 }
676    
677     // [exp]
678     if (*dec->cur == 'e' || *dec->cur == 'E')
679     {
680 root 1.5 ++dec->cur;
681 root 1.1
682     if (*dec->cur == '-' || *dec->cur == '+')
683     ++dec->cur;
684    
685 root 1.5 if (*dec->cur < '0' || *dec->cur > '9')
686     ERR ("malformed number (no digits after exp sign)");
687    
688     do
689     {
690     ++dec->cur;
691     }
692     while (*dec->cur >= '0' && *dec->cur <= '9');
693    
694     is_nv = 1;
695 root 1.1 }
696    
697     if (!is_nv)
698     {
699     UV uv;
700     int numtype = grok_number (start, dec->cur - start, &uv);
701     if (numtype & IS_NUMBER_IN_UV)
702     if (numtype & IS_NUMBER_NEG)
703     {
704     if (uv < (UV)IV_MIN)
705     return newSViv (-(IV)uv);
706     }
707     else
708     return newSVuv (uv);
709     }
710    
711     return newSVnv (Atof (start));
712    
713     fail:
714     return 0;
715     }
716    
717     static SV *
718     decode_av (dec_t *dec)
719     {
720     AV *av = newAV ();
721    
722 root 1.5 WS;
723     if (*dec->cur == ']')
724     ++dec->cur;
725     else
726     for (;;)
727     {
728     SV *value;
729 root 1.1
730 root 1.5 value = decode_sv (dec);
731     if (!value)
732     goto fail;
733 root 1.1
734 root 1.5 av_push (av, value);
735 root 1.1
736 root 1.5 WS;
737 root 1.1
738 root 1.5 if (*dec->cur == ']')
739     {
740     ++dec->cur;
741     break;
742     }
743    
744     if (*dec->cur != ',')
745     ERR (", or ] expected while parsing array");
746 root 1.1
747 root 1.5 ++dec->cur;
748     }
749 root 1.1
750     return newRV_noinc ((SV *)av);
751    
752     fail:
753     SvREFCNT_dec (av);
754     return 0;
755     }
756    
757     static SV *
758     decode_hv (dec_t *dec)
759     {
760     HV *hv = newHV ();
761    
762 root 1.5 WS;
763     if (*dec->cur == '}')
764     ++dec->cur;
765     else
766     for (;;)
767     {
768     SV *key, *value;
769 root 1.1
770 root 1.5 WS; EXPECT_CH ('"');
771 root 1.1
772 root 1.5 key = decode_str (dec);
773     if (!key)
774     goto fail;
775 root 1.1
776 root 1.5 WS; EXPECT_CH (':');
777 root 1.1
778 root 1.5 value = decode_sv (dec);
779     if (!value)
780     {
781     SvREFCNT_dec (key);
782     goto fail;
783     }
784 root 1.1
785 root 1.5 //TODO: optimise
786     hv_store_ent (hv, key, value, 0);
787 root 1.1
788 root 1.5 WS;
789 root 1.1
790 root 1.5 if (*dec->cur == '}')
791     {
792     ++dec->cur;
793     break;
794     }
795 root 1.1
796 root 1.5 if (*dec->cur != ',')
797     ERR (", or } expected while parsing object/hash");
798 root 1.1
799 root 1.5 ++dec->cur;
800     }
801 root 1.1
802     return newRV_noinc ((SV *)hv);
803    
804     fail:
805     SvREFCNT_dec (hv);
806     return 0;
807     }
808    
809     static SV *
810     decode_sv (dec_t *dec)
811     {
812     WS;
813     switch (*dec->cur)
814     {
815     case '"': ++dec->cur; return decode_str (dec);
816     case '[': ++dec->cur; return decode_av (dec);
817     case '{': ++dec->cur; return decode_hv (dec);
818    
819     case '-':
820     case '0': case '1': case '2': case '3': case '4':
821     case '5': case '6': case '7': case '8': case '9':
822     return decode_num (dec);
823    
824     case 't':
825     if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
826     {
827     dec->cur += 4;
828     return newSViv (1);
829     }
830     else
831     ERR ("'true' expected");
832    
833     break;
834    
835     case 'f':
836     if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
837     {
838     dec->cur += 5;
839     return newSViv (0);
840     }
841     else
842     ERR ("'false' expected");
843    
844     break;
845    
846     case 'n':
847     if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
848     {
849     dec->cur += 4;
850 root 1.5 return newSVsv (&PL_sv_undef);
851 root 1.1 }
852     else
853     ERR ("'null' expected");
854    
855     break;
856    
857     default:
858 root 1.7 ERR ("malformed json string, neither array, object, number, string or atom");
859 root 1.1 break;
860     }
861    
862     fail:
863     return 0;
864     }
865    
866     static SV *
867     decode_json (SV *string, UV flags)
868     {
869     SV *sv;
870    
871 root 1.5 if (flags & F_UTF8)
872     sv_utf8_downgrade (string, 0);
873     else
874 root 1.1 sv_utf8_upgrade (string);
875    
876     SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
877    
878     dec_t dec;
879     dec.flags = flags;
880     dec.cur = SvPVX (string);
881     dec.end = SvEND (string);
882     dec.err = 0;
883    
884     sv = decode_sv (&dec);
885    
886     if (!sv)
887     {
888 root 1.7 IV offset = dec.flags & F_UTF8
889     ? dec.cur - SvPVX (string)
890     : utf8_distance (dec.cur, SvPVX (string));
891 root 1.1 SV *uni = sv_newmortal ();
892 root 1.8
893 root 1.5 // horrible hack to silence warning inside pv_uni_display
894 root 1.8 COP cop = *PL_curcop;
895 root 1.5 cop.cop_warnings = pWARN_NONE;
896 root 1.8 ENTER;
897 root 1.5 SAVEVPTR (PL_curcop);
898     PL_curcop = &cop;
899 root 1.8 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
900     LEAVE;
901 root 1.1
902 root 1.5 croak ("%s, at character offset %d (%s)",
903 root 1.1 dec.err,
904     (int)offset,
905     dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
906     }
907    
908 root 1.3 sv = sv_2mortal (sv);
909    
910     if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
911 root 1.9 croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
912 root 1.3
913     return sv;
914 root 1.1 }
915    
916     MODULE = JSON::XS PACKAGE = JSON::XS
917    
918     BOOT:
919     {
920     int i;
921    
922     memset (decode_hexdigit, 0xff, 256);
923     for (i = 10; i--; )
924     decode_hexdigit ['0' + i] = i;
925    
926 root 1.4 for (i = 7; i--; )
927 root 1.1 {
928     decode_hexdigit ['a' + i] = 10 + i;
929     decode_hexdigit ['A' + i] = 10 + i;
930     }
931    
932     json_stash = gv_stashpv ("JSON::XS", 1);
933     }
934    
935 root 1.4 PROTOTYPES: DISABLE
936    
937 root 1.1 SV *new (char *dummy)
938     CODE:
939     RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);
940     OUTPUT:
941     RETVAL
942    
943 root 1.6 SV *ascii (SV *self, int enable = 1)
944 root 1.1 ALIAS:
945     ascii = F_ASCII
946     utf8 = F_UTF8
947     indent = F_INDENT
948     canonical = F_CANONICAL
949     space_before = F_SPACE_BEFORE
950     space_after = F_SPACE_AFTER
951     json_rpc = F_JSON_RPC
952 root 1.2 pretty = F_PRETTY
953 root 1.3 allow_nonref = F_ALLOW_NONREF
954 root 1.6 shrink = F_SHRINK
955 root 1.1 CODE:
956     {
957     UV *uv = SvJSON (self);
958     if (enable)
959     *uv |= ix;
960     else
961     *uv &= ~ix;
962    
963     RETVAL = newSVsv (self);
964     }
965     OUTPUT:
966     RETVAL
967    
968     void encode (SV *self, SV *scalar)
969     PPCODE:
970     XPUSHs (encode_json (scalar, *SvJSON (self)));
971    
972 root 1.2 void decode (SV *self, SV *jsonstr)
973 root 1.1 PPCODE:
974 root 1.2 XPUSHs (decode_json (jsonstr, *SvJSON (self)));
975    
976 root 1.4 PROTOTYPES: ENABLE
977    
978 root 1.2 void to_json (SV *scalar)
979     PPCODE:
980     XPUSHs (encode_json (scalar, F_UTF8));
981    
982     void from_json (SV *jsonstr)
983     PPCODE:
984     XPUSHs (decode_json (jsonstr, F_UTF8));
985 root 1.1