ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
Revision: 1.1
Committed: Thu Mar 22 16:40:16 2007 UTC (17 years, 2 months ago) by root
Branch: MAIN
Log Message:
initial check-in

File Contents

# Content
1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4
5 #include "assert.h"
6 #include "string.h"
7 #include "stdlib.h"
8
// Option bits stored in the UV of every JSON::XS object (see SvJSON).
// The XS accessor methods set and clear exactly one of these each.
#define F_ASCII        0x01 // encoder: \u-escape every character that is not plain ASCII
#define F_UTF8         0x02 // decoder: input is taken as-is instead of being upgraded first
#define F_INDENT       0x04 // encoder: emit newlines and three spaces per nesting level
#define F_CANONICAL    0x08 // encoder: sort hash keys before writing them
#define F_SPACE_BEFORE 0x10 // encoder: request a space before the ':' of a pair
#define F_SPACE_AFTER  0x20 // encoder: request a space after ':' and after ','
#define F_JSON_RPC     0x40 // has an accessor, but nothing in this file reads it

// flags of a freshly constructed object
#define F_DEFAULT 0

// initial scalar size to be allocated
#define INIT_SIZE 32

// statement brackets: wrap a multi-statement macro body so that it
// behaves like a single statement
#define SB do {
#define SE } while (0)
23
24 static HV *json_stash;
25
26 // structure used for encoding JSON
27 typedef struct
28 {
29 char *cur;
30 STRLEN len; // SvLEN (sv)
31 char *end; // SvEND (sv)
32 SV *sv;
33 UV flags;
34 int max_recurse;
35 int indent;
36 } enc_t;
37
38 // structure used for decoding JSON
39 typedef struct
40 {
41 char *cur;
42 char *end;
43 char *err;
44 UV flags;
45 } dec_t;
46
47 static UV *
48 SvJSON (SV *sv)
49 {
50 if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
51 croak ("object is not of type JSON::XS");
52
53 return &SvUVX (SvRV (sv));
54 }
55
56 /////////////////////////////////////////////////////////////////////////////
57
58 static void
59 need (enc_t *enc, STRLEN len)
60 {
61 if (enc->cur + len >= enc->end)
62 {
63 STRLEN cur = enc->cur - SvPVX (enc->sv);
64 SvGROW (enc->sv, cur + len + 1);
65 enc->cur = SvPVX (enc->sv) + cur;
66 enc->end = SvEND (enc->sv);
67 }
68 }
69
70 static void
71 encode_ch (enc_t *enc, char ch)
72 {
73 need (enc, 1);
74 *enc->cur++ = ch;
75 }
76
77 static void
78 encode_str (enc_t *enc, char *str, STRLEN len, int is_utf8)
79 {
80 char *end = str + len;
81
82 while (str < end)
83 {
84 unsigned char ch = *(unsigned char *)str;
85 if (ch >= 0x20 && ch < 0x80) // most common case
86 {
87 *enc->cur++ = ch;
88 str++;
89 }
90 else
91 {
92 STRLEN clen;
93 UV uch;
94
95 if (is_utf8)
96 {
97 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
98 if (clen < 0)
99 croak ("malformed UTF-8 character in string, cannot convert to JSON");
100 }
101 else
102 {
103 uch = ch;
104 clen = 1;
105 }
106
107 need (enc, len += 6);
108
109 if (uch < 0xa0 || enc->flags & F_ASCII)
110 {
111 if (uch > 0xFFFFUL)
112 {
113 len += 6;
114 need (enc, len += 6);
115 sprintf (enc->cur, "\\u%04x\\u%04x",
116 (uch - 0x10000) / 0x400 + 0xD800,
117 (uch - 0x10000) % 0x400 + 0xDC00);
118 enc->cur += 12;
119 }
120 else
121 {
122 sprintf (enc->cur, "\\u%04x", uch);
123 enc->cur += 6;
124 }
125 }
126 else if (is_utf8)
127 {
128 memcpy (enc->cur, str, clen);
129 enc->cur += clen;
130 }
131 else
132 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
133
134 str += clen;
135 }
136
137 --len;
138 }
139 }
140
// Whitespace helpers for the encoder. All of them expect an `enc_t *enc`
// to be in scope and expand to a single statement.

// INDENT: with F_INDENT set, writes three spaces per nesting level.
// The whole run is reserved with one need () call.
#define INDENT SB \
  if (enc->flags & F_INDENT) \
    { \
      int i_; \
      need (enc, enc->indent * 3); \
      for (i_ = enc->indent * 3; i_--; ) \
        encode_ch (enc, ' '); \
    } \
  SE

// SPACE: writes one space, unconditionally. Every user already tests
// F_SPACE_BEFORE or F_SPACE_AFTER, so the macro must not additionally
// depend on F_INDENT (it did, which made both space flags ineffective
// unless indenting was enabled as well).
#define SPACE SB encode_ch (enc, ' '); SE

// NL: with F_INDENT set, writes a newline.
#define NL SB if (enc->flags & F_INDENT) encode_ch (enc, '\n'); SE

// COMMA: writes the element separator, followed by a newline when
// indenting, or else by a space when F_SPACE_AFTER is set.
#define COMMA SB \
  encode_ch (enc, ','); \
  if (enc->flags & F_INDENT) \
    NL; \
  else if (enc->flags & F_SPACE_AFTER) \
    SPACE; \
  SE
160
161 static void encode_sv (enc_t *enc, SV *sv);
162
163 static void
164 encode_av (enc_t *enc, AV *av)
165 {
166 int i, len = av_len (av);
167
168 encode_ch (enc, '['); NL;
169 ++enc->indent;
170
171 for (i = 0; i <= len; ++i)
172 {
173 INDENT;
174 encode_sv (enc, *av_fetch (av, i, 0));
175
176 if (i < len)
177 COMMA;
178 }
179
180 NL;
181
182 --enc->indent;
183 INDENT; encode_ch (enc, ']');
184 }
185
186 static void
187 encode_he (enc_t *enc, HE *he)
188 {
189 encode_ch (enc, '"');
190
191 if (HeKLEN (he) == HEf_SVKEY)
192 {
193 SV *sv = HeSVKEY (he);
194 STRLEN len;
195 char *str = SvPV (sv, len);
196
197 encode_str (enc, str, len, SvUTF8 (sv));
198 }
199 else
200 encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
201
202 encode_ch (enc, '"');
203
204 if (enc->flags & F_SPACE_BEFORE) SPACE;
205 encode_ch (enc, ':');
206 if (enc->flags & F_SPACE_AFTER ) SPACE;
207 encode_sv (enc, HeVAL (he));
208 }
209
210 // compare hash entries, used when all keys are bytestrings
211 static int
212 he_cmp_fast (const void *a_, const void *b_)
213 {
214 int cmp;
215
216 HE *a = *(HE **)a_;
217 HE *b = *(HE **)b_;
218
219 STRLEN la = HeKLEN (a);
220 STRLEN lb = HeKLEN (b);
221
222 if (!(cmp == memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
223 cmp = la < lb ? -1 : la == lb ? 0 : 1;
224
225 return cmp;
226 }
227
228 // compare hash entries, used when some keys are sv's or utf-x
229 static int
230 he_cmp_slow (const void *a, const void *b)
231 {
232 return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b));
233 }
234
235 static void
236 encode_hv (enc_t *enc, HV *hv)
237 {
238 int count, i;
239
240 encode_ch (enc, '{'); NL; ++enc->indent;
241
242 if ((count = hv_iterinit (hv)))
243 {
244 // for canonical output we have to sort by keys first
245 // actually, this is mostly due to the stupid so-called
246 // security workaround added somewhere in 5.8.x.
247 // that randomises hash orderings
248 if (enc->flags & F_CANONICAL)
249 {
250 HE *he, *hes [count];
251 int fast = 1;
252
253 i = 0;
254 while ((he = hv_iternext (hv)))
255 {
256 hes [i++] = he;
257 if (HeKLEN (he) < 0 || HeKUTF8 (he))
258 fast = 0;
259 }
260
261 assert (i == count);
262
263 if (fast)
264 qsort (hes, count, sizeof (HE *), he_cmp_fast);
265 else
266 {
267 // hack to disable "use bytes"
268 COP *oldcop = PL_curcop, cop;
269 cop.op_private = 0;
270 PL_curcop = &cop;
271
272 SAVETMPS;
273 qsort (hes, count, sizeof (HE *), he_cmp_slow);
274 FREETMPS;
275
276 PL_curcop = oldcop;
277 }
278
279 for (i = 0; i < count; ++i)
280 {
281 INDENT;
282 encode_he (enc, hes [i]);
283
284 if (i < count - 1)
285 COMMA;
286 }
287
288 NL;
289 }
290 else
291 {
292 SV *sv;
293 HE *he = hv_iternext (hv);
294
295 for (;;)
296 {
297 INDENT;
298 encode_he (enc, he);
299
300 if (!(he = hv_iternext (hv)))
301 break;
302
303 COMMA;
304 }
305
306 NL;
307 }
308 }
309
310 --enc->indent; INDENT; encode_ch (enc, '}');
311 }
312
313 static void
314 encode_sv (enc_t *enc, SV *sv)
315 {
316 if (SvPOKp (sv))
317 {
318 STRLEN len;
319 char *str = SvPV (sv, len);
320 encode_ch (enc, '"');
321 encode_str (enc, str, len, SvUTF8 (sv));
322 encode_ch (enc, '"');
323 }
324 else if (SvNOKp (sv))
325 {
326 need (enc, NV_DIG + 32);
327 Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
328 enc->cur += strlen (enc->cur);
329 }
330 else if (SvIOKp (sv))
331 {
332 need (enc, 64);
333 enc->cur +=
334 SvIsUV(sv)
335 ? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))
336 : snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));
337 }
338 else if (SvROK (sv))
339 {
340 if (!--enc->max_recurse)
341 croak ("data structure too deep (hit recursion limit)");
342
343 sv = SvRV (sv);
344
345 switch (SvTYPE (sv))
346 {
347 case SVt_PVAV: encode_av (enc, (AV *)sv); break;
348 case SVt_PVHV: encode_hv (enc, (HV *)sv); break;
349
350 default:
351 croak ("JSON can only represent references to arrays or hashes");
352 }
353 }
354 else if (!SvOK (sv))
355 encode_str (enc, "null", 4, 0);
356 else
357 croak ("encountered perl type that JSON cannot handle");
358 }
359
360 static SV *
361 encode_json (SV *scalar, UV flags)
362 {
363 enc_t enc;
364 enc.flags = flags;
365 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
366 enc.cur = SvPVX (enc.sv);
367 enc.end = SvEND (enc.sv);
368 enc.max_recurse = 0;
369 enc.indent = 0;
370
371 SvPOK_only (enc.sv);
372 encode_sv (&enc, scalar);
373
374 if (!(flags & (F_ASCII | F_UTF8)))
375 SvUTF8_on (enc.sv);
376
377 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
378 return enc.sv;
379 }
380
381 /////////////////////////////////////////////////////////////////////////////
382
// Parsing helpers for the decoder. All of them expect a pointer `dec`
// with `cur` and `err` members to be in scope; ERR and EXPECT_CH also
// need a `fail:` label in the enclosing function. Each one expands to a
// single statement, so it can be used in if/else without braces.

// WS: advances dec->cur over spaces, tabs, carriage returns and newlines.
#define WS SB \
  for (;;) \
    { \
      char ch_ = *dec->cur; \
      if (ch_ > 0x20 \
          || (ch_ != 0x20 && ch_ != 0x0a && ch_ != 0x0d && ch_ != 0x09)) \
        break; \
      ++dec->cur; \
    } \
  SE

// ERR: records the error description and jumps to the fail label.
#define ERR(reason) SB dec->err = (reason); goto fail; SE

// EXPECT_CH: consumes the character ch, or fails with "<ch> expected"
// where <ch> is the argument exactly as written at the call site.
#define EXPECT_CH(ch) SB \
  if (*dec->cur != (ch)) \
    ERR (# ch " expected"); \
  ++dec->cur; \
  SE
399
400 static SV *decode_sv (dec_t *dec);
401
402 #define APPEND_CH(ch) SB \
403 SvGROW (sv, cur + 1 + 1); \
404 SvPVX (sv)[cur++] = (ch); \
405 SE
406
407 static signed char decode_hexdigit[256];
408
409 static UV
410 decode_4hex (dec_t *dec)
411 {
412 signed char d1, d2, d3, d4;
413
414 d1 = decode_hexdigit [((unsigned char *)dec->cur) [0]];
415 if (d1 < 0) ERR ("four hexadecimal digits expected");
416 d2 = decode_hexdigit [((unsigned char *)dec->cur) [1]];
417 if (d2 < 0) ERR ("four hexadecimal digits expected");
418 d3 = decode_hexdigit [((unsigned char *)dec->cur) [2]];
419 if (d3 < 0) ERR ("four hexadecimal digits expected");
420 d4 = decode_hexdigit [((unsigned char *)dec->cur) [3]];
421 if (d4 < 0) ERR ("four hexadecimal digits expected");
422
423 dec->cur += 4;
424
425 return ((UV)d1) << 12
426 | ((UV)d2) << 8
427 | ((UV)d3) << 4
428 | ((UV)d4);
429
430 fail:
431 return (UV)-1;
432 }
433
434 static SV *
435 decode_str (dec_t *dec)
436 {
437 SV *sv = NEWSV (0,2);
438 STRLEN cur = 0;
439 int utf8 = 0;
440
441 for (;;)
442 {
443 unsigned char ch = *(unsigned char *)dec->cur;
444
445 if (ch == '"')
446 break;
447 else if (ch == '\\')
448 {
449 switch (*++dec->cur)
450 {
451 case '\\':
452 case '/':
453 case '"': APPEND_CH (*dec->cur++); break;
454
455 case 'b': APPEND_CH ('\010'); ++dec->cur; break;
456 case 't': APPEND_CH ('\011'); ++dec->cur; break;
457 case 'n': APPEND_CH ('\012'); ++dec->cur; break;
458 case 'f': APPEND_CH ('\014'); ++dec->cur; break;
459 case 'r': APPEND_CH ('\015'); ++dec->cur; break;
460
461 case 'u':
462 {
463 UV lo, hi;
464 ++dec->cur;
465
466 hi = decode_4hex (dec);
467 if (hi == (UV)-1)
468 goto fail;
469
470 // possibly a surrogate pair
471 if (hi >= 0xd800 && hi < 0xdc00)
472 {
473 if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
474 ERR ("illegal surrogate character");
475
476 dec->cur += 2;
477
478 lo = decode_4hex (dec);
479 if (lo == (UV)-1)
480 goto fail;
481
482 if (lo < 0xdc00 || lo >= 0xe000)
483 ERR ("surrogate pair expected");
484
485 hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
486 }
487 else if (lo >= 0xdc00 && lo < 0xe000)
488 ERR ("illegal surrogate character");
489
490 if (hi >= 0x80)
491 {
492 utf8 = 1;
493
494 SvGROW (sv, cur + 4 + 1); // at most 4 bytes for 21 bits
495 cur = (char *)uvuni_to_utf8_flags (SvPVX (sv) + cur, hi, 0) - SvPVX (sv);
496 }
497 else
498 APPEND_CH (hi);
499 }
500 break;
501 }
502 }
503 else if (ch >= 0x20 && ch <= 0x7f)
504 APPEND_CH (*dec->cur++);
505 else if (ch >= 0x80)
506 {
507 STRLEN clen;
508 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
509 if (clen < 0)
510 ERR ("malformed UTF-8 character in string, cannot convert to JSON");
511
512 SvGROW (sv, cur + clen + 1); // at most 4 bytes for 21 bits
513 memcpy (SvPVX (sv) + cur, dec->cur, clen);
514 dec->cur += clen;
515 }
516 else
517 ERR ("invalid character encountered");
518 }
519
520 ++dec->cur;
521
522 SvPOK_only (sv);
523
524 SvCUR_set (sv, cur);
525 *SvEND (sv) = 0;
526
527 if (utf8)
528 SvUTF8_on (sv);
529
530 return sv;
531
532 fail:
533 SvREFCNT_dec (sv);
534 return 0;
535 }
536
537 static SV *
538 decode_num (dec_t *dec)
539 {
540 int is_nv = 0;
541 char *start = dec->cur;
542
543 // [minus]
544 if (*dec->cur == '-')
545 ++dec->cur;
546
547 if (*dec->cur == '0')
548 {
549 ++dec->cur;
550 if (*dec->cur >= '0' && *dec->cur <= '9')
551 ERR ("malformed number (leading zero must not be followed by another digit)");
552 }
553
554 // int
555 while (*dec->cur >= '0' && *dec->cur <= '9')
556 ++dec->cur;
557
558 // [frac]
559 if (*dec->cur == '.')
560 {
561 is_nv = 1;
562
563 do
564 {
565 ++dec->cur;
566 }
567 while (*dec->cur >= '0' && *dec->cur <= '9');
568 }
569
570 // [exp]
571 if (*dec->cur == 'e' || *dec->cur == 'E')
572 {
573 is_nv = 1;
574
575 ++dec->cur;
576 if (*dec->cur == '-' || *dec->cur == '+')
577 ++dec->cur;
578
579 while (*dec->cur >= '0' && *dec->cur <= '9')
580 ++dec->cur;
581 }
582
583 if (!is_nv)
584 {
585 UV uv;
586 int numtype = grok_number (start, dec->cur - start, &uv);
587 if (numtype & IS_NUMBER_IN_UV)
588 if (numtype & IS_NUMBER_NEG)
589 {
590 if (uv < (UV)IV_MIN)
591 return newSViv (-(IV)uv);
592 }
593 else
594 return newSVuv (uv);
595 }
596
597 return newSVnv (Atof (start));
598
599 fail:
600 return 0;
601 }
602
603 static SV *
604 decode_av (dec_t *dec)
605 {
606 AV *av = newAV ();
607
608 for (;;)
609 {
610 SV *value;
611
612 value = decode_sv (dec);
613 if (!value)
614 goto fail;
615
616 av_push (av, value);
617
618 WS;
619
620 if (*dec->cur == ']')
621 {
622 ++dec->cur;
623 break;
624 }
625
626 if (*dec->cur != ',')
627 ERR (", or ] expected while parsing array");
628
629 ++dec->cur;
630 }
631
632 return newRV_noinc ((SV *)av);
633
634 fail:
635 SvREFCNT_dec (av);
636 return 0;
637 }
638
639 static SV *
640 decode_hv (dec_t *dec)
641 {
642 HV *hv = newHV ();
643
644 for (;;)
645 {
646 SV *key, *value;
647
648 WS; EXPECT_CH ('"');
649
650 key = decode_str (dec);
651 if (!key)
652 goto fail;
653
654 WS; EXPECT_CH (':');
655
656 value = decode_sv (dec);
657 if (!value)
658 {
659 SvREFCNT_dec (key);
660 goto fail;
661 }
662
663 //TODO: optimise
664 hv_store_ent (hv, key, value, 0);
665
666 WS;
667
668 if (*dec->cur == '}')
669 {
670 ++dec->cur;
671 break;
672 }
673
674 if (*dec->cur != ',')
675 ERR (", or } expected while parsing object/hash");
676
677 ++dec->cur;
678 }
679
680 return newRV_noinc ((SV *)hv);
681
682 fail:
683 SvREFCNT_dec (hv);
684 return 0;
685 }
686
687 static SV *
688 decode_sv (dec_t *dec)
689 {
690 WS;
691 switch (*dec->cur)
692 {
693 case '"': ++dec->cur; return decode_str (dec);
694 case '[': ++dec->cur; return decode_av (dec);
695 case '{': ++dec->cur; return decode_hv (dec);
696
697 case '-':
698 case '0': case '1': case '2': case '3': case '4':
699 case '5': case '6': case '7': case '8': case '9':
700 return decode_num (dec);
701
702 case 't':
703 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
704 {
705 dec->cur += 4;
706 return newSViv (1);
707 }
708 else
709 ERR ("'true' expected");
710
711 break;
712
713 case 'f':
714 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
715 {
716 dec->cur += 5;
717 return newSViv (0);
718 }
719 else
720 ERR ("'false' expected");
721
722 break;
723
724 case 'n':
725 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
726 {
727 dec->cur += 4;
728 return newSViv (1);
729 }
730 else
731 ERR ("'null' expected");
732
733 break;
734
735 default:
736 ERR ("malformed json string");
737 break;
738 }
739
740 fail:
741 return 0;
742 }
743
744 static SV *
745 decode_json (SV *string, UV flags)
746 {
747 SV *sv;
748
749 if (!(flags & F_UTF8))
750 sv_utf8_upgrade (string);
751
752 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
753
754 dec_t dec;
755 dec.flags = flags;
756 dec.cur = SvPVX (string);
757 dec.end = SvEND (string);
758 dec.err = 0;
759
760 *dec.end = 1; // invalid anywhere
761 sv = decode_sv (&dec);
762 *dec.end = 0;
763
764 if (!sv)
765 {
766 IV offset = utf8_distance (dec.cur, SvPVX (string));
767 SV *uni = sv_newmortal ();
768
769 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
770 croak ("%s, at character %d (%s)",
771 dec.err,
772 (int)offset,
773 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
774 }
775
776 sv_dump (sv);//D
777 return sv_2mortal (sv);
778 }
779
MODULE = JSON::XS		PACKAGE = JSON::XS

BOOT:
{
        int i;

        // build the hex digit table used by decode_4hex:
        // -1 (0xff as signed char) marks bytes that are not hex digits
        memset (decode_hexdigit, 0xff, 256);

        for (i = 10; i--; )
          decode_hexdigit ['0' + i] = i;

        // post-decrement so that i == 0 is included; with a
        // pre-decrement 'a' and 'A' were never entered into the table
        for (i = 6; i--; )
          {
            decode_hexdigit ['a' + i] = 10 + i;
            decode_hexdigit ['A' + i] = 10 + i;
          }

        // stash compared against in SvJSON, created if necessary
        json_stash = gv_stashpv ("JSON::XS", 1);
}
798
SV *new (char *dummy)
        CODE:
{
        // the object is a blessed reference to a UV holding the F_* flags;
        // the argument (the invocant of the method call) is not looked at
        SV *flagword = newSVuv (F_DEFAULT);
        SV *self     = newRV_noinc (flagword);

        RETVAL = sv_bless (self, json_stash);
}
        OUTPUT:
        RETVAL
804
SV *ascii (SV *self, int enable)
        ALIAS:
        ascii        = F_ASCII
        utf8         = F_UTF8
        indent       = F_INDENT
        canonical    = F_CANONICAL
        space_before = F_SPACE_BEFORE
        space_after  = F_SPACE_AFTER
        json_rpc     = F_JSON_RPC
        CODE:
{
        // one implementation serves all option accessors: ix is the
        // F_* bit of the alias the method was called as
        UV *flagword = SvJSON (self);

        *flagword = enable ? (*flagword | ix) : (*flagword & ~ix);

        // hand back a copy of the object reference
        RETVAL = newSVsv (self);
}
        OUTPUT:
        RETVAL
826
void encode (SV *self, SV *scalar)
        PPCODE:
{
        // encode scalar using the options stored in the object
        UV flags = *SvJSON (self);

        XPUSHs (encode_json (scalar, flags));
}
830
void decode (SV *self, SV *jsondata)
        PPCODE:
{
        // decode jsondata using the options stored in the object
        UV flags = *SvJSON (self);

        XPUSHs (decode_json (jsondata, flags));
}
834