ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.5 by root, Thu Mar 22 23:24:18 2007 UTC vs.
Revision 1.10 by root, Sat Mar 24 01:15:22 2007 UTC

10#define F_UTF8 0x00000002 10#define F_UTF8 0x00000002
11#define F_INDENT 0x00000004 11#define F_INDENT 0x00000004
12#define F_CANONICAL 0x00000008 12#define F_CANONICAL 0x00000008
13#define F_SPACE_BEFORE 0x00000010 13#define F_SPACE_BEFORE 0x00000010
14#define F_SPACE_AFTER 0x00000020 14#define F_SPACE_AFTER 0x00000020
15#define F_JSON_RPC 0x00000040
16#define F_ALLOW_NONREF 0x00000080 15#define F_ALLOW_NONREF 0x00000080
16#define F_SHRINK 0x00000100
17 17
18#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER 18#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER
19#define F_DEFAULT 0 19#define F_DEFAULT 0
20 20
21#define INIT_SIZE 32 // initial scalar size to be allocated 21#define INIT_SIZE 32 // initial scalar size to be allocated
53 croak ("object is not of type JSON::XS"); 53 croak ("object is not of type JSON::XS");
54 54
55 return &SvUVX (SvRV (sv)); 55 return &SvUVX (SvRV (sv));
56} 56}
57 57
58static void
59shrink (SV *sv)
60{
61 sv_utf8_downgrade (sv, 1);
62#ifdef SvPV_shrink_to_cur
63 SvPV_shrink_to_cur (sv);
64#endif
65}
66
58///////////////////////////////////////////////////////////////////////////// 67/////////////////////////////////////////////////////////////////////////////
59 68
60static void 69static void
61need (enc_t *enc, STRLEN len) 70need (enc_t *enc, STRLEN len)
62{ 71{
85 94
86 while (str < end) 95 while (str < end)
87 { 96 {
88 unsigned char ch = *(unsigned char *)str; 97 unsigned char ch = *(unsigned char *)str;
89 98
90 if (ch == '"') 99 if (ch >= 0x20 && ch < 0x80) // most common case
91 { 100 {
101 if (ch == '"') // but with slow exceptions
102 {
92 need (enc, len += 1); 103 need (enc, len += 1);
93 *enc->cur++ = '\\'; 104 *enc->cur++ = '\\';
94 *enc->cur++ = '"'; 105 *enc->cur++ = '"';
95 ++str;
96 } 106 }
97 else if (ch == '\\') 107 else if (ch == '\\')
98 { 108 {
99 need (enc, len += 1); 109 need (enc, len += 1);
100 *enc->cur++ = '\\'; 110 *enc->cur++ = '\\';
101 *enc->cur++ = '\\'; 111 *enc->cur++ = '\\';
102 ++str;
103 } 112 }
104 else if (ch >= 0x20 && ch < 0x80) // most common case 113 else
105 {
106 *enc->cur++ = ch; 114 *enc->cur++ = ch;
107 ++str; 115
108 }
109 else if (ch == '\015')
110 {
111 need (enc, len += 1);
112 *enc->cur++ = '\\';
113 *enc->cur++ = 'r';
114 ++str;
115 }
116 else if (ch == '\012')
117 {
118 need (enc, len += 1);
119 *enc->cur++ = '\\';
120 *enc->cur++ = 'n';
121 ++str; 116 ++str;
122 } 117 }
123 else 118 else
124 { 119 {
125 STRLEN clen; 120 switch (ch)
126 UV uch;
127
128 if (is_utf8)
129 { 121 {
130 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY); 122 case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
131 if (clen == (STRLEN)-1) 123 case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
132 croak ("malformed UTF-8 character in string, cannot convert to JSON"); 124 case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
133 } 125 case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
134 else 126 case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;
135 {
136 uch = ch;
137 clen = 1;
138 }
139 127
140 if (uch < 0x80 || enc->flags & F_ASCII) 128 default:
141 {
142 if (uch > 0xFFFFUL)
143 { 129 {
130 STRLEN clen;
131 UV uch;
132
133 if (is_utf8)
134 {
135 uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
136 if (clen == (STRLEN)-1)
137 croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
138 }
139 else
140 {
141 uch = ch;
142 clen = 1;
143 }
144
145 if (uch > 0x10FFFFUL)
146 croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
147
148 if (uch < 0x80 || enc->flags & F_ASCII)
149 {
150 if (uch > 0xFFFFUL)
151 {
144 need (enc, len += 11); 152 need (enc, len += 11);
145 sprintf (enc->cur, "\\u%04x\\u%04x", 153 sprintf (enc->cur, "\\u%04x\\u%04x",
146 (uch - 0x10000) / 0x400 + 0xD800, 154 (int)((uch - 0x10000) / 0x400 + 0xD800),
147 (uch - 0x10000) % 0x400 + 0xDC00); 155 (int)((uch - 0x10000) % 0x400 + 0xDC00));
148 enc->cur += 12; 156 enc->cur += 12;
157 }
158 else
159 {
160 static char hexdigit [16] = "0123456789abcdef";
161 need (enc, len += 5);
162 *enc->cur++ = '\\';
163 *enc->cur++ = 'u';
164 *enc->cur++ = hexdigit [ uch >> 12 ];
165 *enc->cur++ = hexdigit [(uch >> 8) & 15];
166 *enc->cur++ = hexdigit [(uch >> 4) & 15];
167 *enc->cur++ = hexdigit [(uch >> 0) & 15];
168 }
169
170 str += clen;
171 }
172 else if (is_utf8)
173 {
174 need (enc, len += clen);
175 do
176 {
177 *enc->cur++ = *str++;
178 }
179 while (--clen);
180 }
181 else
182 {
183 need (enc, len += 10); // never more than 11 bytes needed
184 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
185 ++str;
186 }
149 } 187 }
150 else
151 {
152 static char hexdigit [16] = "0123456789abcdef";
153 need (enc, len += 5);
154 *enc->cur++ = '\\';
155 *enc->cur++ = 'u';
156 *enc->cur++ = hexdigit [ uch >> 12 ];
157 *enc->cur++ = hexdigit [(uch >> 8) & 15];
158 *enc->cur++ = hexdigit [(uch >> 4) & 15];
159 *enc->cur++ = hexdigit [(uch >> 0) & 15];
160 }
161
162 str += clen;
163 }
164 else if (is_utf8)
165 {
166 need (enc, len += clen);
167 do
168 {
169 *enc->cur++ = *str++;
170 }
171 while (--clen);
172 }
173 else
174 {
175 need (enc, 10); // never more than 11 bytes needed
176 enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
177 ++str;
178 } 188 }
179 } 189 }
180 190
181 --len; 191 --len;
182 } 192 }
309 319
310 if (fast) 320 if (fast)
311 qsort (hes, count, sizeof (HE *), he_cmp_fast); 321 qsort (hes, count, sizeof (HE *), he_cmp_fast);
312 else 322 else
313 { 323 {
314 // hack to disable "use bytes" 324 // hack to forcefully disable "use bytes"
315 COP *oldcop = PL_curcop, cop; 325 COP cop = *PL_curcop;
316 cop.op_private = 0; 326 cop.op_private = 0;
327
328 ENTER;
329 SAVETMPS;
330
331 SAVEVPTR (PL_curcop);
317 PL_curcop = &cop; 332 PL_curcop = &cop;
318 333
319 SAVETMPS;
320 qsort (hes, count, sizeof (HE *), he_cmp_slow); 334 qsort (hes, count, sizeof (HE *), he_cmp_slow);
335
321 FREETMPS; 336 FREETMPS;
322 337 LEAVE;
323 PL_curcop = oldcop;
324 } 338 }
325 339
326 for (i = 0; i < count; ++i) 340 for (i = 0; i < count; ++i)
327 { 341 {
328 INDENT; 342 INDENT;
384 ? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv)) 398 ? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))
385 : snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv)); 399 : snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));
386 } 400 }
387 else if (SvROK (sv)) 401 else if (SvROK (sv))
388 { 402 {
403 SV *rv = SvRV (sv);
404
389 if (!--enc->max_recurse) 405 if (!--enc->max_recurse)
390 croak ("data structure too deep (hit recursion limit)"); 406 croak ("data structure too deep (hit recursion limit)");
391 407
392 sv = SvRV (sv);
393
394 switch (SvTYPE (sv)) 408 switch (SvTYPE (rv))
395 { 409 {
396 case SVt_PVAV: encode_av (enc, (AV *)sv); break; 410 case SVt_PVAV: encode_av (enc, (AV *)rv); break;
397 case SVt_PVHV: encode_hv (enc, (HV *)sv); break; 411 case SVt_PVHV: encode_hv (enc, (HV *)rv); break;
398 412
399 default: 413 default:
400 croak ("JSON can only represent references to arrays or hashes"); 414 croak ("encountered %s, but JSON can only represent references to arrays or hashes",
415 SvPV_nolen (sv));
401 } 416 }
402 } 417 }
403 else if (!SvOK (sv)) 418 else if (!SvOK (sv))
404 encode_str (enc, "null", 4, 0); 419 encode_str (enc, "null", 4, 0);
405 else 420 else
406 croak ("encountered perl type that JSON cannot handle"); 421 croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
422 SvPV_nolen (sv), SvFLAGS (sv));
407} 423}
408 424
409static SV * 425static SV *
410encode_json (SV *scalar, UV flags) 426encode_json (SV *scalar, UV flags)
411{ 427{
412 if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar)) 428 if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
413 croak ("hash- or arraref required (not a simple scalar, use allow_nonref to allow this)"); 429 croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
414 430
415 enc_t enc; 431 enc_t enc;
416 enc.flags = flags; 432 enc.flags = flags;
417 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 433 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
418 enc.cur = SvPVX (enc.sv); 434 enc.cur = SvPVX (enc.sv);
425 441
426 if (!(flags & (F_ASCII | F_UTF8))) 442 if (!(flags & (F_ASCII | F_UTF8)))
427 SvUTF8_on (enc.sv); 443 SvUTF8_on (enc.sv);
428 444
429 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 445 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
446
447 if (enc.flags & F_SHRINK)
448 shrink (enc.sv);
449
430 return enc.sv; 450 return enc.sv;
431} 451}
432 452
433///////////////////////////////////////////////////////////////////////////// 453/////////////////////////////////////////////////////////////////////////////
434 454
572 else if (ch >= 0x80) 592 else if (ch >= 0x80)
573 { 593 {
574 STRLEN clen; 594 STRLEN clen;
575 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY); 595 UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
576 if (clen == (STRLEN)-1) 596 if (clen == (STRLEN)-1)
577 ERR ("malformed UTF-8 character in string, cannot convert to JSON"); 597 ERR ("malformed UTF-8 character in JSON string");
578 598
579 APPEND_GROW (clen); 599 APPEND_GROW (clen);
580 do 600 do
581 { 601 {
582 *cur++ = *dec->cur++; 602 *cur++ = *dec->cur++;
598 SvPOK_only (sv); 618 SvPOK_only (sv);
599 *SvEND (sv) = 0; 619 *SvEND (sv) = 0;
600 620
601 if (utf8) 621 if (utf8)
602 SvUTF8_on (sv); 622 SvUTF8_on (sv);
623
624 if (dec->flags & F_SHRINK)
625 shrink (sv);
603 626
604 return sv; 627 return sv;
605 628
606fail: 629fail:
607 SvREFCNT_dec (sv); 630 SvREFCNT_dec (sv);
829 ERR ("'null' expected"); 852 ERR ("'null' expected");
830 853
831 break; 854 break;
832 855
833 default: 856 default:
834 ERR ("malformed json string"); 857 ERR ("malformed json string, neither array, object, number, string or atom");
835 break; 858 break;
836 } 859 }
837 860
838fail: 861fail:
839 return 0; 862 return 0;
859 882
860 sv = decode_sv (&dec); 883 sv = decode_sv (&dec);
861 884
862 if (!sv) 885 if (!sv)
863 { 886 {
887 IV offset = dec.flags & F_UTF8
888 ? dec.cur - SvPVX (string)
864 IV offset = utf8_distance (dec.cur, SvPVX (string)); 889 : utf8_distance (dec.cur, SvPVX (string));
865 SV *uni = sv_newmortal (); 890 SV *uni = sv_newmortal ();
891
866 // horrible hack to silence warning inside pv_uni_display 892 // horrible hack to silence warning inside pv_uni_display
867 COP cop; 893 COP cop = *PL_curcop;
868 memset (&cop, 0, sizeof (cop));
869 cop.cop_warnings = pWARN_NONE; 894 cop.cop_warnings = pWARN_NONE;
895 ENTER;
870 SAVEVPTR (PL_curcop); 896 SAVEVPTR (PL_curcop);
871 PL_curcop = &cop; 897 PL_curcop = &cop;
872
873 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); 898 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
899 LEAVE;
900
874 croak ("%s, at character offset %d (%s)", 901 croak ("%s, at character offset %d (%s)",
875 dec.err, 902 dec.err,
876 (int)offset, 903 (int)offset,
877 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); 904 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
878 } 905 }
879 906
880 sv = sv_2mortal (sv); 907 sv = sv_2mortal (sv);
881 908
882 if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv)) 909 if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
883 croak ("JSON object or array expected (but number, string, true, false or null found, use allow_nonref to allow this)"); 910 croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
884 911
885 return sv; 912 return sv;
886} 913}
887 914
888MODULE = JSON::XS PACKAGE = JSON::XS 915MODULE = JSON::XS PACKAGE = JSON::XS
910 CODE: 937 CODE:
911 RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash); 938 RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);
912 OUTPUT: 939 OUTPUT:
913 RETVAL 940 RETVAL
914 941
915SV *ascii (SV *self, int enable) 942SV *ascii (SV *self, int enable = 1)
916 ALIAS: 943 ALIAS:
917 ascii = F_ASCII 944 ascii = F_ASCII
918 utf8 = F_UTF8 945 utf8 = F_UTF8
919 indent = F_INDENT 946 indent = F_INDENT
920 canonical = F_CANONICAL 947 canonical = F_CANONICAL
921 space_before = F_SPACE_BEFORE 948 space_before = F_SPACE_BEFORE
922 space_after = F_SPACE_AFTER 949 space_after = F_SPACE_AFTER
923 json_rpc = F_JSON_RPC
924 pretty = F_PRETTY 950 pretty = F_PRETTY
925 allow_nonref = F_ALLOW_NONREF 951 allow_nonref = F_ALLOW_NONREF
952 shrink = F_SHRINK
926 CODE: 953 CODE:
927{ 954{
928 UV *uv = SvJSON (self); 955 UV *uv = SvJSON (self);
929 if (enable) 956 if (enable)
930 *uv |= ix; 957 *uv |= ix;

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines