ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.104 by root, Tue Jan 19 00:31:13 2010 UTC vs.
Revision 1.117 by root, Fri Oct 25 20:19:57 2013 UTC

42#define INIT_SIZE 32 // initial scalar size to be allocated 42#define INIT_SIZE 32 // initial scalar size to be allocated
43#define INDENT_STEP 3 // spaces per indentation level 43#define INDENT_STEP 3 // spaces per indentation level
44 44
45#define SHORT_STRING_LEN 16384 // special-case strings of up to this size 45#define SHORT_STRING_LEN 16384 // special-case strings of up to this size
46 46
47#define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8)
48
47#define SB do { 49#define SB do {
48#define SE } while (0) 50#define SE } while (0)
49 51
50#if __GNUC__ >= 3 52#if __GNUC__ >= 3
51# define expect(expr,value) __builtin_expect ((expr), (value)) 53# define expect(expr,value) __builtin_expect ((expr), (value))
69# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) 71# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
70#else 72#else
71# define JSON_SLOW 0 73# define JSON_SLOW 0
72# define JSON_STASH json_stash 74# define JSON_STASH json_stash
73#endif 75#endif
76
77// the amount of HEs to allocate on the stack, when sorting keys
78#define STACK_HES 64
74 79
75static HV *json_stash, *json_boolean_stash; // JSON::XS:: 80static HV *json_stash, *json_boolean_stash; // JSON::XS::
76static SV *json_true, *json_false; 81static SV *json_true, *json_false;
77 82
78enum { 83enum {
192///////////////////////////////////////////////////////////////////////////// 197/////////////////////////////////////////////////////////////////////////////
193// fp hell 198// fp hell
194 199
195// scan a group of digits, and a trailing exponent 200// scan a group of digits, and a trailing exponent
196static void 201static void
197json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp) 202json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp, int maxdepth)
198{ 203{
199 UV uaccum = 0; 204 UV uaccum = 0;
200 int eaccum = 0; 205 int eaccum = 0;
206
207 // if we recurse too deep, skip all remaining digits
208 // to avoid a stack overflow attack
209 if (expect_false (--maxdepth <= 0))
210 while (((U8)*s - '0') < 10)
211 ++s;
201 212
202 for (;;) 213 for (;;)
203 { 214 {
204 U8 dig = (U8)*s - '0'; 215 U8 dig = (U8)*s - '0';
205 216
206 if (expect_false (dig >= 10)) 217 if (expect_false (dig >= 10))
207 { 218 {
208 if (dig == (U8)((U8)'.' - (U8)'0')) 219 if (dig == (U8)((U8)'.' - (U8)'0'))
209 { 220 {
210 ++s; 221 ++s;
211 json_atof_scan1 (s, accum, expo, 1); 222 json_atof_scan1 (s, accum, expo, 1, maxdepth);
212 } 223 }
213 else if ((dig | ' ') == 'e' - '0') 224 else if ((dig | ' ') == 'e' - '0')
214 { 225 {
215 int exp2 = 0; 226 int exp2 = 0;
216 int neg = 0; 227 int neg = 0;
242 // if we have too many digits, then recurse for more 253 // if we have too many digits, then recurse for more
243 // we actually do this for rather few digits 254 // we actually do this for rather few digits
244 if (uaccum >= (UV_MAX - 9) / 10) 255 if (uaccum >= (UV_MAX - 9) / 10)
245 { 256 {
246 if (postdp) *expo -= eaccum; 257 if (postdp) *expo -= eaccum;
247 json_atof_scan1 (s, accum, expo, postdp); 258 json_atof_scan1 (s, accum, expo, postdp, maxdepth);
248 if (postdp) *expo += eaccum; 259 if (postdp) *expo += eaccum;
249 260
250 break; 261 break;
251 } 262 }
252 } 263 }
253 264
265 // this relies greatly on the quality of the pow ()
266 // implementation of the platform, but a good
267 // implementation is hard to beat.
268 // (IEEE 754 conformant ones are required to be exact)
254 if (postdp) *expo -= eaccum; 269 if (postdp) *expo -= eaccum;
255 *accum += uaccum * pow (10., *expo); 270 *accum += uaccum * Perl_pow (10., *expo);
256 *expo += eaccum; 271 *expo += eaccum;
257} 272}
258 273
259static NV 274static NV
260json_atof (const char *s) 275json_atof (const char *s)
267 { 282 {
268 ++s; 283 ++s;
269 neg = 1; 284 neg = 1;
270 } 285 }
271 286
287 // a recursion depth of ten gives us >>500 bits
272 json_atof_scan1 (s, &accum, &expo, 0); 288 json_atof_scan1 (s, &accum, &expo, 0, 10);
273 289
274 return neg ? -accum : accum; 290 return neg ? -accum : accum;
275} 291}
276///////////////////////////////////////////////////////////////////////////// 292/////////////////////////////////////////////////////////////////////////////
277// encoder 293// encoder
467 483
468 if (enc->indent >= enc->json.max_depth) 484 if (enc->indent >= enc->json.max_depth)
469 croak (ERR_NESTING_EXCEEDED); 485 croak (ERR_NESTING_EXCEEDED);
470 486
471 encode_ch (enc, '['); 487 encode_ch (enc, '[');
472 488
473 if (len >= 0) 489 if (len >= 0)
474 { 490 {
475 encode_nl (enc); ++enc->indent; 491 encode_nl (enc); ++enc->indent;
476 492
477 for (i = 0; i <= len; ++i) 493 for (i = 0; i <= len; ++i)
489 encode_comma (enc); 505 encode_comma (enc);
490 } 506 }
491 507
492 encode_nl (enc); --enc->indent; encode_indent (enc); 508 encode_nl (enc); --enc->indent; encode_indent (enc);
493 } 509 }
494 510
495 encode_ch (enc, ']'); 511 encode_ch (enc, ']');
496} 512}
497 513
498static void 514static void
499encode_hk (enc_t *enc, HE *he) 515encode_hk (enc_t *enc, HE *he)
503 if (HeKLEN (he) == HEf_SVKEY) 519 if (HeKLEN (he) == HEf_SVKEY)
504 { 520 {
505 SV *sv = HeSVKEY (he); 521 SV *sv = HeSVKEY (he);
506 STRLEN len; 522 STRLEN len;
507 char *str; 523 char *str;
508 524
509 SvGETMAGIC (sv); 525 SvGETMAGIC (sv);
510 str = SvPV (sv, len); 526 str = SvPV (sv, len);
511 527
512 encode_str (enc, str, len, SvUTF8 (sv)); 528 encode_str (enc, str, len, SvUTF8 (sv));
513 } 529 }
579 } 595 }
580 596
581 if (count) 597 if (count)
582 { 598 {
583 int i, fast = 1; 599 int i, fast = 1;
584#if defined(__BORLANDC__) || defined(_MSC_VER) 600 HE *hes_stack [STACK_HES];
585 HE **hes = _alloca (count * sizeof (HE)); 601 HE **hes = hes_stack;
586#else 602
587 HE *hes [count]; // if your compiler dies here, you need to enable C99 mode 603 // allocate larger arrays on the heap
588#endif 604 if (count > STACK_HES)
605 {
606 SV *sv = sv_2mortal (NEWSV (0, count * sizeof (*hes)));
607 hes = (HE **)SvPVX (sv);
608 }
589 609
590 i = 0; 610 i = 0;
591 while ((he = hv_iternext (hv))) 611 while ((he = hv_iternext (hv)))
592 { 612 {
593 hes [i++] = he; 613 hes [i++] = he;
812 } 832 }
813 else 833 else
814 { 834 {
815 // large integer, use the (rather slow) snprintf way. 835 // large integer, use the (rather slow) snprintf way.
816 need (enc, IVUV_MAXCHARS); 836 need (enc, IVUV_MAXCHARS);
817 enc->cur += 837 enc->cur +=
818 SvIsUV(sv) 838 SvIsUV(sv)
819 ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv)) 839 ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv))
820 : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv)); 840 : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv));
821 } 841 }
822 } 842 }
823 else if (SvROK (sv)) 843 else if (SvROK (sv))
824 encode_rv (enc, SvRV (sv)); 844 encode_rv (enc, SvRV (sv));
825 else if (!SvOK (sv) || enc->json.flags & F_ALLOW_UNKNOWN) 845 else if (!SvOK (sv) || enc->json.flags & F_ALLOW_UNKNOWN)
826 encode_str (enc, "null", 4, 0); 846 encode_str (enc, "null", 4, 0);
827 else 847 else
828 croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this", 848 croak ("encountered perl type (%s,0x%x) that JSON cannot handle, check your input data",
829 SvPV_nolen (sv), SvFLAGS (sv)); 849 SvPV_nolen (sv), (unsigned int)SvFLAGS (sv));
830} 850}
831 851
832static SV * 852static SV *
833encode_json (SV *scalar, JSON *json) 853encode_json (SV *scalar, JSON *json)
834{ 854{
1246 if (*dec->cur == ']') 1266 if (*dec->cur == ']')
1247 { 1267 {
1248 ++dec->cur; 1268 ++dec->cur;
1249 break; 1269 break;
1250 } 1270 }
1251 1271
1252 if (*dec->cur != ',') 1272 if (*dec->cur != ',')
1253 ERR (", or ] expected while parsing array"); 1273 ERR (", or ] expected while parsing array");
1254 1274
1255 ++dec->cur; 1275 ++dec->cur;
1256 1276
1391 dSP; 1411 dSP;
1392 int count; 1412 int count;
1393 1413
1394 ENTER; SAVETMPS; PUSHMARK (SP); 1414 ENTER; SAVETMPS; PUSHMARK (SP);
1395 XPUSHs (HeVAL (he)); 1415 XPUSHs (HeVAL (he));
1416 sv_2mortal (sv);
1396 1417
1397 PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; 1418 PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN;
1398 1419
1399 if (count == 1) 1420 if (count == 1)
1400 { 1421 {
1401 sv = newSVsv (POPs); 1422 sv = newSVsv (POPs);
1402 FREETMPS; LEAVE; 1423 FREETMPS; LEAVE;
1403 return sv; 1424 return sv;
1404 } 1425 }
1405 1426
1427 SvREFCNT_inc (sv);
1406 FREETMPS; LEAVE; 1428 FREETMPS; LEAVE;
1407 } 1429 }
1408 } 1430 }
1409 1431
1410 if (dec->json.cb_object) 1432 if (dec->json.cb_object)
1442{ 1464{
1443 // the beauty of JSON: you need exactly one character lookahead 1465 // the beauty of JSON: you need exactly one character lookahead
1444 // to parse everything. 1466 // to parse everything.
1445 switch (*dec->cur) 1467 switch (*dec->cur)
1446 { 1468 {
1447 case '"': ++dec->cur; return decode_str (dec); 1469 case '"': ++dec->cur; return decode_str (dec);
1448 case '[': ++dec->cur; return decode_av (dec); 1470 case '[': ++dec->cur; return decode_av (dec);
1449 case '{': ++dec->cur; return decode_hv (dec); 1471 case '{': ++dec->cur; return decode_hv (dec);
1450 1472
1451 case '-': 1473 case '-':
1452 case '0': case '1': case '2': case '3': case '4': 1474 case '0': case '1': case '2': case '3': case '4':
1453 case '5': case '6': case '7': case '8': case '9': 1475 case '5': case '6': case '7': case '8': case '9':
1506{ 1528{
1507 dec_t dec; 1529 dec_t dec;
1508 SV *sv; 1530 SV *sv;
1509 1531
1510 /* work around bugs in 5.10 where manipulating magic values 1532 /* work around bugs in 5.10 where manipulating magic values
1511 * will perl ignore the magic in subsequent accesses 1533 * makes perl ignore the magic in subsequent accesses.
1534 * also make a copy of non-PV values, to get them into a clean
1535 * state (SvPV should do that, but it's buggy, see below).
1512 */ 1536 */
1513 /*SvGETMAGIC (string);*/ 1537 /*SvGETMAGIC (string);*/
1514 if (SvMAGICAL (string)) 1538 if (SvMAGICAL (string) || !SvPOK (string))
1515 string = sv_2mortal (newSVsv (string)); 1539 string = sv_2mortal (newSVsv (string));
1516 1540
1517 SvUPGRADE (string, SVt_PV); 1541 SvUPGRADE (string, SVt_PV);
1518 1542
1519 /* work around a bug in perl 5.10, which causes SvCUR to fail an 1543 /* work around a bug in perl 5.10, which causes SvCUR to fail an
1536 if (offset > json->max_size && json->max_size) 1560 if (offset > json->max_size && json->max_size)
1537 croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", 1561 croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
1538 (unsigned long)SvCUR (string), (unsigned long)json->max_size); 1562 (unsigned long)SvCUR (string), (unsigned long)json->max_size);
1539 } 1563 }
1540 1564
1541 if (json->flags & F_UTF8) 1565 if (DECODE_WANTS_OCTETS (json))
1542 sv_utf8_downgrade (string, 0); 1566 sv_utf8_downgrade (string, 0);
1543 else 1567 else
1544 sv_utf8_upgrade (string); 1568 sv_utf8_upgrade (string);
1545 1569
1546 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP 1570 SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
1588 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); 1612 pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
1589 LEAVE; 1613 LEAVE;
1590 1614
1591 croak ("%s, at character offset %d (before \"%s\")", 1615 croak ("%s, at character offset %d (before \"%s\")",
1592 dec.err, 1616 dec.err,
1593 ptr_to_index (string, dec.cur), 1617 (int)ptr_to_index (string, dec.cur),
1594 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); 1618 dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
1595 } 1619 }
1596 1620
1597 sv = sv_2mortal (sv); 1621 sv = sv_2mortal (sv);
1598 1622
1788 json_boolean_stash = 0; 1812 json_boolean_stash = 0;
1789 1813
1790void new (char *klass) 1814void new (char *klass)
1791 PPCODE: 1815 PPCODE:
1792{ 1816{
1793 SV *pv = NEWSV (0, sizeof (JSON)); 1817 SV *pv = NEWSV (0, sizeof (JSON));
1794 SvPOK_only (pv); 1818 SvPOK_only (pv);
1795 json_init ((JSON *)SvPVX (pv)); 1819 json_init ((JSON *)SvPVX (pv));
1796 XPUSHs (sv_2mortal (sv_bless ( 1820 XPUSHs (sv_2mortal (sv_bless (
1797 newRV_noinc (pv), 1821 newRV_noinc (pv),
1798 strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1) 1822 strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1)
1875} 1899}
1876 1900
1877void filter_json_single_key_object (JSON *self, SV *key, SV *cb = &PL_sv_undef) 1901void filter_json_single_key_object (JSON *self, SV *key, SV *cb = &PL_sv_undef)
1878 PPCODE: 1902 PPCODE:
1879{ 1903{
1880 if (!self->cb_sk_object) 1904 if (!self->cb_sk_object)
1881 self->cb_sk_object = newHV (); 1905 self->cb_sk_object = newHV ();
1882 1906
1883 if (SvOK (cb)) 1907 if (SvOK (cb))
1884 hv_store_ent (self->cb_sk_object, key, newSVsv (cb), 0); 1908 hv_store_ent (self->cb_sk_object, key, newSVsv (cb), 0);
1885 else 1909 else
1896 XPUSHs (ST (0)); 1920 XPUSHs (ST (0));
1897} 1921}
1898 1922
1899void encode (JSON *self, SV *scalar) 1923void encode (JSON *self, SV *scalar)
1900 PPCODE: 1924 PPCODE:
1901 XPUSHs (encode_json (scalar, self)); 1925 PUTBACK; scalar = encode_json (scalar, self); SPAGAIN;
1926 XPUSHs (scalar);
1902 1927
1903void decode (JSON *self, SV *jsonstr) 1928void decode (JSON *self, SV *jsonstr)
1904 PPCODE: 1929 PPCODE:
1905 XPUSHs (decode_json (jsonstr, self, 0)); 1930 PUTBACK; jsonstr = decode_json (jsonstr, self, 0); SPAGAIN;
1931 XPUSHs (jsonstr);
1906 1932
1907void decode_prefix (JSON *self, SV *jsonstr) 1933void decode_prefix (JSON *self, SV *jsonstr)
1908 PPCODE: 1934 PPCODE:
1909{ 1935{
1936 SV *sv;
1910 char *offset; 1937 char *offset;
1938 PUTBACK; sv = decode_json (jsonstr, self, &offset); SPAGAIN;
1911 EXTEND (SP, 2); 1939 EXTEND (SP, 2);
1912 PUSHs (decode_json (jsonstr, self, &offset)); 1940 PUSHs (sv);
1913 PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset)))); 1941 PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, offset))));
1914} 1942}
1915 1943
1916void incr_parse (JSON *self, SV *jsonstr = 0) 1944void incr_parse (JSON *self, SV *jsonstr = 0)
1917 PPCODE: 1945 PPCODE:
1918{ 1946{
1919 if (!self->incr_text) 1947 if (!self->incr_text)
1920 self->incr_text = newSVpvn ("", 0); 1948 self->incr_text = newSVpvn ("", 0);
1949
1950 /* if utf8-ness doesn't match the decoder, need to upgrade/downgrade */
1951 if (!DECODE_WANTS_OCTETS (self) == !SvUTF8 (self->incr_text))
1952 if (DECODE_WANTS_OCTETS (self))
1953 {
1954 if (self->incr_pos)
1955 self->incr_pos = utf8_length ((U8 *)SvPVX (self->incr_text),
1956 (U8 *)SvPVX (self->incr_text) + self->incr_pos);
1957
1958 sv_utf8_downgrade (self->incr_text, 0);
1959 }
1960 else
1961 {
1962 sv_utf8_upgrade (self->incr_text);
1963
1964 if (self->incr_pos)
1965 self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
1966 - (U8 *)SvPVX (self->incr_text);
1967 }
1921 1968
1922 // append data, if any 1969 // append data, if any
1923 if (jsonstr) 1970 if (jsonstr)
1924 { 1971 {
1972 /* make sure both strings have same encoding */
1973 if (SvUTF8 (jsonstr) != SvUTF8 (self->incr_text))
1925 if (SvUTF8 (jsonstr)) 1974 if (SvUTF8 (jsonstr))
1975 sv_utf8_downgrade (jsonstr, 0);
1926 { 1976 else
1927 if (!SvUTF8 (self->incr_text))
1928 {
1929 /* utf-8-ness differs, need to upgrade */
1930 sv_utf8_upgrade (self->incr_text);
1931
1932 if (self->incr_pos)
1933 self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos)
1934 - (U8 *)SvPVX (self->incr_text);
1935 }
1936 }
1937 else if (SvUTF8 (self->incr_text))
1938 sv_utf8_upgrade (jsonstr); 1977 sv_utf8_upgrade (jsonstr);
1939 1978
1979 /* and then just blindly append */
1940 { 1980 {
1941 STRLEN len; 1981 STRLEN len;
1942 const char *str = SvPV (jsonstr, len); 1982 const char *str = SvPV (jsonstr, len);
1943 STRLEN cur = SvCUR (self->incr_text); 1983 STRLEN cur = SvCUR (self->incr_text);
1944 1984
1952 } 1992 }
1953 1993
1954 if (GIMME_V != G_VOID) 1994 if (GIMME_V != G_VOID)
1955 do 1995 do
1956 { 1996 {
1997 SV *sv;
1957 char *offset; 1998 char *offset;
1958 1999
1959 if (!INCR_DONE (self)) 2000 if (!INCR_DONE (self))
1960 { 2001 {
1961 incr_parse (self); 2002 incr_parse (self);
1963 if (self->incr_pos > self->max_size && self->max_size) 2004 if (self->incr_pos > self->max_size && self->max_size)
1964 croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", 2005 croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu",
1965 (unsigned long)self->incr_pos, (unsigned long)self->max_size); 2006 (unsigned long)self->incr_pos, (unsigned long)self->max_size);
1966 2007
1967 if (!INCR_DONE (self)) 2008 if (!INCR_DONE (self))
2009 {
2010 // as an optimisation, do not accumulate white space in the incr buffer
2011 if (self->incr_mode == INCR_M_WS && self->incr_pos)
2012 {
2013 self->incr_pos = 0;
2014 SvCUR_set (self->incr_text, 0);
2015 }
2016
1968 break; 2017 break;
2018 }
1969 } 2019 }
1970 2020
1971 XPUSHs (decode_json (self->incr_text, self, &offset)); 2021 PUTBACK; sv = decode_json (self->incr_text, self, &offset); SPAGAIN;
2022 XPUSHs (sv);
1972 2023
1973 self->incr_pos -= offset - SvPVX (self->incr_text); 2024 self->incr_pos -= offset - SvPVX (self->incr_text);
1974 self->incr_nest = 0; 2025 self->incr_nest = 0;
1975 self->incr_mode = 0; 2026 self->incr_mode = 0;
1976 2027
2028 PPCODE: 2079 PPCODE:
2029{ 2080{
2030 JSON json; 2081 JSON json;
2031 json_init (&json); 2082 json_init (&json);
2032 json.flags |= ix; 2083 json.flags |= ix;
2033 XPUSHs (encode_json (scalar, &json)); 2084 PUTBACK; scalar = encode_json (scalar, &json); SPAGAIN;
2085 XPUSHs (scalar);
2034} 2086}
2035 2087
2036void decode_json (SV *jsonstr) 2088void decode_json (SV *jsonstr)
2037 ALIAS: 2089 ALIAS:
2038 from_json_ = 0 2090 from_json_ = 0
2040 PPCODE: 2092 PPCODE:
2041{ 2093{
2042 JSON json; 2094 JSON json;
2043 json_init (&json); 2095 json_init (&json);
2044 json.flags |= ix; 2096 json.flags |= ix;
2045 XPUSHs (decode_json (jsonstr, &json, 0)); 2097 PUTBACK; jsonstr = decode_json (jsonstr, &json, 0); SPAGAIN;
2098 XPUSHs (jsonstr);
2046} 2099}
2047 2100

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines