ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/JSON-XS/XS.xs
(Generate patch)

Comparing JSON-XS/XS.xs (file contents):
Revision 1.55 by root, Mon Jul 23 22:57:40 2007 UTC vs.
Revision 1.63 by root, Mon Aug 27 01:49:01 2007 UTC

1#include "EXTERN.h" 1#include "EXTERN.h"
2#include "perl.h" 2#include "perl.h"
3#include "XSUB.h" 3#include "XSUB.h"
4 4
5#include "assert.h" 5#include <assert.h>
6#include "string.h" 6#include <string.h>
7#include "stdlib.h" 7#include <stdlib.h>
8#include "stdio.h" 8#include <stdio.h>
9#include <float.h>
9 10
10#if defined(__BORLANDC__) || defined(_MSC_VER) 11#if defined(__BORLANDC__) || defined(_MSC_VER)
11# define snprintf _snprintf // C compilers have this in stdio.h 12# define snprintf _snprintf // C compilers have this in stdio.h
12#endif 13#endif
13 14
26#define F_SPACE_AFTER 0x00000040UL 27#define F_SPACE_AFTER 0x00000040UL
27#define F_ALLOW_NONREF 0x00000100UL 28#define F_ALLOW_NONREF 0x00000100UL
28#define F_SHRINK 0x00000200UL 29#define F_SHRINK 0x00000200UL
29#define F_ALLOW_BLESSED 0x00000400UL 30#define F_ALLOW_BLESSED 0x00000400UL
30#define F_CONV_BLESSED 0x00000800UL 31#define F_CONV_BLESSED 0x00000800UL
32#define F_RELAXED 0x00001000UL
33
31#define F_MAXDEPTH 0xf8000000UL 34#define F_MAXDEPTH 0xf8000000UL
32#define S_MAXDEPTH 27 35#define S_MAXDEPTH 27
33#define F_MAXSIZE 0x01f00000UL 36#define F_MAXSIZE 0x01f00000UL
34#define S_MAXSIZE 20 37#define S_MAXSIZE 20
35#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing 38#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing
56# define inline static 59# define inline static
57#endif 60#endif
58 61
59#define expect_false(expr) expect ((expr) != 0, 0) 62#define expect_false(expr) expect ((expr) != 0, 0)
60#define expect_true(expr) expect ((expr) != 0, 1) 63#define expect_true(expr) expect ((expr) != 0, 1)
64
65#ifdef USE_ITHREADS
66# define JSON_SLOW 1
67# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1))
68#else
69# define JSON_SLOW 0
70# define JSON_STASH json_stash
71#endif
61 72
62static HV *json_stash, *json_boolean_stash; // JSON::XS:: 73static HV *json_stash, *json_boolean_stash; // JSON::XS::
63static SV *json_true, *json_false; 74static SV *json_true, *json_false;
64 75
65typedef struct { 76typedef struct {
326 --enc->indent; 337 --enc->indent;
327 encode_indent (enc); encode_ch (enc, ']'); 338 encode_indent (enc); encode_ch (enc, ']');
328} 339}
329 340
330static void 341static void
331encode_he (enc_t *enc, HE *he) 342encode_hk (enc_t *enc, HE *he)
332{ 343{
333 encode_ch (enc, '"'); 344 encode_ch (enc, '"');
334 345
335 if (HeKLEN (he) == HEf_SVKEY) 346 if (HeKLEN (he) == HEf_SVKEY)
336 { 347 {
349 encode_ch (enc, '"'); 360 encode_ch (enc, '"');
350 361
351 if (enc->json.flags & F_SPACE_BEFORE) encode_space (enc); 362 if (enc->json.flags & F_SPACE_BEFORE) encode_space (enc);
352 encode_ch (enc, ':'); 363 encode_ch (enc, ':');
353 if (enc->json.flags & F_SPACE_AFTER ) encode_space (enc); 364 if (enc->json.flags & F_SPACE_AFTER ) encode_space (enc);
354 encode_sv (enc, HeVAL (he));
355} 365}
356 366
357// compare hash entries, used when all keys are bytestrings 367// compare hash entries, used when all keys are bytestrings
358static int 368static int
359he_cmp_fast (const void *a_, const void *b_) 369he_cmp_fast (const void *a_, const void *b_)
364 HE *b = *(HE **)b_; 374 HE *b = *(HE **)b_;
365 375
366 STRLEN la = HeKLEN (a); 376 STRLEN la = HeKLEN (a);
367 STRLEN lb = HeKLEN (b); 377 STRLEN lb = HeKLEN (b);
368 378
369 if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb))) 379 if (!(cmp = memcmp (HeKEY (b), HeKEY (a), lb < la ? lb : la)))
370 cmp = la - lb; 380 cmp = lb - la;
371 381
372 return cmp; 382 return cmp;
373} 383}
374 384
375// compare hash entries, used when some keys are sv's or utf-x 385// compare hash entries, used when some keys are sv's or utf-x
376static int 386static int
377he_cmp_slow (const void *a, const void *b) 387he_cmp_slow (const void *a, const void *b)
378{ 388{
379 return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b)); 389 return sv_cmp (HeSVKEY_force (*(HE **)b), HeSVKEY_force (*(HE **)a));
380} 390}
381 391
382static void 392static void
383encode_hv (enc_t *enc, HV *hv) 393encode_hv (enc_t *enc, HV *hv)
384{ 394{
395 HE *he;
385 int count, i; 396 int count;
386 397
387 if (enc->indent >= enc->maxdepth) 398 if (enc->indent >= enc->maxdepth)
388 croak ("data structure too deep (hit recursion limit)"); 399 croak ("data structure too deep (hit recursion limit)");
389 400
390 encode_ch (enc, '{'); encode_nl (enc); ++enc->indent; 401 encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;
391 402
392 if ((count = hv_iterinit (hv)))
393 {
394 // for canonical output we have to sort by keys first 403 // for canonical output we have to sort by keys first
395 // actually, this is mostly due to the stupid so-called 404 // actually, this is mostly due to the stupid so-called
396 // security workaround added somewhere in 5.8.x. 405 // security workaround added somewhere in 5.8.x.
397 // that randomises hash orderings 406 // that randomises hash orderings
398 if (enc->json.flags & F_CANONICAL) 407 if (enc->json.flags & F_CANONICAL)
408 {
409 int count = hv_iterinit (hv);
410
411 if (SvMAGICAL (hv))
399 { 412 {
413 // need to count by iterating. could improve by dynamically building the vector below
414 // but I don't care for the speed of this special case.
415 // note also that we will run into undefined behaviour when the two iterations
416 // do not result in the same count, something I might care for in some later release.
417
418 count = 0;
419 while (hv_iternext (hv))
420 ++count;
421
422 hv_iterinit (hv);
423 }
424
425 if (count)
426 {
400 int fast = 1; 427 int i, fast = 1;
401 HE *he;
402#if defined(__BORLANDC__) || defined(_MSC_VER) 428#if defined(__BORLANDC__) || defined(_MSC_VER)
403 HE **hes = _alloca (count * sizeof (HE)); 429 HE **hes = _alloca (count * sizeof (HE));
404#else 430#else
405 HE *hes [count]; // if your compiler dies here, you need to enable C99 mode 431 HE *hes [count]; // if your compiler dies here, you need to enable C99 mode
406#endif 432#endif
433 459
434 FREETMPS; 460 FREETMPS;
435 LEAVE; 461 LEAVE;
436 } 462 }
437 463
438 for (i = 0; i < count; ++i) 464 while (count--)
439 { 465 {
440 encode_indent (enc); 466 encode_indent (enc);
467 he = hes [count];
441 encode_he (enc, hes [i]); 468 encode_hk (enc, he);
469 encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
442 470
443 if (i < count - 1) 471 if (count)
444 encode_comma (enc); 472 encode_comma (enc);
445 } 473 }
446
447 encode_nl (enc);
448 } 474 }
475 }
449 else 476 else
450 { 477 {
478 if (hv_iterinit (hv) || SvMAGICAL (hv))
451 HE *he = hv_iternext (hv); 479 if ((he = hv_iternext (hv)))
452
453 for (;;) 480 for (;;)
454 { 481 {
455 encode_indent (enc); 482 encode_indent (enc);
456 encode_he (enc, he); 483 encode_hk (enc, he);
484 encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he));
457 485
458 if (!(he = hv_iternext (hv))) 486 if (!(he = hv_iternext (hv)))
459 break; 487 break;
460 488
461 encode_comma (enc); 489 encode_comma (enc);
462 } 490 }
491 }
463 492
464 encode_nl (enc); 493 encode_nl (enc);
465 }
466 }
467 494
468 --enc->indent; encode_indent (enc); encode_ch (enc, '}'); 495 --enc->indent; encode_indent (enc); encode_ch (enc, '}');
469} 496}
470 497
471// encode objects, arrays and special \0=false and \1=true values. 498// encode objects, arrays and special \0=false and \1=true values.
477 SvGETMAGIC (sv); 504 SvGETMAGIC (sv);
478 svt = SvTYPE (sv); 505 svt = SvTYPE (sv);
479 506
480 if (expect_false (SvOBJECT (sv))) 507 if (expect_false (SvOBJECT (sv)))
481 { 508 {
509 HV *stash = !JSON_SLOW || json_boolean_stash
510 ? json_boolean_stash
511 : gv_stashpv ("JSON::XS::Boolean", 1);
512
482 if (SvSTASH (sv) == json_boolean_stash) 513 if (SvSTASH (sv) == stash)
483 { 514 {
484 if (SvIV (sv)) 515 if (SvIV (sv))
485 encode_str (enc, "true", 4, 0); 516 encode_str (enc, "true", 4, 0);
486 else 517 else
487 encode_str (enc, "false", 5, 0); 518 encode_str (enc, "false", 5, 0);
499 // we re-bless the reference to get overload and other niceties right 530 // we re-bless the reference to get overload and other niceties right
500 GV *to_json = gv_fetchmethod_autoload (SvSTASH (sv), "TO_JSON", 0); 531 GV *to_json = gv_fetchmethod_autoload (SvSTASH (sv), "TO_JSON", 0);
501 532
502 if (to_json) 533 if (to_json)
503 { 534 {
504 int count;
505 dSP; 535 dSP;
506 536
507 ENTER; SAVETMPS; PUSHMARK (SP); 537 ENTER; SAVETMPS; PUSHMARK (SP);
508 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv))); 538 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv)));
509 539
680 710
681 if (ch > 0x20 711 if (ch > 0x20
682 || (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)) 712 || (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))
683 break; 713 break;
684 714
715 if (ch == '#' && dec->json.flags & F_RELAXED)
716 ++dec->cur;
717
685 ++dec->cur; 718 ++dec->cur;
686 } 719 }
687} 720}
688 721
689#define ERR(reason) SB dec->err = reason; goto fail; SE 722#define ERR(reason) SB dec->err = reason; goto fail; SE
937 is_nv = 1; 970 is_nv = 1;
938 } 971 }
939 972
940 if (!is_nv) 973 if (!is_nv)
941 { 974 {
975 int len = dec->cur - start;
976
942 // special case the rather common 1..4-digit-int case, assumes 32 bit ints or so 977 // special case the rather common 1..4-digit-int case, assumes 32 bit ints or so
943 if (*start == '-') 978 if (*start == '-')
944 switch (dec->cur - start) 979 switch (len)
945 { 980 {
946 case 2: return newSViv (-( start [1] - '0' * 1)); 981 case 2: return newSViv (-( start [1] - '0' * 1));
947 case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); 982 case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11));
948 case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); 983 case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111));
949 case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); 984 case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111));
950 } 985 }
951 else 986 else
952 switch (dec->cur - start) 987 switch (len)
953 { 988 {
954 case 1: return newSViv ( start [0] - '0' * 1); 989 case 1: return newSViv ( start [0] - '0' * 1);
955 case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); 990 case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11);
956 case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); 991 case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111);
957 case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); 992 case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111);
958 } 993 }
959 994
960 { 995 {
961 UV uv; 996 UV uv;
962 int numtype = grok_number (start, dec->cur - start, &uv); 997 int numtype = grok_number (start, len, &uv);
963 if (numtype & IS_NUMBER_IN_UV) 998 if (numtype & IS_NUMBER_IN_UV)
964 if (numtype & IS_NUMBER_NEG) 999 if (numtype & IS_NUMBER_NEG)
965 { 1000 {
966 if (uv < (UV)IV_MIN) 1001 if (uv < (UV)IV_MIN)
967 return newSViv (-(IV)uv); 1002 return newSViv (-(IV)uv);
968 } 1003 }
969 else 1004 else
970 return newSVuv (uv); 1005 return newSVuv (uv);
971
972 // here would likely be the place for bigint support
973 } 1006 }
974 }
975 1007
976 // if we ever support bigint or bigfloat, this is the place for bigfloat 1008 len -= *start == '-' ? 1 : 0;
1009
1010 // does not fit into IV or UV, try NV
1011 if ((sizeof (NV) == sizeof (double) && DBL_DIG >= len)
1012 #if defined (LDBL_DIG)
1013 || (sizeof (NV) == sizeof (long double) && LDBL_DIG >= len)
1014 #endif
1015 )
1016 // fits into NV without loss of precision
1017 return newSVnv (Atof (start));
1018
1019 // everything else fails, convert it to a string
1020 return newSVpvn (start, dec->cur - start);
1021 }
1022
1023 // loss of precision here
977 return newSVnv (Atof (start)); 1024 return newSVnv (Atof (start));
978 1025
979fail: 1026fail:
980 return 0; 1027 return 0;
981} 1028}
1011 1058
1012 if (*dec->cur != ',') 1059 if (*dec->cur != ',')
1013 ERR (", or ] expected while parsing array"); 1060 ERR (", or ] expected while parsing array");
1014 1061
1015 ++dec->cur; 1062 ++dec->cur;
1063
1064 decode_ws (dec);
1065
1066 if (*dec->cur == ']' && dec->json.flags & F_RELAXED)
1067 {
1068 ++dec->cur;
1069 break;
1070 }
1016 } 1071 }
1017 1072
1018 DEC_DEC_DEPTH; 1073 DEC_DEC_DEPTH;
1019 return newRV_noinc ((SV *)av); 1074 return newRV_noinc ((SV *)av);
1020 1075
1036 if (*dec->cur == '}') 1091 if (*dec->cur == '}')
1037 ++dec->cur; 1092 ++dec->cur;
1038 else 1093 else
1039 for (;;) 1094 for (;;)
1040 { 1095 {
1041 decode_ws (dec); EXPECT_CH ('"'); 1096 EXPECT_CH ('"');
1042 1097
1043 // heuristic: assume that 1098 // heuristic: assume that
1044 // a) decode_str + hv_store_ent are abysmally slow. 1099 // a) decode_str + hv_store_ent are abysmally slow.
1045 // b) most hash keys are short, simple ascii text. 1100 // b) most hash keys are short, simple ascii text.
1046 // => try to "fast-match" such strings to avoid 1101 // => try to "fast-match" such strings to avoid
1060 if (!key) 1115 if (!key)
1061 goto fail; 1116 goto fail;
1062 1117
1063 decode_ws (dec); EXPECT_CH (':'); 1118 decode_ws (dec); EXPECT_CH (':');
1064 1119
1120 decode_ws (dec);
1065 value = decode_sv (dec); 1121 value = decode_sv (dec);
1066 if (!value) 1122 if (!value)
1067 { 1123 {
1068 SvREFCNT_dec (key); 1124 SvREFCNT_dec (key);
1069 goto fail; 1125 goto fail;
1081 int len = p - key; 1137 int len = p - key;
1082 dec->cur = p + 1; 1138 dec->cur = p + 1;
1083 1139
1084 decode_ws (dec); EXPECT_CH (':'); 1140 decode_ws (dec); EXPECT_CH (':');
1085 1141
1142 decode_ws (dec);
1086 value = decode_sv (dec); 1143 value = decode_sv (dec);
1087 if (!value) 1144 if (!value)
1088 goto fail; 1145 goto fail;
1089 1146
1090 hv_store (hv, key, len, value, 0); 1147 hv_store (hv, key, len, value, 0);
1106 1163
1107 if (*dec->cur != ',') 1164 if (*dec->cur != ',')
1108 ERR (", or } expected while parsing object/hash"); 1165 ERR (", or } expected while parsing object/hash");
1109 1166
1110 ++dec->cur; 1167 ++dec->cur;
1168
1169 decode_ws (dec);
1170
1171 if (*dec->cur == '}' && dec->json.flags & F_RELAXED)
1172 {
1173 ++dec->cur;
1174 break;
1175 }
1111 } 1176 }
1112 1177
1113 DEC_DEC_DEPTH; 1178 DEC_DEC_DEPTH;
1114 sv = newRV_noinc ((SV *)hv); 1179 sv = newRV_noinc ((SV *)hv);
1115 1180
1180} 1245}
1181 1246
1182static SV * 1247static SV *
1183decode_sv (dec_t *dec) 1248decode_sv (dec_t *dec)
1184{ 1249{
1185 decode_ws (dec);
1186
1187 // the beauty of JSON: you need exactly one character lookahead 1250 // the beauty of JSON: you need exactly one character lookahead
1188 // to parse anything. 1251 // to parse anything.
1189 switch (*dec->cur) 1252 switch (*dec->cur)
1190 { 1253 {
1191 case '"': ++dec->cur; return decode_str (dec); 1254 case '"': ++dec->cur; return decode_str (dec);
1199 1262
1200 case 't': 1263 case 't':
1201 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) 1264 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
1202 { 1265 {
1203 dec->cur += 4; 1266 dec->cur += 4;
1267#if JSON_SLOW
1268 json_true = get_sv ("JSON::XS::true", 1); SvREADONLY_on (json_true);
1269#endif
1204 return SvREFCNT_inc (json_true); 1270 return SvREFCNT_inc (json_true);
1205 } 1271 }
1206 else 1272 else
1207 ERR ("'true' expected"); 1273 ERR ("'true' expected");
1208 1274
1210 1276
1211 case 'f': 1277 case 'f':
1212 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5)) 1278 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
1213 { 1279 {
1214 dec->cur += 5; 1280 dec->cur += 5;
1281#if JSON_SLOW
1282 json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false);
1283#endif
1215 return SvREFCNT_inc (json_false); 1284 return SvREFCNT_inc (json_false);
1216 } 1285 }
1217 else 1286 else
1218 ERR ("'false' expected"); 1287 ERR ("'false' expected");
1219 1288
1269 1338
1270 if (dec.json.cb_object || dec.json.cb_sk_object) 1339 if (dec.json.cb_object || dec.json.cb_sk_object)
1271 dec.json.flags |= F_HOOK; 1340 dec.json.flags |= F_HOOK;
1272 1341
1273 *dec.end = 0; // this should basically be a nop, too, but make sure it's there 1342 *dec.end = 0; // this should basically be a nop, too, but make sure it's there
1343
1344 decode_ws (&dec);
1274 sv = decode_sv (&dec); 1345 sv = decode_sv (&dec);
1275 1346
1276 if (!(offset_return || !sv)) 1347 if (!(offset_return || !sv))
1277 { 1348 {
1278 // check for trailing garbage 1349 // check for trailing garbage
1346 json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false); 1417 json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false);
1347} 1418}
1348 1419
1349PROTOTYPES: DISABLE 1420PROTOTYPES: DISABLE
1350 1421
1422void CLONE (...)
1423 CODE:
1424 json_stash = 0;
1425 json_boolean_stash = 0;
1426
1351void new (char *klass) 1427void new (char *klass)
1352 PPCODE: 1428 PPCODE:
1353{ 1429{
1354 SV *pv = NEWSV (0, sizeof (JSON)); 1430 SV *pv = NEWSV (0, sizeof (JSON));
1355 SvPOK_only (pv); 1431 SvPOK_only (pv);
1356 Zero (SvPVX (pv), 1, JSON); 1432 Zero (SvPVX (pv), 1, JSON);
1357 ((JSON *)SvPVX (pv))->flags = F_DEFAULT; 1433 ((JSON *)SvPVX (pv))->flags = F_DEFAULT;
1358 XPUSHs (sv_2mortal (sv_bless (newRV_noinc (pv), json_stash))); 1434 XPUSHs (sv_2mortal (sv_bless (newRV_noinc (pv), JSON_STASH)));
1359} 1435}
1360 1436
1361void ascii (JSON *self, int enable = 1) 1437void ascii (JSON *self, int enable = 1)
1362 ALIAS: 1438 ALIAS:
1363 ascii = F_ASCII 1439 ascii = F_ASCII
1370 pretty = F_PRETTY 1446 pretty = F_PRETTY
1371 allow_nonref = F_ALLOW_NONREF 1447 allow_nonref = F_ALLOW_NONREF
1372 shrink = F_SHRINK 1448 shrink = F_SHRINK
1373 allow_blessed = F_ALLOW_BLESSED 1449 allow_blessed = F_ALLOW_BLESSED
1374 convert_blessed = F_CONV_BLESSED 1450 convert_blessed = F_CONV_BLESSED
1451 relaxed = F_RELAXED
1375 PPCODE: 1452 PPCODE:
1376{ 1453{
1377 if (enable) 1454 if (enable)
1378 self->flags |= ix; 1455 self->flags |= ix;
1379 else 1456 else

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines