… | |
… | |
27 | #define F_SPACE_AFTER 0x00000040UL |
27 | #define F_SPACE_AFTER 0x00000040UL |
28 | #define F_ALLOW_NONREF 0x00000100UL |
28 | #define F_ALLOW_NONREF 0x00000100UL |
29 | #define F_SHRINK 0x00000200UL |
29 | #define F_SHRINK 0x00000200UL |
30 | #define F_ALLOW_BLESSED 0x00000400UL |
30 | #define F_ALLOW_BLESSED 0x00000400UL |
31 | #define F_CONV_BLESSED 0x00000800UL |
31 | #define F_CONV_BLESSED 0x00000800UL |
|
|
32 | #define F_RELAXED 0x00001000UL |
|
|
33 | |
32 | #define F_MAXDEPTH 0xf8000000UL |
34 | #define F_MAXDEPTH 0xf8000000UL |
33 | #define S_MAXDEPTH 27 |
35 | #define S_MAXDEPTH 27 |
34 | #define F_MAXSIZE 0x01f00000UL |
36 | #define F_MAXSIZE 0x01f00000UL |
35 | #define S_MAXSIZE 20 |
37 | #define S_MAXSIZE 20 |
36 | #define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing |
38 | #define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing |
… | |
… | |
335 | --enc->indent; |
337 | --enc->indent; |
336 | encode_indent (enc); encode_ch (enc, ']'); |
338 | encode_indent (enc); encode_ch (enc, ']'); |
337 | } |
339 | } |
338 | |
340 | |
339 | static void |
341 | static void |
340 | encode_he (enc_t *enc, HE *he) |
342 | encode_hk (enc_t *enc, HE *he) |
341 | { |
343 | { |
342 | encode_ch (enc, '"'); |
344 | encode_ch (enc, '"'); |
343 | |
345 | |
344 | if (HeKLEN (he) == HEf_SVKEY) |
346 | if (HeKLEN (he) == HEf_SVKEY) |
345 | { |
347 | { |
… | |
… | |
358 | encode_ch (enc, '"'); |
360 | encode_ch (enc, '"'); |
359 | |
361 | |
360 | if (enc->json.flags & F_SPACE_BEFORE) encode_space (enc); |
362 | if (enc->json.flags & F_SPACE_BEFORE) encode_space (enc); |
361 | encode_ch (enc, ':'); |
363 | encode_ch (enc, ':'); |
362 | if (enc->json.flags & F_SPACE_AFTER ) encode_space (enc); |
364 | if (enc->json.flags & F_SPACE_AFTER ) encode_space (enc); |
363 | encode_sv (enc, HeVAL (he)); |
|
|
364 | } |
365 | } |
365 | |
366 | |
366 | // compare hash entries, used when all keys are bytestrings |
367 | // compare hash entries, used when all keys are bytestrings |
367 | static int |
368 | static int |
368 | he_cmp_fast (const void *a_, const void *b_) |
369 | he_cmp_fast (const void *a_, const void *b_) |
… | |
… | |
373 | HE *b = *(HE **)b_; |
374 | HE *b = *(HE **)b_; |
374 | |
375 | |
375 | STRLEN la = HeKLEN (a); |
376 | STRLEN la = HeKLEN (a); |
376 | STRLEN lb = HeKLEN (b); |
377 | STRLEN lb = HeKLEN (b); |
377 | |
378 | |
378 | if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb))) |
379 | if (!(cmp = memcmp (HeKEY (b), HeKEY (a), lb < la ? lb : la))) |
379 | cmp = la - lb; |
380 | cmp = lb - la; |
380 | |
381 | |
381 | return cmp; |
382 | return cmp; |
382 | } |
383 | } |
383 | |
384 | |
384 | // compare hash entries, used when some keys are sv's or utf-x |
385 | // compare hash entries, used when some keys are sv's or utf-x |
385 | static int |
386 | static int |
386 | he_cmp_slow (const void *a, const void *b) |
387 | he_cmp_slow (const void *a, const void *b) |
387 | { |
388 | { |
388 | return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b)); |
389 | return sv_cmp (HeSVKEY_force (*(HE **)b), HeSVKEY_force (*(HE **)a)); |
389 | } |
390 | } |
390 | |
391 | |
391 | static void |
392 | static void |
392 | encode_hv (enc_t *enc, HV *hv) |
393 | encode_hv (enc_t *enc, HV *hv) |
393 | { |
394 | { |
|
|
395 | HE *he; |
394 | int count, i; |
396 | int count; |
395 | |
397 | |
396 | if (enc->indent >= enc->maxdepth) |
398 | if (enc->indent >= enc->maxdepth) |
397 | croak ("data structure too deep (hit recursion limit)"); |
399 | croak ("data structure too deep (hit recursion limit)"); |
398 | |
400 | |
399 | encode_ch (enc, '{'); encode_nl (enc); ++enc->indent; |
401 | encode_ch (enc, '{'); encode_nl (enc); ++enc->indent; |
400 | |
402 | |
401 | if ((count = hv_iterinit (hv))) |
|
|
402 | { |
|
|
403 | // for canonical output we have to sort by keys first |
403 | // for canonical output we have to sort by keys first |
404 | // actually, this is mostly due to the stupid so-called |
404 | // actually, this is mostly due to the stupid so-called |
405 | // security workaround added somewhere in 5.8.x. |
405 | // security workaround added somewhere in 5.8.x. |
406 | // that randomises hash orderings |
406 | // that randomises hash orderings |
407 | if (enc->json.flags & F_CANONICAL) |
407 | if (enc->json.flags & F_CANONICAL) |
|
|
408 | { |
|
|
409 | int count = hv_iterinit (hv); |
|
|
410 | |
|
|
411 | if (SvMAGICAL (hv)) |
408 | { |
412 | { |
|
|
413 | // need to count by iterating. could improve by dynamically building the vector below |
|
|
414 | // but I don't care for the speed of this special case. |
|
|
415 | // note also that we will run into undefined behaviour when the two iterations |
|
|
416 | // do not result in the same count, something I might care for in some later release. |
|
|
417 | |
|
|
418 | count = 0; |
|
|
419 | while (hv_iternext (hv)) |
|
|
420 | ++count; |
|
|
421 | |
|
|
422 | hv_iterinit (hv); |
|
|
423 | } |
|
|
424 | |
|
|
425 | if (count) |
|
|
426 | { |
409 | int fast = 1; |
427 | int i, fast = 1; |
410 | HE *he; |
|
|
411 | #if defined(__BORLANDC__) || defined(_MSC_VER) |
428 | #if defined(__BORLANDC__) || defined(_MSC_VER) |
412 | HE **hes = _alloca (count * sizeof (HE)); |
429 | HE **hes = _alloca (count * sizeof (HE)); |
413 | #else |
430 | #else |
414 | HE *hes [count]; // if your compiler dies here, you need to enable C99 mode |
431 | HE *hes [count]; // if your compiler dies here, you need to enable C99 mode |
415 | #endif |
432 | #endif |
… | |
… | |
442 | |
459 | |
443 | FREETMPS; |
460 | FREETMPS; |
444 | LEAVE; |
461 | LEAVE; |
445 | } |
462 | } |
446 | |
463 | |
447 | for (i = 0; i < count; ++i) |
464 | while (count--) |
448 | { |
465 | { |
449 | encode_indent (enc); |
466 | encode_indent (enc); |
|
|
467 | he = hes [count]; |
450 | encode_he (enc, hes [i]); |
468 | encode_hk (enc, he); |
|
|
469 | encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he)); |
451 | |
470 | |
452 | if (i < count - 1) |
471 | if (count) |
453 | encode_comma (enc); |
472 | encode_comma (enc); |
454 | } |
473 | } |
455 | |
|
|
456 | encode_nl (enc); |
|
|
457 | } |
474 | } |
|
|
475 | } |
458 | else |
476 | else |
459 | { |
477 | { |
|
|
478 | if (hv_iterinit (hv) || SvMAGICAL (hv)) |
460 | HE *he = hv_iternext (hv); |
479 | if ((he = hv_iternext (hv))) |
461 | |
|
|
462 | for (;;) |
480 | for (;;) |
463 | { |
481 | { |
464 | encode_indent (enc); |
482 | encode_indent (enc); |
465 | encode_he (enc, he); |
483 | encode_hk (enc, he); |
|
|
484 | encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he)); |
466 | |
485 | |
467 | if (!(he = hv_iternext (hv))) |
486 | if (!(he = hv_iternext (hv))) |
468 | break; |
487 | break; |
469 | |
488 | |
470 | encode_comma (enc); |
489 | encode_comma (enc); |
471 | } |
490 | } |
|
|
491 | } |
472 | |
492 | |
473 | encode_nl (enc); |
493 | encode_nl (enc); |
474 | } |
|
|
475 | } |
|
|
476 | |
494 | |
477 | --enc->indent; encode_indent (enc); encode_ch (enc, '}'); |
495 | --enc->indent; encode_indent (enc); encode_ch (enc, '}'); |
478 | } |
496 | } |
479 | |
497 | |
480 | // encode objects, arrays and special \0=false and \1=true values. |
498 | // encode objects, arrays and special \0=false and \1=true values. |
… | |
… | |
682 | U32 depth; // recursion depth |
700 | U32 depth; // recursion depth |
683 | U32 maxdepth; // recursion depth limit |
701 | U32 maxdepth; // recursion depth limit |
684 | } dec_t; |
702 | } dec_t; |
685 | |
703 | |
686 | inline void |
704 | inline void |
|
|
705 | decode_comment (dec_t *dec) |
|
|
706 | { |
|
|
707 | // only '#'-style comments allowed a.t.m. |
|
|
708 | |
|
|
709 | while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d) |
|
|
710 | ++dec->cur; |
|
|
711 | } |
|
|
712 | |
|
|
713 | inline void |
687 | decode_ws (dec_t *dec) |
714 | decode_ws (dec_t *dec) |
688 | { |
715 | { |
689 | for (;;) |
716 | for (;;) |
690 | { |
717 | { |
691 | char ch = *dec->cur; |
718 | char ch = *dec->cur; |
692 | |
719 | |
693 | if (ch > 0x20 |
720 | if (ch > 0x20) |
|
|
721 | { |
|
|
722 | if (expect_false (ch == '#')) |
|
|
723 | { |
|
|
724 | if (dec->json.flags & F_RELAXED) |
|
|
725 | decode_comment (dec); |
|
|
726 | else |
|
|
727 | break; |
|
|
728 | } |
|
|
729 | else |
|
|
730 | break; |
|
|
731 | } |
694 | || (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)) |
732 | else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09) |
695 | break; |
733 | break; // parse error, but let higher level handle it, gives better error messages |
696 | |
734 | |
697 | ++dec->cur; |
735 | ++dec->cur; |
698 | } |
736 | } |
699 | } |
737 | } |
700 | |
738 | |
… | |
… | |
1037 | |
1075 | |
1038 | if (*dec->cur != ',') |
1076 | if (*dec->cur != ',') |
1039 | ERR (", or ] expected while parsing array"); |
1077 | ERR (", or ] expected while parsing array"); |
1040 | |
1078 | |
1041 | ++dec->cur; |
1079 | ++dec->cur; |
|
|
1080 | |
|
|
1081 | decode_ws (dec); |
|
|
1082 | |
|
|
1083 | if (*dec->cur == ']' && dec->json.flags & F_RELAXED) |
|
|
1084 | { |
|
|
1085 | ++dec->cur; |
|
|
1086 | break; |
|
|
1087 | } |
1042 | } |
1088 | } |
1043 | |
1089 | |
1044 | DEC_DEC_DEPTH; |
1090 | DEC_DEC_DEPTH; |
1045 | return newRV_noinc ((SV *)av); |
1091 | return newRV_noinc ((SV *)av); |
1046 | |
1092 | |
… | |
… | |
1062 | if (*dec->cur == '}') |
1108 | if (*dec->cur == '}') |
1063 | ++dec->cur; |
1109 | ++dec->cur; |
1064 | else |
1110 | else |
1065 | for (;;) |
1111 | for (;;) |
1066 | { |
1112 | { |
1067 | decode_ws (dec); EXPECT_CH ('"'); |
1113 | EXPECT_CH ('"'); |
1068 | |
1114 | |
1069 | // heuristic: assume that |
1115 | // heuristic: assume that |
1070 | // a) decode_str + hv_store_ent are abysmally slow. |
1116 | // a) decode_str + hv_store_ent are abysmally slow. |
1071 | // b) most hash keys are short, simple ascii text. |
1117 | // b) most hash keys are short, simple ascii text. |
1072 | // => try to "fast-match" such strings to avoid |
1118 | // => try to "fast-match" such strings to avoid |
… | |
… | |
1086 | if (!key) |
1132 | if (!key) |
1087 | goto fail; |
1133 | goto fail; |
1088 | |
1134 | |
1089 | decode_ws (dec); EXPECT_CH (':'); |
1135 | decode_ws (dec); EXPECT_CH (':'); |
1090 | |
1136 | |
|
|
1137 | decode_ws (dec); |
1091 | value = decode_sv (dec); |
1138 | value = decode_sv (dec); |
1092 | if (!value) |
1139 | if (!value) |
1093 | { |
1140 | { |
1094 | SvREFCNT_dec (key); |
1141 | SvREFCNT_dec (key); |
1095 | goto fail; |
1142 | goto fail; |
… | |
… | |
1107 | int len = p - key; |
1154 | int len = p - key; |
1108 | dec->cur = p + 1; |
1155 | dec->cur = p + 1; |
1109 | |
1156 | |
1110 | decode_ws (dec); EXPECT_CH (':'); |
1157 | decode_ws (dec); EXPECT_CH (':'); |
1111 | |
1158 | |
|
|
1159 | decode_ws (dec); |
1112 | value = decode_sv (dec); |
1160 | value = decode_sv (dec); |
1113 | if (!value) |
1161 | if (!value) |
1114 | goto fail; |
1162 | goto fail; |
1115 | |
1163 | |
1116 | hv_store (hv, key, len, value, 0); |
1164 | hv_store (hv, key, len, value, 0); |
… | |
… | |
1132 | |
1180 | |
1133 | if (*dec->cur != ',') |
1181 | if (*dec->cur != ',') |
1134 | ERR (", or } expected while parsing object/hash"); |
1182 | ERR (", or } expected while parsing object/hash"); |
1135 | |
1183 | |
1136 | ++dec->cur; |
1184 | ++dec->cur; |
|
|
1185 | |
|
|
1186 | decode_ws (dec); |
|
|
1187 | |
|
|
1188 | if (*dec->cur == '}' && dec->json.flags & F_RELAXED) |
|
|
1189 | { |
|
|
1190 | ++dec->cur; |
|
|
1191 | break; |
|
|
1192 | } |
1137 | } |
1193 | } |
1138 | |
1194 | |
1139 | DEC_DEC_DEPTH; |
1195 | DEC_DEC_DEPTH; |
1140 | sv = newRV_noinc ((SV *)hv); |
1196 | sv = newRV_noinc ((SV *)hv); |
1141 | |
1197 | |
… | |
… | |
1206 | } |
1262 | } |
1207 | |
1263 | |
1208 | static SV * |
1264 | static SV * |
1209 | decode_sv (dec_t *dec) |
1265 | decode_sv (dec_t *dec) |
1210 | { |
1266 | { |
1211 | decode_ws (dec); |
|
|
1212 | |
|
|
1213 | // the beauty of JSON: you need exactly one character lookahead |
1267 | // the beauty of JSON: you need exactly one character lookahead |
1214 | // to parse anything. |
1268 | // to parse anything. |
1215 | switch (*dec->cur) |
1269 | switch (*dec->cur) |
1216 | { |
1270 | { |
1217 | case '"': ++dec->cur; return decode_str (dec); |
1271 | case '"': ++dec->cur; return decode_str (dec); |
… | |
… | |
1225 | |
1279 | |
1226 | case 't': |
1280 | case 't': |
1227 | if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) |
1281 | if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) |
1228 | { |
1282 | { |
1229 | dec->cur += 4; |
1283 | dec->cur += 4; |
|
|
1284 | #if JSON_SLOW |
|
|
1285 | json_true = get_sv ("JSON::XS::true", 1); SvREADONLY_on (json_true); |
|
|
1286 | #endif |
1230 | return SvREFCNT_inc (json_true); |
1287 | return SvREFCNT_inc (json_true); |
1231 | } |
1288 | } |
1232 | else |
1289 | else |
1233 | ERR ("'true' expected"); |
1290 | ERR ("'true' expected"); |
1234 | |
1291 | |
… | |
… | |
1236 | |
1293 | |
1237 | case 'f': |
1294 | case 'f': |
1238 | if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5)) |
1295 | if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5)) |
1239 | { |
1296 | { |
1240 | dec->cur += 5; |
1297 | dec->cur += 5; |
|
|
1298 | #if JSON_SLOW |
|
|
1299 | json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false); |
|
|
1300 | #endif |
1241 | return SvREFCNT_inc (json_false); |
1301 | return SvREFCNT_inc (json_false); |
1242 | } |
1302 | } |
1243 | else |
1303 | else |
1244 | ERR ("'false' expected"); |
1304 | ERR ("'false' expected"); |
1245 | |
1305 | |
… | |
… | |
1295 | |
1355 | |
1296 | if (dec.json.cb_object || dec.json.cb_sk_object) |
1356 | if (dec.json.cb_object || dec.json.cb_sk_object) |
1297 | dec.json.flags |= F_HOOK; |
1357 | dec.json.flags |= F_HOOK; |
1298 | |
1358 | |
1299 | *dec.end = 0; // this should basically be a nop, too, but make sure it's there |
1359 | *dec.end = 0; // this should basically be a nop, too, but make sure it's there |
|
|
1360 | |
|
|
1361 | decode_ws (&dec); |
1300 | sv = decode_sv (&dec); |
1362 | sv = decode_sv (&dec); |
1301 | |
1363 | |
1302 | if (!(offset_return || !sv)) |
1364 | if (!(offset_return || !sv)) |
1303 | { |
1365 | { |
1304 | // check for trailing garbage |
1366 | // check for trailing garbage |
… | |
… | |
1401 | pretty = F_PRETTY |
1463 | pretty = F_PRETTY |
1402 | allow_nonref = F_ALLOW_NONREF |
1464 | allow_nonref = F_ALLOW_NONREF |
1403 | shrink = F_SHRINK |
1465 | shrink = F_SHRINK |
1404 | allow_blessed = F_ALLOW_BLESSED |
1466 | allow_blessed = F_ALLOW_BLESSED |
1405 | convert_blessed = F_CONV_BLESSED |
1467 | convert_blessed = F_CONV_BLESSED |
|
|
1468 | relaxed = F_RELAXED |
1406 | PPCODE: |
1469 | PPCODE: |
1407 | { |
1470 | { |
1408 | if (enable) |
1471 | if (enable) |
1409 | self->flags |= ix; |
1472 | self->flags |= ix; |
1410 | else |
1473 | else |