--- JSON-XS/XS.pm 2007/04/03 23:59:04 1.28 +++ JSON-XS/XS.pm 2007/07/02 00:29:38 1.50 @@ -12,10 +12,6 @@ $utf8_encoded_json_text = to_json $perl_hash_or_arrayref; $perl_hash_or_arrayref = from_json $utf8_encoded_json_text; - # objToJson and jsonToObj aliases to to_json and from_json - # are exported for compatibility to the JSON module, - # but should not be used in new code. - # OO-interface $coder = JSON::XS->new->ascii->pretty->allow_nonref; @@ -87,16 +83,13 @@ use strict; -BEGIN { - our $VERSION = '1.1'; - our @ISA = qw(Exporter); +our $VERSION = '1.4'; +our @ISA = qw(Exporter); - our @EXPORT = qw(to_json from_json objToJson jsonToObj); - require Exporter; +our @EXPORT = qw(to_json from_json); - require XSLoader; - XSLoader::load JSON::XS::, $VERSION; -} +use Exporter; +use XSLoader; =head1 FUNCTIONAL INTERFACE @@ -129,6 +122,15 @@ except being faster. +=item $is_boolean = JSON::XS::is_bool $scalar + +Returns true if the passed scalar represents either JSON::XS::true or +JSON::XS::false, two constants that act like C<1> and C<0>, respectively +and are used to represent JSON C and C values in Perl. + +See MAPPING, below, for more information on how JSON values are mapped to +Perl. + =back @@ -156,15 +158,44 @@ generate characters outside the code range C<0..127> (which is ASCII). Any unicode characters outside that range will be escaped using either a single \uXXXX (BMP characters) or a double \uHHHH\uLLLLL escape sequence, -as per RFC4627. +as per RFC4627. The resulting encoded JSON text can be treated as a native +unicode string, an ascii-encoded, latin1-encoded or UTF-8 encoded string, +or any other superset of ASCII. If C<$enable> is false, then the C method will not escape Unicode -characters unless required by the JSON syntax. This results in a faster -and more compact format. +characters unless required by the JSON syntax or other flags. This results +in a faster and more compact format. 
+ +The main use for this flag is to produce JSON texts that can be +transmitted over a 7-bit channel, as the encoded JSON texts will not +contain any 8 bit characters. JSON::XS->new->ascii (1)->encode ([chr 0x10401]) => ["\ud801\udc01"] +=item $json = $json->latin1 ([$enable]) + +If C<$enable> is true (or missing), then the C method will encode +the resulting JSON text as latin1 (or iso-8859-1), escaping any characters +outside the code range C<0..255>. The resulting string can be treated as a +latin1-encoded JSON text or a native unicode string. The C method +will not be affected in any way by this flag, as C by default +expects unicode, which is a strict superset of latin1. + +If C<$enable> is false, then the C method will not escape Unicode +characters unless required by the JSON syntax or other flags. + +The main use for this flag is efficiently encoding binary data as JSON +text, as most octets will not be escaped, resulting in a smaller encoded +size. The disadvantage is that the resulting JSON text is encoded +in latin1 (and must correctly be treated as such when storing and +transferring), a rare encoding for JSON. It is therefore most useful when +you want to store data structures known to contain binary data efficiently +in files or databases, not when talking to other JSON encoders/decoders. + + JSON::XS->new->latin1->encode (["\x{89}\x{abc}"]) + => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not) + =item $json = $json->utf8 ([$enable]) If C<$enable> is true (or missing), then the C method will encode @@ -283,6 +314,42 @@ JSON::XS->new->allow_nonref->encode ("Hello, World!") => "Hello, World!" +=item $json = $json->allow_blessed ([$enable]) + +If C<$enable> is true (or missing), then the C method will not +barf when it encounters a blessed reference. Instead, the value of the +B option will decide whether C (C +disabled or no C method found) or a representation of the +object (C enabled and C method found) is being +encoded. Has no effect on C. 
+ +If C<$enable> is false (the default), then C will throw an +exception when it encounters a blessed object. + +=item $json = $json->convert_blessed ([$enable]) + +If C<$enable> is true (or missing), then C, upon encountering a +blessed object, will check for the availability of the C method +on the object's class. If found, it will be called in scalar context +and the resulting scalar will be encoded instead of the object. If no +C method is found, the value of C will decide what +to do. + +The C method may safely call die if it wants. If C +returns other blessed objects, those will be handled in the same +way. C must take care of not causing an endless recursion cycle +(== crash) in this case. The name of C was chosen because other +methods called by the Perl core (== not by the user of the object) are +usually in upper case letters and to avoid collisions with the C +function. + +This setting does not yet influence C in any way, but in the +future, global hooks might get installed that influence C and are +enabled by this setting. + +If C<$enable> is false, then the C setting will decide what +to do when a blessed object is found. + =item $json = $json->shrink ([$enable]) Perl usually over-allocates memory a bit when allocating space for @@ -324,8 +391,23 @@ Setting the maximum depth to one disallows any nesting, so that ensures that the object is only a single hash/object or array. -The argument to C will be rounded up to the next nearest power -of two. +The argument to C will be rounded up to the next highest power +of two. If no argument is given, the highest possible setting will be +used, which is rarely useful. + +See SECURITY CONSIDERATIONS, below, for more info on why this is useful. + +=item $json = $json->max_size ([$maximum_string_size]) + +Set the maximum length a JSON text may have (in bytes) where decoding is +being attempted. The default is C<0>, meaning no limit. 
When C +is called on a string longer than this number of characters it will not +attempt to decode the string but throw an exception. This setting has no +effect on C (yet). + +The argument to C will be rounded up to the next B +power of two (so may be more than requested). If no argument is given, the +limit check will be deactivated (same as when C<0> is specified). See SECURITY CONSIDERATIONS, below, for more info on why this is useful. @@ -347,6 +429,20 @@ Perl arrayrefs and JSON objects become Perl hashrefs. C becomes C<1>, C becomes C<0> and C becomes C. +=item ($perl_scalar, $characters) = $json->decode_prefix ($json_text) + +This works like the C method, but instead of raising an exception +when there is trailing garbage after the first JSON object, it will +silently stop parsing there and return the number of characters consumed +so far. + +This is useful if your JSON texts are not delimited by an outer protocol +(which is not the brightest thing to do in the first place) and you need +to know where the JSON text ends. + + JSON::XS->new->decode_prefix ("[1] the tail") + => ([1], 3) + =back @@ -361,6 +457,7 @@ lowercase I refers to the Perl interpreter, while uppcercase I refers to the abstract Perl language itself. + =head2 JSON -> PERL =over 4 @@ -390,10 +487,10 @@ =item true, false -These JSON atoms become C<0>, C<1>, respectively. Information is lost in -this process. Future versions might represent those values differently, -but they will be guarenteed to act like these integers would normally in -Perl. +These JSON atoms become C and C, +respectively. They are overloaded to act almost exactly like the numbers +C<1> and C<0>. You can check whether a scalar is a JSON boolean by using +the C function. 
=item null @@ -401,6 +498,7 @@ =back + =head2 PERL -> JSON The mapping from Perl to JSON is slightly more difficult, as Perl is a @@ -434,6 +532,11 @@ to_json [\0,JSON::XS::true] # yields [false,true] +=item JSON::XS::true, JSON::XS::false + +These special values become JSON true and JSON false values, +respectively. You can also use C<\1> and C<\0> directly if you want. + =item blessed objects Blessed objects are not allowed. JSON::XS currently tries to encode their @@ -564,6 +667,30 @@ =back + +=head2 JSON and YAML + +You often hear that JSON is a subset (or a close subset) of YAML. This is, +however, a mass hysteria and very far from the truth. In general, there is +no way to configure JSON::XS to output a data structure as valid YAML. + +If you really must use JSON::XS to generate YAML, you should use this +algorithm (subject to change in future versions): + + my $to_yaml = JSON::XS->new->utf8->space_after (1); + my $yaml = $to_yaml->encode ($ref) . "\n"; + +This will usually generate JSON texts that also parse as valid +YAML. Please note that YAML has hardcoded limits on (simple) object key +lengths that JSON doesn't have, so you should make sure that your hash +keys are noticeably shorter than the 1024 characters YAML allows. + +There might be other incompatibilities that I am not aware of. In general +you should not try to generate YAML with a JSON generator or vice versa, +or try to parse JSON with a YAML parser or vice versa: chances are high +that you will run into severe interoperability problems. + + =head2 SPEED It seems that JSON::XS is surprisingly fast, as shown in the following @@ -571,43 +698,55 @@ in the JSON::XS distribution, to make it easy to compare on your own system. 
-First comes a comparison between various modules using a very short JSON -string: +First comes a comparison between various modules using a very short +single-line JSON string: - {"method": "handleMessage", "params": ["user1", "we were just talking"], "id": null} + {"method": "handleMessage", "params": ["user1", "we were just talking"], \ + "id": null, "array":[1,11,234,-5,1e5,1e7, true, false]} -It shows the number of encodes/decodes per second (JSON::XS uses the -functional interface, while JSON::XS/2 uses the OO interface with -pretty-printing and hashkey sorting enabled). Higher is better: +It shows the number of encodes/decodes per second (JSON::XS uses +the functional interface, while JSON::XS/2 uses the OO interface +with pretty-printing and hashkey sorting enabled, JSON::XS/3 enables +shrink). Higher is better: + Storable | 15779.925 | 14169.946 | + -----------+------------+------------+ module | encode | decode | -----------|------------|------------| - JSON | 11488.516 | 7823.035 | - JSON::DWIW | 94708.054 | 129094.260 | - JSON::PC | 63884.157 | 128528.212 | - JSON::Syck | 34898.677 | 42096.911 | - JSON::XS | 654027.064 | 396423.669 | - JSON::XS/2 | 371564.190 | 371725.613 | + JSON | 4990.842 | 4088.813 | + JSON::DWIW | 51653.990 | 71575.154 | + JSON::PC | 65948.176 | 74631.744 | + JSON::PP | 8931.652 | 3817.168 | + JSON::Syck | 24877.248 | 27776.848 | + JSON::XS | 388361.481 | 227951.304 | + JSON::XS/2 | 227951.304 | 218453.333 | + JSON::XS/3 | 338250.323 | 218453.333 | + Storable | 16500.016 | 135300.129 | -----------+------------+------------+ -That is, JSON::XS is more than six times faster than JSON::DWIW on -encoding, more than three times faster on decoding, and about thirty times -faster than JSON, even with pretty-printing and key sorting. +That is, JSON::XS is about five times faster than JSON::DWIW on encoding, +about three times faster on decoding, and over forty times faster +than JSON, even with pretty-printing and key sorting. 
It also compares +favourably to Storable for small amounts of data. Using a longer test string (roughly 18KB, generated from Yahoo! Locals search API (http://nanoref.com/yahooapis/mgPdGg): module | encode | decode | -----------|------------|------------| - JSON | 273.023 | 44.674 | - JSON::DWIW | 1089.383 | 1145.704 | - JSON::PC | 3097.419 | 2393.921 | - JSON::Syck | 514.060 | 843.053 | - JSON::XS | 6479.668 | 3636.364 | - JSON::XS/2 | 3774.221 | 3599.124 | + JSON | 55.260 | 34.971 | + JSON::DWIW | 825.228 | 1082.513 | + JSON::PC | 3571.444 | 2394.829 | + JSON::PP | 210.987 | 32.574 | + JSON::Syck | 552.551 | 787.544 | + JSON::XS | 5780.463 | 4854.519 | + JSON::XS/2 | 3869.998 | 4798.975 | + JSON::XS/3 | 5862.880 | 4798.975 | + Storable | 4445.002 | 5235.027 | -----------+------------+------------+ -Again, JSON::XS leads by far. +Again, JSON::XS leads by far (except for Storable which non-surprisingly +decodes faster). On large strings containing lots of high unicode characters, some modules (such as JSON::PC) seem to decode faster than JSON::XS, but the result @@ -630,7 +769,9 @@ resources run out, thats just fine (e.g. by using a separate process that can crash safely). The size of a JSON text in octets or characters is usually a good indication of the size of the resources required to decode -it into a Perl structure. +it into a Perl structure. While JSON::XS can check the size of the JSON +text, it might be too late when you already have it in memory, so you +might want to check the size before you accept the string. Third, JSON::XS recurses using the C stack when decoding objects and arrays. The C stack is a limited resource: for instance, on my amd64 @@ -642,9 +783,17 @@ C method. And last but least, something else could bomb you that I forgot to think -of. In that case, you get to keep the pieces. I am alway sopen for hints, +of. In that case, you get to keep the pieces. I am always open for hints, though... 
+If you are using JSON::XS to return packets for consumption +by javascript scripts in a browser you should have a look at +L to see whether +you are vulnerable to some common attack vectors (which really are browser +design bugs, but it is still you who will have to deal with it, as major +browser developers care only for features, not about doing security +right). + =head1 BUGS @@ -655,8 +804,26 @@ =cut -sub true() { \1 } -sub false() { \0 } +our $true = do { bless \(my $dummy = "1"), "JSON::XS::Boolean" }; +our $false = do { bless \(my $dummy = "0"), "JSON::XS::Boolean" }; + +sub true() { $true } +sub false() { $false } + +sub is_bool($) { + UNIVERSAL::isa $_[0], "JSON::XS::Boolean" +# or UNIVERSAL::isa $_[0], "JSON::Literal" +} + +XSLoader::load "JSON::XS", $VERSION; + +package JSON::XS::Boolean; + +use overload + "0+" => sub { ${$_[0]} }, + "++" => sub { $_[0] = ${$_[0]} + 1 }, + "--" => sub { $_[0] = ${$_[0]} - 1 }, + fallback => 1; 1;