--- JSON-XS/XS.pm 2007/06/11 03:42:57 1.40 +++ JSON-XS/XS.pm 2007/10/14 20:02:57 1.66 @@ -2,6 +2,9 @@ JSON::XS - JSON serialising/deserialising, done correctly and fast +JSON::XS - 正しくて高速な JSON シリアライザ/デシリアライザ + (http://fleur.hio.jp/perldoc/mix/lib/JSON/XS.html) + =head1 SYNOPSIS use JSON::XS; @@ -12,10 +15,6 @@ $utf8_encoded_json_text = to_json $perl_hash_or_arrayref; $perl_hash_or_arrayref = from_json $utf8_encoded_json_text; - # objToJson and jsonToObj aliases to to_json and from_json - # are exported for compatibility to the JSON module, - # but should not be used in new code. - # OO-interface $coder = JSON::XS->new->ascii->pretty->allow_nonref; @@ -87,16 +86,13 @@ use strict; -BEGIN { - our $VERSION = '1.24'; - our @ISA = qw(Exporter); +our $VERSION = '1.51'; +our @ISA = qw(Exporter); - our @EXPORT = qw(to_json from_json objToJson jsonToObj); - require Exporter; +our @EXPORT = qw(to_json from_json); - require XSLoader; - XSLoader::load JSON::XS::, $VERSION; -} +use Exporter; +use XSLoader; =head1 FUNCTIONAL INTERFACE @@ -107,9 +103,8 @@ =item $json_text = to_json $perl_scalar -Converts the given Perl data structure (a simple scalar or a reference to -a hash or array) to a UTF-8 encoded, binary string (that is, the string contains -octets only). Croaks on error. +Converts the given Perl data structure to a UTF-8 encoded, binary string +(that is, the string contains octets only). Croaks on error. This function call is functionally identical to: @@ -119,9 +114,9 @@ =item $perl_scalar = from_json $json_text -The opposite of C: expects an UTF-8 (binary) string and tries to -parse that as an UTF-8 encoded JSON text, returning the resulting simple -scalar or reference. Croaks on error. +The opposite of C: expects an UTF-8 (binary) string and tries +to parse that as an UTF-8 encoded JSON text, returning the resulting +reference. Croaks on error. This function call is functionally identical to: @@ -129,9 +124,66 @@ except being faster. 
+=item $is_boolean = JSON::XS::is_bool $scalar + +Returns true if the passed scalar represents either JSON::XS::true or +JSON::XS::false, two constants that act like C<1> and C<0>, respectively +and are used to represent JSON C<true> and C<false> values in Perl. + +See MAPPING, below, for more information on how JSON values are mapped to +Perl. + =back +=head1 A FEW NOTES ON UNICODE AND PERL + +Since this often leads to confusion, here are a few very clear words on +how Unicode works in Perl, modulo bugs. + +=over 4 + +=item 1. Perl strings can store characters with ordinal values > 255. + +This enables you to store unicode characters as single characters in a +Perl string - very natural. + +=item 2. Perl does I<not> associate an encoding with your strings. + +Unless you force it to, e.g. when matching it against a regex, or printing +the scalar to a file, in which case Perl either interprets your string as +locale-encoded text, octets/binary, or as Unicode, depending on various +settings. In no case is an encoding stored together with your data, it is +I<use> that decides encoding, not any magical metadata. + +=item 3. The internal utf-8 flag has no meaning with regards to the +encoding of your string. + +Just ignore that flag unless you debug a Perl bug, a module written in +XS or want to dive into the internals of perl. Otherwise it will only +confuse you, as, despite the name, it says nothing about how your string +is encoded. You can have unicode strings with that flag set, with that +flag clear, and you can have binary data with that flag set and that flag +clear. Other possibilities exist, too. + +If you didn't know about that flag, just the better, pretend it doesn't +exist. + +=item 4. A "Unicode String" is simply a string where each character can be +validly interpreted as a Unicode codepoint. + +If you have UTF-8 encoded data, it is no longer a Unicode string, but a +Unicode string encoded in UTF-8, giving you a binary string. + +=item 5. 
A string containing "high" (> 255) character values is I<not> a UTF-8 string. + +It's a fact. Learn to live with it. + +=back + +I hope this helps :) + + =head1 OBJECT-ORIENTED INTERFACE The object oriented interface lets you configure your own encoding or @@ -278,6 +330,51 @@ {"key": "value"} +=item $json = $json->relaxed ([$enable]) + +If C<$enable> is true (or missing), then C<decode> will accept some +extensions to normal JSON syntax (see below). C<encode> will not be +affected in any way. I<Be aware that this option makes you accept invalid +JSON texts as if they were valid!>. I suggest only to use this option to +parse application-specific files written by humans (configuration files, +resource files etc.) + +If C<$enable> is false (the default), then C<decode> will only accept +valid JSON texts. + +Currently accepted extensions are: + +=over 4 + +=item * list items can have an end-comma + +JSON I<separates> array elements and key-value pairs with commas. This +can be annoying if you write JSON texts manually and want to be able to +quickly append elements, so this extension accepts comma at the end of +such items not just between them: + + [ + 1, + 2, <- this comma not normally allowed + ] + { + "k1": "v1", + "k2": "v2", <- this comma not normally allowed + } + +=item * shell-style '#'-comments + +Whenever JSON allows whitespace, shell-style comments are additionally +allowed. They are terminated by the first carriage-return or line-feed +character, after which more white-space and comments are allowed. + + [ + 1, # this comment not allowed in JSON + # neither this one... + ] + +=back + =item $json = $json->canonical ([$enable]) If C<$enable> is true (or missing), then the C<encode> method will output JSON objects @@ -312,6 +409,118 @@ JSON::XS->new->allow_nonref->encode ("Hello, World!") => "Hello, World!" +=item $json = $json->allow_blessed ([$enable]) + +If C<$enable> is true (or missing), then the C<encode> method will not +barf when it encounters a blessed reference. 
Instead, the value of the +B<convert_blessed> option will decide whether C<null> (C<convert_blessed> +disabled or no C<TO_JSON> method found) or a representation of the +object (C<convert_blessed> enabled and C<TO_JSON> method found) is being +encoded. Has no effect on C<decode>. + +If C<$enable> is false (the default), then C<encode> will throw an +exception when it encounters a blessed object. + +=item $json = $json->convert_blessed ([$enable]) + +If C<$enable> is true (or missing), then C<encode>, upon encountering a +blessed object, will check for the availability of the C<TO_JSON> method +on the object's class. If found, it will be called in scalar context +and the resulting scalar will be encoded instead of the object. If no +C<TO_JSON> method is found, the value of C<allow_blessed> will decide what +to do. + +The C<TO_JSON> method may safely call die if it wants. If C<TO_JSON> +returns other blessed objects, those will be handled in the same +way. C<TO_JSON> must take care of not causing an endless recursion cycle +(== crash) in this case. The name of C<TO_JSON> was chosen because other +methods called by the Perl core (== not by the user of the object) are +usually in upper case letters and to avoid collisions with the C<to_json> +function. + +This setting does not yet influence C<decode> in any way, but in the +future, global hooks might get installed that influence C<decode> and are +enabled by this setting. + +If C<$enable> is false, then the C<allow_blessed> setting will decide what +to do when a blessed object is found. + +=item $json = $json->filter_json_object ([$coderef->($hashref)]) + +When C<$coderef> is specified, it will be called from C<decode> each +time it decodes a JSON object. The only argument is a reference to the +newly-created hash. If the code reference returns a single scalar (which +need not be a reference), this value (i.e. a copy of that scalar to avoid +aliasing) is inserted into the deserialised data structure. If it returns +an empty list (NOTE: I<not> C<undef>, which is a valid scalar), the +original deserialised hash will be inserted. This setting can slow down +decoding considerably. 
+ +When C<$coderef> is omitted or undefined, any existing callback will +be removed and C<decode> will not change the deserialised hash in any +way. + +Example, convert all JSON objects into the integer 5: + + my $js = JSON::XS->new->filter_json_object (sub { 5 }); + # returns [5] + $js->decode ('[{}]') + # throw an exception because allow_nonref is not enabled + # so a lone 5 is not allowed. + $js->decode ('{"a":1, "b":2}'); + +=item $json = $json->filter_json_single_key_object ($key [=> $coderef->($value)]) + +Works remotely similar to C<filter_json_object>, but is only called for +JSON objects having a single key named C<$key>. + +This C<$coderef> is called before the one specified via +C<filter_json_object>, if any. It gets passed the single value in the JSON +object. If it returns a single value, it will be inserted into the data +structure. If it returns nothing (not even C<undef> but the empty list), +the callback from C<filter_json_object> will be called next, as if no +single-key callback were specified. + +If C<$coderef> is omitted or undefined, the corresponding callback will be +disabled. There can only ever be one callback for a given key. + +As this callback gets called less often than the C<filter_json_object> +one, decoding speed will not usually suffer as much. Therefore, single-key +objects make excellent targets to serialise Perl objects into, especially +as single-key JSON objects are as close to the type-tagged value concept +as JSON gets (it's basically an ID/VALUE tuple). Of course, JSON does not +support this in any way, so you need to make sure your data never looks +like a serialised Perl hash. + +Typical names for the single object key are C<__class_whatever__>, or +C<$__dollars_are_rarely_used__$> or C<}ugly_brace_placement>, or even +things like C<__class_md5sum(classname)__>, to reduce the risk of clashing +with real hashes. 
+ +Example, decode JSON objects of the form C<< { "__widget__" => <id> } >> +into the corresponding C<< $WIDGET{<id>} >> object: + + # return whatever is in $WIDGET{5}: + JSON::XS + ->new + ->filter_json_single_key_object (__widget__ => sub { + $WIDGET{ $_[0] } + }) + ->decode ('{"__widget__": 5}') + + # this can be used with a TO_JSON method in some "widget" class + # for serialisation to json: + sub WidgetBase::TO_JSON { + my ($self) = @_; + + unless ($self->{id}) { + $self->{id} = ..get..some..id..; + $WIDGET{$self->{id}} = $self; + } + + { __widget__ => $self->{id} } + } + =item $json = $json->shrink ([$enable]) Perl usually over-allocates memory a bit when allocating space for @@ -353,8 +562,23 @@ Setting the maximum depth to one disallows any nesting, so that ensures that the object is only a single hash/object or array. -The argument to C<max_depth> will be rounded up to the next nearest power -of two. +The argument to C<max_depth> will be rounded up to the next highest power +of two. If no argument is given, the highest possible setting will be +used, which is rarely useful. + +See SECURITY CONSIDERATIONS, below, for more info on why this is useful. + +=item $json = $json->max_size ([$maximum_string_size]) + +Set the maximum length a JSON text may have (in bytes) where decoding is +being attempted. The default is C<0>, meaning no limit. When C<decode> +is called on a string longer than this number of characters it will not +attempt to decode the string but throw an exception. This setting has no +effect on C<encode> (yet). + +The argument to C<max_size> will be rounded up to the next B<highest> +power of two (so may be more than requested). If no argument is given, the +limit check will be deactivated (same as when C<0> is specified). See SECURITY CONSIDERATIONS, below, for more info on why this is useful. @@ -426,18 +650,30 @@ =item number -A JSON number becomes either an integer or numeric (floating point) -scalar in perl, depending on its range and any fractional parts. 
On the -Perl level, there is no difference between those as Perl handles all the -conversion details, but an integer may take slightly less memory and might -represent more values exactly than (floating point) numbers. +A JSON number becomes either an integer, numeric (floating point) or +string scalar in perl, depending on its range and any fractional parts. On +the Perl level, there is no difference between those as Perl handles all +the conversion details, but an integer may take slightly less memory and +might represent more values exactly than (floating point) numbers. + +If the number consists of digits only, JSON::XS will try to represent +it as an integer value. If that fails, it will try to represent it as +a numeric (floating point) value if that is possible without loss of +precision. Otherwise it will preserve the number as a string value. + +Numbers containing a fractional or exponential part will always be +represented as numeric (floating point) values, possibly at a loss of +precision. + +This might create round-tripping problems as numbers might become strings, +but as Perl is typeless there is no other way to do it. =item true, false -These JSON atoms become C<0>, C<1>, respectively. Information is lost in -this process. Future versions might represent those values differently, -but they will be guarenteed to act like these integers would normally in -Perl. +These JSON atoms become C<JSON::XS::true> and C<JSON::XS::false>, +respectively. They are overloaded to act almost exactly like the numbers +C<1> and C<0>. You can check whether a scalar is a JSON boolean by using +the C<JSON::XS::is_bool> function. =item null @@ -479,6 +715,11 @@ to_json [\0,JSON::XS::true] # yields [false,true] +=item JSON::XS::true, JSON::XS::false + +These special values become JSON true and JSON false values, +respectively. You can also use C<\1> and C<\0> directly if you want. + =item blessed objects Blessed objects are not allowed. 
JSON::XS currently tries to encode their @@ -616,20 +857,21 @@ however, a mass hysteria and very far from the truth. In general, there is no way to configure JSON::XS to output a data structure as valid YAML. -If you really must use JSON::XS to generate YAML, you should this +If you really must use JSON::XS to generate YAML, you should use this algorithm (subject to change in future versions): my $to_yaml = JSON::XS->new->utf8->space_after (1); my $yaml = $to_yaml->encode ($ref) . "\n"; This will usually generate JSON texts that also parse as valid -YAML. Please note that YAML has hardcoded limits on object key lengths -that JSON doesn't have, so you should make sure that your hash keys are -noticably shorter than 1024 characters. +YAML. Please note that YAML has hardcoded limits on (simple) object key +lengths that JSON doesn't have, so you should make sure that your hash +keys are noticably shorter than the 1024 characters YAML allows. There might be other incompatibilities that I am not aware of. In general you should not try to generate YAML with a JSON generator or vice versa, -or try to parse JSON with a YAML parser or vice versa. +or try to parse JSON with a YAML parser or vice versa: chances are high +that you will run into severe interoperability problems. =head2 SPEED @@ -650,16 +892,19 @@ with pretty-printing and hashkey sorting enabled, JSON::XS/3 enables shrink). 
Higher is better: + Storable | 15779.925 | 14169.946 | + -----------+------------+------------+ module | encode | decode | -----------|------------|------------| - JSON | 7645.468 | 4208.613 | - JSON::DWIW | 40721.398 | 77101.176 | - JSON::PC | 65948.176 | 78251.940 | - JSON::Syck | 22844.793 | 26479.192 | - JSON::XS | 388361.481 | 199728.762 | - JSON::XS/2 | 218453.333 | 192399.266 | - JSON::XS/3 | 338250.323 | 192399.266 | - Storable | 15779.925 | 14169.946 | + JSON | 4990.842 | 4088.813 | + JSON::DWIW | 51653.990 | 71575.154 | + JSON::PC | 65948.176 | 74631.744 | + JSON::PP | 8931.652 | 3817.168 | + JSON::Syck | 24877.248 | 27776.848 | + JSON::XS | 388361.481 | 227951.304 | + JSON::XS/2 | 227951.304 | 218453.333 | + JSON::XS/3 | 338250.323 | 218453.333 | + Storable | 16500.016 | 135300.129 | -----------+------------+------------+ That is, JSON::XS is about five times faster than JSON::DWIW on encoding, @@ -672,14 +917,15 @@ module | encode | decode | -----------|------------|------------| - JSON | 254.685 | 37.665 | - JSON::DWIW | 843.343 | 1049.731 | - JSON::PC | 3602.116 | 2307.352 | - JSON::Syck | 505.107 | 787.899 | - JSON::XS | 5747.196 | 3690.220 | - JSON::XS/2 | 3968.121 | 3676.634 | - JSON::XS/3 | 6105.246 | 3662.508 | - Storable | 4417.337 | 5285.161 | + JSON | 55.260 | 34.971 | + JSON::DWIW | 825.228 | 1082.513 | + JSON::PC | 3571.444 | 2394.829 | + JSON::PP | 210.987 | 32.574 | + JSON::Syck | 552.551 | 787.544 | + JSON::XS | 5780.463 | 4854.519 | + JSON::XS/2 | 3869.998 | 4798.975 | + JSON::XS/3 | 5862.880 | 4798.975 | + Storable | 4445.002 | 5235.027 | -----------+------------+------------+ Again, JSON::XS leads by far (except for Storable which non-surprisingly @@ -706,7 +952,9 @@ resources run out, thats just fine (e.g. by using a separate process that can crash safely). The size of a JSON text in octets or characters is usually a good indication of the size of the resources required to decode -it into a Perl structure. +it into a Perl structure. 
While JSON::XS can check the size of the JSON +text, it might be too late when you already have it in memory, so you +might want to check the size before you accept the string. Third, JSON::XS recurses using the C stack when decoding objects and arrays. The C stack is a limited resource: for instance, on my amd64 @@ -721,6 +969,24 @@ of. In that case, you get to keep the pieces. I am always open for hints, though... +If you are using JSON::XS to return packets for consumption +by JavaScript scripts in a browser you should have a look at +L<http://jpsykes.com/47/practical-csrf-and-json-security> to see whether +you are vulnerable to some common attack vectors (which really are browser +design bugs, but it is still you who will have to deal with it, as major +browser developers care only for features, not about doing security +right). + + +=head1 THREADS + +This module is I<not> guaranteed to be thread safe and there are no +plans to change this until Perl gets thread support (as opposed to the +horribly slow so-called "threads" which are simply slow and bloated +process simulations - use fork, it's I<much> faster, cheaper, better). + +(It might actually work, but you have been warned). + =head1 BUGS @@ -729,10 +995,31 @@ still relatively early in its development. If you keep reporting bugs they will be fixed swiftly, though. +Please refrain from using rt.cpan.org or any other bug reporting +service. I put the contact address into my modules for a reason. + =cut -sub true() { \1 } -sub false() { \0 } +our $true = do { bless \(my $dummy = 1), "JSON::XS::Boolean" }; +our $false = do { bless \(my $dummy = 0), "JSON::XS::Boolean" }; + +sub true() { $true } +sub false() { $false } + +sub is_bool($) { + UNIVERSAL::isa $_[0], "JSON::XS::Boolean" +# or UNIVERSAL::isa $_[0], "JSON::Literal" +} + +XSLoader::load "JSON::XS", $VERSION; + +package JSON::XS::Boolean; + +use overload + "0+" => sub { ${$_[0]} }, + "++" => sub { $_[0] = ${$_[0]} + 1 }, + "--" => sub { $_[0] = ${$_[0]} - 1 }, + fallback => 1; 1;