--- CBOR-XS/XS.pm 2013/10/26 22:25:47 1.4 +++ CBOR-XS/XS.pm 2013/10/27 22:35:15 1.7 @@ -14,16 +14,35 @@ # OO-interface $coder = CBOR::XS->new; - #TODO + $binary_cbor_data = $coder->encode ($perl_value); + $perl_value = $coder->decode ($binary_cbor_data); + + # prefix decoding + + my $many_cbor_strings = ...; + while (length $many_cbor_strings) { + my ($data, $length) = $coder->decode_prefix ($many_cbor_strings); + # data was decoded + substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string + } =head1 DESCRIPTION -WARNING! THIS IS A PRE-ALPHA RELEASE! IT WILL CRASH, CORRUPT YOUR DATA AND -EAT YOUR CHILDREN! +WARNING! THIS IS A PRE-ALPHA RELEASE! IT WILL CRASH, CORRUPT YOUR DATA +AND EAT YOUR CHILDREN! (Actually, apart from being untested and a bit +feature-limited, it might already be useful). + +This module converts Perl data structures to the Concise Binary Object +Representation (CBOR) and vice versa. CBOR is a fast binary serialisation +format that aims to use a superset of the JSON data model, i.e. when you +can represent something in JSON, you should be able to represent it in +CBOR. + +This makes it a faster and more compact binary alternative to JSON, with +the added ability of supporting serialising of perl objects. -This module converts Perl data structures to CBOR and vice versa. Its -primary goal is to be I<correct> and its secondary goal is to be -I<fast>. To reach the latter goal it was written in C. +The primary goal of this module is to be I<correct> and the secondary goal +is to be I<fast>. To reach the latter goal it was written in C. See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and vice versa. @@ -34,7 +53,7 @@ use common::sense; -our $VERSION = 0.02; +our $VERSION = 0.03; our @ISA = qw(Exporter); our @EXPORT = qw(encode_cbor decode_cbor); @@ -42,6 +61,8 @@ use Exporter; use XSLoader; +use Types::Serialiser; + our $MAGIC = "\xd9\xd9\xf7"; =head1 FUNCTIONAL INTERFACE @@ -188,26 +209,36 @@ array or hash, respectively. 
The keys of the map will be stringified during this process. -=item true, false +=item null + +CBOR null becomes C<undef> in Perl. + +=item true, false, undefined -These CBOR values become C<CBOR::XS::true> and C<CBOR::XS::false>, +These CBOR values become C<Types::Serialiser::true>, +C<Types::Serialiser::false> and C<Types::Serialiser::error>, respectively. They are overloaded to act almost exactly like the numbers -C<1> and C<0>. You can check whether a scalar is a CBOR boolean by using -the C<CBOR::XS::is_bool> function. +C<1> and C<0> (for true and false) or to throw an exception on access (for +error). See the L<Types::Serialiser> manpage for details. -=item null, undefined +=item CBOR tag 256 (perl object) -CBOR null and undefined values becomes C<undef> in Perl (in the future, -Undefined may raise an exception or something else). +The tag value C<256> (TODO: pending iana registration) will be used +to deserialise a Perl object serialised with C<FREEZE>. See "OBJECT +SERIALISATION", below, for details. -=item tags +=item CBOR tag 55799 (magic header) -Tagged items consists of a numeric tag and another CBOR value. The tag -55799 is ignored (this tag implements the magic header). +The tag 55799 is ignored (this tag implements the magic header). -All other tags are currently converted into a L<CBOR::XS::Tagged> object, -which is simply a blessed array reference consistsing of the numeric tag -value followed by the (decoded) BOR value. +=item other CBOR tags + +Tagged items consist of a numeric tag and another CBOR value. Tags not +handled internally are currently converted into a L<CBOR::XS::Tagged> +object, which is simply a blessed array reference consisting of the +numeric tag value followed by the (decoded) CBOR value. + +In the future, support for user-supplied conversions might get added. =item anything else @@ -250,16 +281,16 @@ pair. The (numerical) tag will be encoded as a CBOR tag, the value will be encoded as appropriate for the value. -=item CBOR::XS::true, CBOR::XS::false +=item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error -These special values become CBOR true and CBOR false values, -respectively. 
You can also use C<\1> and C<\0> directly if you want. +These special values become CBOR true, CBOR false and CBOR undefined +values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly +if you want. -=item blessed objects +=item other blessed objects -Other blessed objects currently need to have a C<TO_CBOR> method. It -will be called on every object that is being serialised, and must return -something that can be encoded in CBOR. +Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See +"OBJECT SERIALISATION", below, for details. =item simple scalars @@ -307,8 +338,105 @@ =back +=head2 OBJECT SERIALISATION + +This module knows two ways to serialise a Perl object: The CBOR-specific +way, and the generic way. + +Whenever the encoder encounters a Perl object that it cannot serialise +directly (most of them), it will first look up the C<TO_CBOR> method on +it. + +If it has a C<TO_CBOR> method, it will call it with the object as only +argument, and expects exactly one return value, which it will then +substitute and encode in place of the object. + +Otherwise, it will look up the C<FREEZE> method. If it exists, it will +call it with the object as first argument, and the constant string C<CBOR> +as the second argument, to distinguish it from other serialisers. + +The C<FREEZE> method can return any number of values (i.e. zero or +more). These will be encoded as a CBOR perl object, together with the +classname. + +If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail +with an error. + +Objects encoded via C<TO_CBOR> cannot be automatically decoded, but +objects encoded via C<FREEZE> can be decoded using the following protocol: + +When an encoded CBOR perl object is encountered by the decoder, it will +look up the C<THAW> method, by using the stored classname, and will fail +if the method cannot be found. + +After the lookup it will call the C<THAW> method with the stored classname +as first argument, the constant string C<CBOR> as second argument, and all +values returned by C<FREEZE> as remaining arguments. 
+ +=head4 EXAMPLES + +Here is an example C<TO_CBOR> method: + + sub My::Object::TO_CBOR { + my ($obj) = @_; + + ["this is a serialised My::Object object", $obj->{id}] + } + +When a C<My::Object> is encoded to CBOR, it will instead encode a simple +array with two members: a string, and the "object id". Decoding this CBOR +string will yield a normal perl array reference in place of the object. -=head2 MAGIC HEADER +A more useful and practical example would be a serialisation method for +the URI module. CBOR has a custom tag value for URIs, namely 32: + + sub URI::TO_CBOR { + my ($self) = @_; + my $uri = "$self"; # stringify uri + utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string + CBOR::XS::tagged 32, $uri + } + +This will encode URIs as a UTF-8 string with tag 32, which indicates an +URI. + +Decoding such an URI will not (currently) give you an URI object, but +instead a CBOR::XS::Tagged object with tag number 32 and the string - +exactly what was returned by C<TO_CBOR>. + +To serialise an object so it can automatically be deserialised, you need +to use C<FREEZE> and C<THAW>. To take the URI module as example, this +would be a possible implementation: + + sub URI::FREEZE { + my ($self, $serialiser) = @_; + "$self" # encode url string + } + + sub URI::THAW { + my ($class, $serialiser, $uri) = @_; + + $class->new ($uri) + } + +Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For +example, a C<FREEZE> method that returns "type", "id" and "variant" values +would cause an invocation of C<THAW> with 5 arguments: + + sub My::Object::FREEZE { + my ($self, $serialiser) = @_; + + ($self->{type}, $self->{id}, $self->{variant}) + } + + sub My::Object::THAW { + my ($class, $serialiser, $type, $id, $variant) = @_; + + $class->new (type => $type, id => $id, variant => $variant) + } + + +=head1 MAGIC HEADER There is no way to distinguish CBOR from other formats programmatically. To make it easier to distinguish CBOR from other @@ -321,7 +449,7 @@ required. 
-=head2 CBOR and JSON +=head1 CBOR and JSON CBOR is supposed to implement a superset of the JSON data model, and is, with some coercion, able to represent all JSON texts (something that other @@ -411,34 +539,16 @@ =cut -our $true = do { bless \(my $dummy = 1), "CBOR::XS::Boolean" }; -our $false = do { bless \(my $dummy = 0), "CBOR::XS::Boolean" }; - -sub true() { $true } -sub false() { $false } - -sub is_bool($) { - UNIVERSAL::isa $_[0], "CBOR::XS::Boolean" -# or UNIVERSAL::isa $_[0], "CBOR::Literal" -} - XSLoader::load "CBOR::XS", $VERSION; -package CBOR::XS::Boolean; - -use overload - "0+" => sub { ${$_[0]} }, - "++" => sub { $_[0] = ${$_[0]} + 1 }, - "--" => sub { $_[0] = ${$_[0]} - 1 }, - fallback => 1; - -1; - =head1 SEE ALSO The L<JSON> and L<JSON::XS> modules that do similar, but human-readable, serialisation. +The L<Types::Serialiser> module provides the data model for true, false +and error values. + =head1 AUTHOR Marc Lehmann @@ -446,3 +556,5 @@ =cut +1 +