ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.pm
Revision: 1.25
Committed: Thu Nov 28 12:08:07 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.24: +17 -13 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     CBOR::XS - Concise Binary Object Representation (CBOR, RFC7049)
4    
5     =encoding utf-8
6    
7     =head1 SYNOPSIS
8    
9     use CBOR::XS;
10    
11     $binary_cbor_data = encode_cbor $perl_value;
12     $perl_value = decode_cbor $binary_cbor_data;
13    
14     # OO-interface
15    
16     $coder = CBOR::XS->new;
17 root 1.6 $binary_cbor_data = $coder->encode ($perl_value);
18     $perl_value = $coder->decode ($binary_cbor_data);
19    
20     # prefix decoding
21    
22     my $many_cbor_strings = ...;
23     while (length $many_cbor_strings) {
24     my ($data, $length) = $coder->decode_prefix ($many_cbor_strings);
25     # data was decoded
26     substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
27     }
28 root 1.1
29     =head1 DESCRIPTION
30    
31 root 1.21 WARNING! This module is very new, and not very well tested (that's up
32     to you to do). Furthermore, details of the implementation might change
33     freely before version 1.0. And lastly, most extensions depend on an IANA
34     assignment, and until that assignment is official, this implementation is
35     not interoperable with other implementations (even future versions of this
36     module).
37 root 1.9
38     You are still invited to try out CBOR, and this module.
39 root 1.5
40     This module converts Perl data structures to the Concise Binary Object
41     Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
42     format that aims to use a superset of the JSON data model, i.e. when you
43     can represent something in JSON, you should be able to represent it in
44     CBOR.
45 root 1.1
46 root 1.9 In short, CBOR is a faster and very compact binary alternative to JSON,
47 root 1.10 with the added ability of supporting serialisation of Perl objects. (JSON
48     often compresses better than CBOR though, so if you plan to compress the
49     data later you might want to compare both formats first).
50 root 1.5
51 root 1.15 To give you a general idea about speed, with texts in the megabyte range,
52     C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
53     L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
54     data, the worse L<Storable> performs in comparison.
55    
56     As for compactness, C<CBOR::XS> encoded data structures are usually about
57     20% smaller than the same data encoded as (compact) JSON or L<Storable>.
58 root 1.14
59 root 1.21 In addition to the core CBOR data format, this module implements a number
60     of extensions, to support cyclic and self-referencing data structures
61 root 1.25 (see C<allow_sharing>), string deduplication (see C<pack_strings>) and
62 root 1.21 scalar references (always enabled).
63    
64 root 1.5 The primary goal of this module is to be I<correct> and the secondary goal
65     is to be I<fast>. To reach the latter goal it was written in C.
66 root 1.1
67     See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
68     vice versa.
69    
70     =cut
71    
72     package CBOR::XS;
73    
74     use common::sense;
75    
76 root 1.24 our $VERSION = 0.09;
77 root 1.1 our @ISA = qw(Exporter);
78    
79     our @EXPORT = qw(encode_cbor decode_cbor);
80    
81     use Exporter;
82     use XSLoader;
83    
84 root 1.6 use Types::Serialiser;
85    
86 root 1.3 our $MAGIC = "\xd9\xd9\xf7"; # CBOR encoding of tag 55799 (self-describe CBOR); may be prepended to CBOR data as a file-type marker
87    
88 root 1.1 =head1 FUNCTIONAL INTERFACE
89    
90     The following convenience methods are provided by this module. They are
91     exported by default:
92    
93     =over 4
94    
95     =item $cbor_data = encode_cbor $perl_scalar
96    
97     Converts the given Perl data structure to CBOR representation. Croaks on
98     error.
99    
100     =item $perl_scalar = decode_cbor $cbor_data
101    
102     The opposite of C<encode_cbor>: expects a valid CBOR string to parse,
103     returning the resulting perl scalar. Croaks on error.
104    
105     =back
106    
107    
108     =head1 OBJECT-ORIENTED INTERFACE
109    
110     The object oriented interface lets you configure your own encoding or
111     decoding style, within the limits of supported formats.
112    
113     =over 4
114    
115     =item $cbor = new CBOR::XS
116    
117     Creates a new CBOR::XS object that can be used to de/encode CBOR
118     strings. All boolean flags described below are by default I<disabled>.
119    
120     The mutators for flags all return the CBOR object again and thus calls can
121     be chained:
122    
123     my $cbor = CBOR::XS->new->encode ({a => [1,2]});
124    
125     =item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
126    
127     =item $max_depth = $cbor->get_max_depth
128    
129     Sets the maximum nesting level (default C<512>) accepted while encoding
130     or decoding. If a higher nesting level is detected in CBOR data or a Perl
131     data structure, then the encoder and decoder will stop and croak at that
132     point.
133    
134     Nesting level is defined by number of hash- or arrayrefs that the encoder
135     needs to traverse to reach a given point or the number of C<{> or C<[>
136     characters without their matching closing parenthesis crossed to reach a
137     given character in a string.
138    
139     Setting the maximum depth to one disallows any nesting, so that ensures
140     that the object is only a single hash/object or array.
141    
142     If no argument is given, the highest possible setting will be used, which
143     is rarely useful.
144    
145     Note that nesting is implemented by recursion in C. The default value has
146     been chosen to be as large as typical operating systems allow without
147     crashing.
148    
149     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
150    
151     =item $cbor = $cbor->max_size ([$maximum_string_size])
152    
153     =item $max_size = $cbor->get_max_size
154    
155     Set the maximum length a CBOR string may have (in bytes) where decoding
156     is being attempted. The default is C<0>, meaning no limit. When C<decode>
157     is called on a string that is longer than this many bytes, it will not
158     attempt to decode the string but throw an exception. This setting has no
159     effect on C<encode> (yet).
160    
161     If no argument is given, the limit check will be deactivated (same as when
162     C<0> is specified).
163    
164     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
165    
166 root 1.19 =item $cbor = $cbor->allow_unknown ([$enable])
167    
168     =item $enabled = $cbor->get_allow_unknown
169    
170     If C<$enable> is true (or missing), then C<encode> will I<not> throw an
171     exception when it encounters values it cannot represent in CBOR (for
172     example, filehandles) but instead will encode a CBOR C<error> value.
173    
174     If C<$enable> is false (the default), then C<encode> will throw an
175     exception when it encounters anything it cannot encode as CBOR.
176    
177     This option does not affect C<decode> in any way, and it is recommended to
178     leave it off unless you know your communications partner.
179    
180 root 1.20 =item $cbor = $cbor->allow_sharing ([$enable])
181 root 1.19
182 root 1.20 =item $enabled = $cbor->get_allow_sharing
183 root 1.19
184     If C<$enable> is true (or missing), then C<encode> will not double-encode
185 root 1.20 values that have been referenced before (e.g. when the same object, such
186     as an array, is referenced multiple times), but instead will emit a
187     reference to the earlier value.
188 root 1.19
189     This means that such values will only be encoded once, and will not result
190     in a deep cloning of the value on decode, in decoders supporting the value
191 root 1.25 sharing extension. This also makes it possible to encode cyclic data
192     structures.
193 root 1.19
194 root 1.21 It is recommended to leave it off unless you know your
195     communication partner supports the value sharing extensions to CBOR
196 root 1.25 (http://cbor.schmorp.de/value-sharing), as without decoder support, the
197     resulting data structure might be unusable.
198 root 1.21
199 root 1.19 Detecting shared values incurs a runtime overhead when values are encoded
200     that have a reference counter larger than one, and might unnecessarily
201     increase the encoded size, as potentially shared values are encoded as
202     sharable whether or not they are actually shared.
203    
204 root 1.20 At the moment, only targets of references can be shared (e.g. scalars,
205     arrays or hashes pointed to by a reference). Weirder constructs, such as
206     an array with multiple "copies" of the I<same> string, which are hard but
207     not impossible to create in Perl, are not supported (this is the same as
208 root 1.25 with L<Storable>).
209 root 1.19
210 root 1.25 If C<$enable> is false (the default), then C<encode> will encode shared
211     data structures repeatedly, unsharing them in the process. Cyclic data
212     structures cannot be encoded in this mode.
213 root 1.19
214     This option does not affect C<decode> in any way - shared values and
215 root 1.21 references will always be decoded properly if present.
216    
217 root 1.25 =item $cbor = $cbor->pack_strings ([$enable])
218 root 1.21
219 root 1.25 =item $enabled = $cbor->get_pack_strings
220 root 1.21
221     If C<$enable> is true (or missing), then C<encode> will try not to encode
222     the same string twice, but will encode a reference to the string
223 root 1.25 instead. Depending on your data format, this can save a lot of space, but
224 root 1.21 also results in a very large runtime overhead (expect encoding times to be
225     2-4 times as high as without).
226    
227     It is recommended to leave it off unless you know your
228     communications partner supports the stringref extension to CBOR
229 root 1.25 (http://cbor.schmorp.de/stringref), as without decoder support, the
230     resulting data structure might not be usable.
231 root 1.21
232 root 1.25 If C<$enable> is false (the default), then C<encode> will encode strings
233     the standard CBOR way.
234 root 1.21
235     This option does not affect C<decode> in any way - string references will
236     always be decoded properly if present.
237 root 1.19
238 root 1.23 =item $cbor = $cbor->filter ([$cb->($tag, $value)])
239    
240     =item $cb_or_undef = $cbor->get_filter
241    
242 root 1.24 Sets or replaces the tagged value decoding filter (when C<$cb> is
243     specified) or clears the filter (if no argument or C<undef> is provided).
244    
245     The filter callback is called only during decoding, when a non-enforced
246     tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
247     list of enforced tags). For specific tags, it's often better to provide a
248     default converter using the C<%CBOR::XS::FILTER> hash (see below).
249    
250     The first argument is the numerical tag, the second is the (decoded) value
251     that has been tagged.
252    
253     The filter function should return either exactly one value, which will
254     replace the tagged value in the decoded data structure, or no values,
255     which will result in default handling, which currently means the decoder
256     creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
257    
258     When the filter is cleared (the default state), the default filter
259     function, C<CBOR::XS::default_filter>, is used. This function simply looks
260     up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists it must be
261     a code reference that is called with tag and value, and is responsible for
262     decoding the value. If no entry exists, it returns no values.
263    
264     Example: decode all tags not handled internally into CBOR::XS::Tagged
265     objects, with no other special handling (useful when working with
266     potentially "unsafe" CBOR data).
267    
268     CBOR::XS->new->filter (sub { })->decode ($cbor_data);
269    
270     Example: provide a global filter for tag 1347375694, converting the value
271     into some string form.
272    
273     $CBOR::XS::FILTER{1347375694} = sub {
274     my ($tag, $value) = @_;
275    
276     "tag 1347375694 value $value"
277     };
278 root 1.23
279 root 1.1 =item $cbor_data = $cbor->encode ($perl_scalar)
280    
281     Converts the given Perl data structure (a scalar value) to its CBOR
282     representation.
283    
284     =item $perl_scalar = $cbor->decode ($cbor_data)
285    
286     The opposite of C<encode>: expects CBOR data and tries to parse it,
287     returning the resulting simple scalar or reference. Croaks on error.
288    
289     =item ($perl_scalar, $octets) = $cbor->decode_prefix ($cbor_data)
290    
291     This works like the C<decode> method, but instead of raising an exception
292     when there is trailing garbage after the CBOR string, it will silently
293     stop parsing there and return the number of characters consumed so far.
294    
295     This is useful if your CBOR texts are not delimited by an outer protocol
296     and you need to know where the first CBOR string ends and the next one
297     starts.
298    
299     CBOR::XS->new->decode_prefix ("......")
300     => ("...", 3)
301    
302     =back
303    
304    
305     =head1 MAPPING
306    
307     This section describes how CBOR::XS maps Perl values to CBOR values and
308     vice versa. These mappings are designed to "do the right thing" in most
309     circumstances automatically, preserving round-tripping characteristics
310     (what you put in comes out as something equivalent).
311    
312     For the more enlightened: note that in the following descriptions,
313     lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
314     refers to the abstract Perl language itself.
315    
316    
317     =head2 CBOR -> PERL
318    
319     =over 4
320    
321 root 1.4 =item integers
322    
323     CBOR integers become (numeric) perl scalars. On perls without 64 bit
324     support, 64 bit integers will be truncated or otherwise corrupted.
325    
326     =item byte strings
327    
328     Byte strings will become octet strings in Perl (the byte values 0..255
329     will simply become characters of the same value in Perl).
330    
331     =item UTF-8 strings
332    
333     UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
334     decoded into proper Unicode code points. At the moment, the validity of
335     the UTF-8 octets will not be validated - corrupt input will result in
336     corrupted Perl strings.
337    
338     =item arrays, maps
339    
340     CBOR arrays and CBOR maps will be converted into references to a Perl
341     array or hash, respectively. The keys of the map will be stringified
342     during this process.
343    
344 root 1.6 =item null
345    
346     CBOR null becomes C<undef> in Perl.
347    
348     =item true, false, undefined
349 root 1.1
350 root 1.6 These CBOR values become C<Types::Serialiser::true>,
351     C<Types::Serialiser::false> and C<Types::Serialiser::error>,
352 root 1.1 respectively. They are overloaded to act almost exactly like the numbers
353 root 1.6 C<1> and C<0> (for true and false) or to throw an exception on access (for
354     error). See the L<Types::Serialiser> manpage for details.
355    
356 root 1.23 =item tagged values
357 root 1.1
358 root 1.23 Tagged items consist of a numeric tag and another CBOR value.
359 root 1.4
360 root 1.23 See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
361     for details.
362 root 1.4
363     =item anything else
364    
365     Anything else (e.g. unsupported simple values) will raise a decoding
366     error.
367 root 1.1
368     =back
369    
370    
371     =head2 PERL -> CBOR
372    
373     The mapping from Perl to CBOR is slightly more difficult, as Perl is a
374     truly typeless language, so we can only guess which CBOR type is meant by
375     a Perl value.
376    
377     =over 4
378    
379     =item hash references
380    
381 root 1.4 Perl hash references become CBOR maps. As there is no inherent ordering in
382     hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
383     order.
384    
385     Currently, tied hashes will use the indefinite-length format, while normal
386     hashes will use the fixed-length format.
387 root 1.1
388     =item array references
389    
390 root 1.4 Perl array references become fixed-length CBOR arrays.
391 root 1.1
392     =item other references
393    
394     Other unblessed references are generally not allowed and will cause an
395     exception to be thrown, except for references to the integers C<0> and
396 root 1.4 C<1>, which get turned into false and true in CBOR.
397    
398     =item CBOR::XS::Tagged objects
399    
400     Objects of this type must be arrays consisting of a single C<[tag, value]>
401 root 1.13 pair. The (numerical) tag will be encoded as a CBOR tag, the value will
402     be encoded as appropriate for the value. You can use C<CBOR::XS::tag> to
403     create such objects.
404 root 1.1
405 root 1.6 =item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
406 root 1.1
407 root 1.6 These special values become CBOR true, CBOR false and CBOR undefined
408     values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
409     if you want.
410 root 1.1
411 root 1.7 =item other blessed objects
412 root 1.1
413 root 1.7 Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
414 root 1.23 L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
415     module, and L<OBJECT SERIALISATION> for generic object serialisation.
416 root 1.1
417     =item simple scalars
418    
419     Simple Perl scalars (any scalar that is not a reference) are the most
420     difficult objects to encode: CBOR::XS will encode undefined scalars as
421 root 1.4 CBOR null values, scalars that have last been used in a string context
422 root 1.1 before encoding as CBOR strings, and anything else as number value:
423    
424     # dump as number
425     encode_cbor [2] # yields [2]
426     encode_cbor [-3.0e17] # yields [-3e+17]
427     my $value = 5; encode_cbor [$value] # yields [5]
428    
429     # used as string, so dump as string
430     print $value;
431     encode_cbor [$value] # yields ["5"]
432    
433     # undef becomes null
434     encode_cbor [undef] # yields [null]
435    
436     You can force the type to be a CBOR string by stringifying it:
437    
438     my $x = 3.1; # some variable containing a number
439     "$x"; # stringified
440     $x .= ""; # another, more awkward way to stringify
441     print $x; # perl does it for you, too, quite often
442    
443     You can force the type to be a CBOR number by numifying it:
444    
445     my $x = "3"; # some variable containing a string
446     $x += 0; # numify it, ensuring it will be dumped as a number
447     $x *= 1; # same thing, the choice is yours.
448    
449     You can not currently force the type in other, less obscure, ways. Tell me
450     if you need this capability (but don't forget to explain why it's needed
451     :).
452    
453 root 1.4 Perl values that seem to be integers generally use the shortest possible
454     representation. Floating-point values will use either the IEEE single
455     format if possible without loss of precision, otherwise the IEEE double
456     format will be used. Perls that use formats other than IEEE double to
457     represent numerical values are supported, but might suffer loss of
458     precision.
459 root 1.1
460     =back
461    
462 root 1.7 =head2 OBJECT SERIALISATION
463    
464     This module knows two ways to serialise a Perl object: The CBOR-specific
465     way, and the generic way.
466    
467     Whenever the encoder encounters a Perl object that it cannot serialise
468     directly (most of them), it will first look up the C<TO_CBOR> method on
469     it.
470    
471     If it has a C<TO_CBOR> method, it will call it with the object as only
472     argument, and expects exactly one return value, which it will then
473     substitute and encode it in the place of the object.
474    
475     Otherwise, it will look up the C<FREEZE> method. If it exists, it will
476     call it with the object as first argument, and the constant string C<CBOR>
477     as the second argument, to distinguish it from other serialisers.
478    
479     The C<FREEZE> method can return any number of values (i.e. zero or
480     more). These will be encoded as CBOR perl object, together with the
481     classname.
482    
483     If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
484     with an error.
485    
486     Objects encoded via C<TO_CBOR> cannot be automatically decoded, but
487     objects encoded via C<FREEZE> can be decoded using the following protocol:
488    
489     When an encoded CBOR perl object is encountered by the decoder, it will
490     look up the C<THAW> method, by using the stored classname, and will fail
491     if the method cannot be found.
492    
493     After the lookup it will call the C<THAW> method with the stored classname
494     as first argument, the constant string C<CBOR> as second argument, and all
495     values returned by C<FREEZE> as remaining arguments.
496    
497     =head4 EXAMPLES
498    
499     Here is an example C<TO_CBOR> method:
500    
501     sub My::Object::TO_CBOR {
502     my ($obj) = @_;
503    
504     ["this is a serialised My::Object object", $obj->{id}]
505     }
506    
507     When a C<My::Object> is encoded to CBOR, it will instead encode a simple
508     array with two members: a string, and the "object id". Decoding this CBOR
509     string will yield a normal perl array reference in place of the object.
510    
511     A more useful and practical example would be a serialisation method for
512     the URI module. CBOR has a custom tag value for URIs, namely 32:
513    
514     sub URI::TO_CBOR {
515     my ($self) = @_;
516     my $uri = "$self"; # stringify uri
517     utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
518     CBOR::XS::tag 32, $uri
519     }
520    
521     This will encode URIs as a UTF-8 string with tag 32, which indicates an
522     URI.
523    
524     Decoding such an URI will not (currently) give you an URI object, but
525     instead a CBOR::XS::Tagged object with tag number 32 and the string -
526     exactly what was returned by C<TO_CBOR>.
527    
528     To serialise an object so it can automatically be deserialised, you need
529     to use C<FREEZE> and C<THAW>. To take the URI module as example, this
530     would be a possible implementation:
531    
532     sub URI::FREEZE {
533     my ($self, $serialiser) = @_;
534     "$self" # encode url string
535     }
536    
537     sub URI::THAW {
538     my ($class, $serialiser, $uri) = @_;
539    
540     $class->new ($uri)
541     }
542    
543     Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
544     example, a C<FREEZE> method that returns "type", "id" and "variant" values
545     would cause an invocation of C<THAW> with 5 arguments:
546    
547     sub My::Object::FREEZE {
548     my ($self, $serialiser) = @_;
549    
550     ($self->{type}, $self->{id}, $self->{variant})
551     }
552    
553     sub My::Object::THAW {
554     my ($class, $serialiser, $type, $id, $variant) = @_;
555    
556     $class->new (type => $type, id => $id, variant => $variant)
557     }
558    
559 root 1.1
560 root 1.7 =head1 MAGIC HEADER
561 root 1.3
562     There is no way to distinguish CBOR from other formats
563     programmatically. To make it easier to distinguish CBOR from other
564     formats, the CBOR specification has a special "magic string" that can be
565 root 1.18 prepended to any CBOR string without changing its meaning.
566 root 1.3
567     This string is available as C<$CBOR::XS::MAGIC>. This module does not
568 root 1.18 prepend this string to the CBOR data it generates, but it will ignore it
569 root 1.3 if present, so users can prepend this string as a "file type" indicator as
570     required.
571    
572    
573 root 1.12 =head1 THE CBOR::XS::Tagged CLASS
574    
575     CBOR has the concept of tagged values - any CBOR value can be tagged with
576     a numeric 64 bit number, which are centrally administered.
577    
578     C<CBOR::XS> handles a few tags internally when en- or decoding. You can
579     also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
580     decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
581     unknown tag.
582    
583     These objects are simply blessed array references - the first member of
584     the array being the numerical tag, the second being the value.
585    
586     You can interact with C<CBOR::XS::Tagged> objects in the following ways:
587    
588     =over 4
589    
590     =item $tagged = CBOR::XS::tag $tag, $value
591    
592     This function(!) creates a new C<CBOR::XS::Tagged> object using the given
593     C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
594     value that can be encoded in CBOR, including serialisable Perl objects and
595     C<CBOR::XS::Tagged> objects).
596    
597     =item $tagged->[0]
598    
599     =item $tagged->[0] = $new_tag
600    
601     =item $tag = $tagged->tag
602    
603     =item $new_tag = $tagged->tag ($new_tag)
604    
605     Access/mutate the tag.
606    
607     =item $tagged->[1]
608    
609     =item $tagged->[1] = $new_value
610    
611     =item $value = $tagged->value
612    
613     =item $new_value = $tagged->value ($new_value)
614    
615     Access/mutate the tagged value.
616    
617     =back
618    
619     =cut
620    
621     sub tag($$) { # plain function (not a method): called as CBOR::XS::tag $tag, $value
622     bless [@_], CBOR::XS::Tagged::; # copy the arguments into a new array and bless it into CBOR::XS::Tagged
623     }
624    
625     sub CBOR::XS::Tagged::tag { # accessor/mutator for the tag, stored at index 0 of the object array
626     $_[0][0] = $_[1] if $#_; # $#_ is non-zero only when an argument follows the object, i.e. a new tag was passed
627     $_[0][0] # return the (possibly just updated) tag
628     }
629    
630     sub CBOR::XS::Tagged::value { # accessor/mutator for the tagged value, stored at index 1 of the object array
631     $_[0][1] = $_[1] if $#_; # $#_ is non-zero only when an argument follows the object, i.e. a new value was passed
632     $_[0][1] # return the (possibly just updated) value
633     }
634    
635 root 1.13 =head2 EXAMPLES
636    
637     Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
638    
639     You can look up CBOR tag values and meanings in the IANA registry at
640     L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
641    
642     Prepend a magic header (C<$CBOR::XS::MAGIC>):
643    
644     my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
645     # same as:
646     my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
647    
648     Serialise some URIs and a regex in an array:
649    
650     my $cbor = encode_cbor [
651     (CBOR::XS::tag 32, "http://www.nethype.de/"),
652     (CBOR::XS::tag 32, "http://software.schmorp.de/"),
653     (CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
654     ];
655    
656     Wrap CBOR data in CBOR:
657    
658     my $cbor_cbor = encode_cbor
659     CBOR::XS::tag 24,
660     encode_cbor [1, 2, 3];
661    
662 root 1.19 =head1 TAG HANDLING AND EXTENSIONS
663    
664 root 1.22 This section describes how this module handles specific tagged values
665     and extensions. If a tag is not mentioned here and no additional filters
666     are provided for it, then the default handling applies (creating a
667     CBOR::XS::Tagged object on decoding, and only encoding the tag when
668     explicitly requested).
669 root 1.19
670 root 1.23 Tags not handled specifically are currently converted into a
671     L<CBOR::XS::Tagged> object, which is simply a blessed array reference
672     consisting of the numeric tag value followed by the (decoded) CBOR value.
673    
674 root 1.19 Future versions of this module reserve the right to special case
675 root 1.22 additional tags (such as base64url).
676    
677     =head2 ENFORCED TAGS
678    
679     These tags are always handled when decoding, and their handling cannot be
680     overridden by the user.
681 root 1.19
682     =over 4
683    
684     =item <unassigned> (perl-object, L<http://cbor.schmorp.de/perl-object>)
685    
686 root 1.23 These tags are automatically created (and decoded) for serialisable
687     objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
688     serialisation protocol). See L<OBJECT SERIALISATION> for details.
689 root 1.19
690     =item <unassigned>, <unassigned> (sharable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
691    
692     These tags are automatically decoded when encountered, resulting in
693     shared values in the decoded object. They are only encoded, however, when
694     C<allow_sharing> is enabled.
695    
696 root 1.21 =item <unassigned>, <unassigned> (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
697    
698     These tags are automatically decoded when encountered. They are only
699 root 1.25 encoded, however, when C<pack_strings> is enabled.
700 root 1.21
701 root 1.19 =item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
702    
703     This tag is automatically generated when a reference is encountered (with
704     the exception of hash and array references). It is converted to a reference
705     when decoding.
706    
707     =item 55799 (self-describe CBOR, RFC 7049)
708    
709     This value is not generated on encoding (unless explicitly requested by
710     the user), and is simply ignored when decoding.
711    
712     =back
713    
714 root 1.24 =head2 NON-ENFORCED TAGS
715 root 1.22
716     These tags have default filters provided when decoding. Their handling can
717     be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
718 root 1.24 providing a custom C<filter> callback when decoding.
719 root 1.22
720     When they result in decoding into a specific Perl class, the module
721     usually provides a corresponding C<TO_CBOR> method as well.
722    
723     When any of these need to load additional modules that are not part of the
724     perl core distribution (e.g. L<URI>), it is (currently) up to the user to
725     provide these modules. The decoding usually fails with an exception if the
726     required module cannot be loaded.
727    
728     =over 4
729    
730     =item 2, 3 (positive/negative bignum)
731    
732     These tags are decoded into L<Math::BigInt> objects. The corresponding
733     C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
734     integers, and others into positive/negative CBOR bignums.
735    
736     =item 4, 5 (decimal fraction/bigfloat)
737    
738     Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
739     objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
740     encodes into a decimal fraction.
741    
742     CBOR cannot represent bigfloats with I<very> large exponents - conversion
743     of such big float objects is undefined.
744    
745     Also, NaN and infinities are not encoded properly.
746    
747     =item 21, 22, 23 (expected later JSON conversion)
748    
749     CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
750     tags.
751    
752     =item 32 (URI)
753    
754     These objects decode into L<URI> objects. The corresponding
755     C<URI::TO_CBOR> method again results in a CBOR URI value.
756    
757     =back
758    
759     =cut
760    
761     our %FILTER = ( # tag number => decoder sub; each sub is called with ($tag, $value) and returns the replacement value
762     # 0 # rfc4287 datetime, utf-8
763     # 1 # unix timestamp, any
764    
765     2 => sub { # pos bigint
766     require Math::BigInt;
767     Math::BigInt->new ("0x" . unpack "H*", pop) # hex-dump the byte string (last argument) and parse it as one hexadecimal number
768     },
769    
770     3 => sub { # neg bigint
771     require Math::BigInt;
772     -Math::BigInt->new ("0x" . unpack "H*", pop) # NOTE(review): yields -n, but RFC 7049 defines tag 3 as -1 - n - confirm
773     },
774    
775     4 => sub { # decimal fraction, array
776     require Math::BigFloat;
777     Math::BigFloat->new ($_[1][1] . "E" . $_[1][0]) # builds the string "<element 1>E<element 0>" from the value array
778     },
779    
780     5 => sub { # bigfloat, array
781     require Math::BigFloat;
782     scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2) # element 1 shifted left by element 0 places in base 2
783     },
784    
785     21 => sub { pop }, # expected conversion to base64url encoding (value is returned unchanged)
786     22 => sub { pop }, # expected conversion to base64 encoding (value is returned unchanged)
787     23 => sub { pop }, # expected conversion to base16 encoding (value is returned unchanged)
788    
789     # 24 # embedded cbor, byte string
790    
791     32 => sub { # URI: the value becomes a URI object
792     require URI;
793     URI->new (pop)
794     },
795    
796     # 33 # base64url rfc4648, utf-8
797     # 34 # base64 rfc4648, utf-8
798     # 35 # regex pcre/ecma262, utf-8
799     # 36 # mime message rfc2045, utf-8
800     );
801    
802 root 1.19
803 root 1.7 =head1 CBOR and JSON
804 root 1.1
805 root 1.4 CBOR is supposed to implement a superset of the JSON data model, and is,
806     with some coercion, able to represent all JSON texts (something that other
807     "binary JSON" formats such as BSON generally do not support).
808    
809     CBOR implements some extra hints and support for JSON interoperability,
810     and the spec offers further guidance for conversion between CBOR and
811     JSON. None of this is currently implemented in CBOR::XS, and the guidelines
812     in the spec do not result in correct round-tripping of data. If JSON
813     interoperability is improved in the future, then the goal will be to
814     ensure that decoded JSON data will round-trip encoding and decoding to
815     CBOR intact.
816 root 1.1
817    
818     =head1 SECURITY CONSIDERATIONS
819    
820     When you are using CBOR in a protocol, talking to untrusted potentially
821     hostile creatures requires relatively few measures.
822    
823     First of all, your CBOR decoder should be secure, that is, should not have
824     any buffer overflows. Obviously, this module should ensure that and I am
825     trying hard on making that true, but you never know.
826    
827     Second, you need to avoid resource-starving attacks. That means you should
828     limit the size of CBOR data you accept, or make sure that when your
829     resources run out, that's just fine (e.g. by using a separate process that
830     can crash safely). The size of a CBOR string in octets is usually a good
831     indication of the size of the resources required to decode it into a Perl
832     structure. While CBOR::XS can check the size of the CBOR text, it might be
833     too late when you already have it in memory, so you might want to check
834     the size before you accept the string.
835    
836     Third, CBOR::XS recurses using the C stack when decoding objects and
837     arrays. The C stack is a limited resource: for instance, on my amd64
838     machine with 8MB of stack size I can decode around 180k nested arrays but
839     only 14k nested CBOR objects (due to perl itself recursing deeply on croak
840     to free the temporary). If that is exceeded, the program crashes. To be
841     conservative, the default nesting limit is set to 512. If your process
842     has a smaller stack, you should adjust this setting accordingly with the
843     C<max_depth> method.
844    
845     Something else could bomb you, too, that I forgot to think of. In that
846     case, you get to keep the pieces. I am always open for hints, though...
847    
848     Also keep in mind that CBOR::XS might leak contents of your Perl data
849     structures in its error messages, so when you serialise sensitive
850     information you might want to make sure that exceptions thrown by CBOR::XS
851     will not end up in front of untrusted eyes.
852    
853     =head1 CBOR IMPLEMENTATION NOTES
854    
855     This section contains some random implementation notes. They do not
856     describe guaranteed behaviour, but merely behaviour as-is implemented
857     right now.
858    
859     64 bit integers are only properly decoded when Perl was built with 64 bit
860     support.
861    
862     Strings and arrays are encoded with a definite length. Hashes as well,
863     unless they are tied (or otherwise magical).
864    
865     Only the double data type is supported for NV data types - when Perl uses
866     long double to represent floating point values, they might not be encoded
867     properly. Half precision types are accepted, but not encoded.
868    
869     Strict mode and canonical mode are not implemented.
870    
871    
872     =head1 THREADS
873    
874     This module is I<not> guaranteed to be thread safe and there are no
875     plans to change this until Perl gets thread support (as opposed to the
876     horribly slow so-called "threads" which are simply slow and bloated
877     process simulations - use fork, it's I<much> faster, cheaper, better).
878    
879     (It might actually work, but you have been warned).
880    
881    
882     =head1 BUGS
883    
884     While the goal of this module is to be correct, that unfortunately does
885     not mean it's bug-free, only that I think its design is bug-free. If you
886     keep reporting bugs they will be fixed swiftly, though.
887    
888     Please refrain from using rt.cpan.org or any other bug reporting
889     service. I put the contact address into my modules for a reason.
890    
891     =cut
892    
893 root 1.22 our %FILTER = (
        # Default decode-time filters, keyed by CBOR tag number. Each value is
        # a code ref that reads the tagged value from its argument list (via
        # "pop", i.e. the last argument, or via $_[1]) and returns the Perl
        # value to use in its place. Tags that appear only as comments below
        # have no default filter.
        # NOTE(review): an identical "our %FILTER = (...)" assignment appears
        # earlier in this file; this later one overwrites it.
894     # 0 # rfc4287 datetime, utf-8
895     # 1 # unix timestamp, any
896    
        # tag 2: hex-dump the byte string and parse it as a "0x..." bigint
897     2 => sub { # pos bigint
898     require Math::BigInt;
899     Math::BigInt->new ("0x" . unpack "H*", pop)
900     },
901    
        # tag 3: same conversion as tag 2, then negated
        # NOTE(review): RFC 7049 defines tag 3 as -1 - n, while this yields
        # -n - confirm whether the off-by-one is intended.
902     3 => sub { # neg bigint
903     require Math::BigInt;
904     -Math::BigInt->new ("0x" . unpack "H*", pop)
905     },
906    
        # tag 4: the value is indexed as a two-element array, element 0 used
        # as exponent and element 1 as mantissa; builds the string
        # "<mantissa>E<exponent>" and lets Math::BigFloat parse it
907     4 => sub { # decimal fraction, array
908     require Math::BigFloat;
909     Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
910     },
911    
        # tag 5: same array layout; the mantissa is shifted left by
        # <exponent> in base 2 (blsft), "scalar" forces a single return value
912     5 => sub { # bigfloat, array
913     require Math::BigFloat;
914     scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
915     },
916    
        # tags 21-23: the value is handed back unchanged
917     21 => sub { pop }, # expected conversion to base64url encoding
918     22 => sub { pop }, # expected conversion to base64 encoding
919     23 => sub { pop }, # expected conversion to base16 encoding
920    
921     # 24 # embedded cbor, byte string
922    
        # tag 32: wrap the value in a URI object; "require" throws if the
        # URI module cannot be loaded
923     32 => sub {
924     require URI;
925     URI->new (pop)
926     },
927    
928     # 33 # base64url rfc4648, utf-8
929     # 34 # base64 rfc4648, utf-8
930     # 35 # regex pcre/ecma262, utf-8
931     # 36 # mime message rfc2045, utf-8
932     );
933    
934     sub CBOR::XS::default_filter {
        # Dispatch to the default filter for the tag given in $_[0].
        # If %FILTER has no (true) entry for that tag, the "return" inside the
        # block fires and this sub returns nothing. Otherwise the code ref is
        # invoked through the paren-less &{...} form, which hands the current
        # @_ on to the filter unchanged, and its result is returned.
935     &{ $FILTER{$_[0]} or return }
936     }
937    
938     sub URI::TO_CBOR {
        # Serialise a URI object as CBOR tag 32 wrapping its string form
        # (the counterpart of the tag 32 entry in %FILTER).
939     my $uri = $_[0]->as_string;
        # presumably upgraded so the encoder emits a text string rather than
        # a byte string - TODO confirm against the XS encoder
940     utf8::upgrade $uri;
941     CBOR::XS::tag 32, $uri
942     }
943    
944     sub Math::BigInt::TO_CBOR {
        # Serialise a Math::BigInt. Values inside the signed 32 bit range are
        # returned via numify; everything else becomes a tagged byte string
        # (tag 2 for values >= 0, tag 3 otherwise).
945     if ($_[0] >= -2147483648 && $_[0] <= 2147483647) {
946     $_[0]->numify
947     } else {
        # drop the first two characters of the hex form (the "0x" prefix)
        # NOTE(review): for negative values as_hex presumably yields
        # "-0x...", so substr 2 would leave "x..." in the string - verify.
        # NOTE(review): RFC 7049 tag 3 carries n for the value -1 - n, not
        # the plain magnitude - confirm the intended payload.
948     my $hex = substr $_[0]->as_hex, 2;
        # pack "H*" consumes two hex digits per byte, so pad to an even length
949     $hex = "0$hex" if 1 & length $hex; # sigh
950     CBOR::XS::tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex
951     }
952     }
953    
954     sub Math::BigFloat::TO_CBOR {
        # Serialise a Math::BigFloat as CBOR tag 4 wrapping a two-element
        # array [exponent, mantissa] - the same element order the tag 4
        # entry in %FILTER reads back.
955     my ($m, $e) = $_[0]->parts;
        # the exponent is passed through numify, the mantissa is passed on
        # exactly as parts returned it
956     CBOR::XS::tag 4, [$e->numify, $m]
957     }
958    
959 root 1.1 XSLoader::load "CBOR::XS", $VERSION; # load the compiled XS part of CBOR::XS, passing $VERSION to the loader
960    
961     =head1 SEE ALSO
962    
963     The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
964     serialisation.
965    
966 root 1.6 The L<Types::Serialiser> module provides the data model for true, false
967     and error values.
968    
969 root 1.1 =head1 AUTHOR
970    
971     Marc Lehmann <schmorp@schmorp.de>
972     http://home.schmorp.de/
973    
974     =cut
975    
976 root 1.6 1 # a module file must finish by returning a true value
977