ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.pm
Revision: 1.24
Committed: Fri Nov 22 16:18:59 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
CVS Tags: rel-0_09
Changes since 1.23: +39 -4 lines
Log Message:
0.09

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     CBOR::XS - Concise Binary Object Representation (CBOR, RFC7049)
4    
5     =encoding utf-8
6    
7     =head1 SYNOPSIS
8    
9     use CBOR::XS;
10    
11     $binary_cbor_data = encode_cbor $perl_value;
12     $perl_value = decode_cbor $binary_cbor_data;
13    
14     # OO-interface
15    
16     $coder = CBOR::XS->new;
17 root 1.6 $binary_cbor_data = $coder->encode ($perl_value);
18     $perl_value = $coder->decode ($binary_cbor_data);
19    
20     # prefix decoding
21    
22     my $many_cbor_strings = ...;
23     while (length $many_cbor_strings) {
24     my ($data, $length) = $cbor->decode_prefix ($many_cbor_strings);
25     # data was decoded
26     substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
27     }
28 root 1.1
29     =head1 DESCRIPTION
30    
31 root 1.21 WARNING! This module is very new, and not very well tested (that's up
32     to you to do). Furthermore, details of the implementation might change
33     freely before version 1.0. And lastly, most extensions depend on an IANA
34     assignment, and until that assignment is official, this implementation is
35     not interoperable with other implementations (even future versions of this
36     module) until the assignment is done.
37 root 1.9
38     You are still invited to try out CBOR, and this module.
39 root 1.5
40     This module converts Perl data structures to the Concise Binary Object
41     Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
42     format that aims to use a superset of the JSON data model, i.e. when you
43     can represent something in JSON, you should be able to represent it in
44     CBOR.
45 root 1.1
46 root 1.9 In short, CBOR is a faster and very compact binary alternative to JSON,
47 root 1.10 with the added ability of supporting serialisation of Perl objects. (JSON
48     often compresses better than CBOR though, so if you plan to compress the
49     data later you might want to compare both formats first).
50 root 1.5
51 root 1.15 To give you a general idea about speed, with texts in the megabyte range,
52     C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
53     L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
54     data, the worse L<Storable> performs in comparison.
55    
56     As for compactness, C<CBOR::XS> encoded data structures are usually about
57     20% smaller than the same data encoded as (compact) JSON or L<Storable>.
58 root 1.14
59 root 1.21 In addition to the core CBOR data format, this module implements a number
60     of extensions, to support cyclic and self-referencing data structures
61     (see C<allow_sharing>), string deduplication (see C<allow_stringref>) and
62     scalar references (always enabled).
63    
64 root 1.5 The primary goal of this module is to be I<correct> and the secondary goal
65     is to be I<fast>. To reach the latter goal it was written in C.
66 root 1.1
67     See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
68     vice versa.
69    
70     =cut
71    
72     package CBOR::XS;
73    
74     use common::sense;
75    
76 root 1.24 our $VERSION = 0.09;
77 root 1.1 our @ISA = qw(Exporter); # inherit from Exporter so the names in @EXPORT can be imported
78    
79     our @EXPORT = qw(encode_cbor decode_cbor); # the functional interface, exported by default
80    
81     use Exporter;
82     use XSLoader;
83    
84 root 1.6 use Types::Serialiser;
85    
86 root 1.3 our $MAGIC = "\xd9\xd9\xf7"; # encoded CBOR tag 55799 (self-describe CBOR); may be prepended to CBOR data as a file-type marker
87    
88 root 1.1 =head1 FUNCTIONAL INTERFACE
89    
90     The following convenience methods are provided by this module. They are
91     exported by default:
92    
93     =over 4
94    
95     =item $cbor_data = encode_cbor $perl_scalar
96    
97     Converts the given Perl data structure to CBOR representation. Croaks on
98     error.
99    
100     =item $perl_scalar = decode_cbor $cbor_data
101    
102     The opposite of C<encode_cbor>: expects a valid CBOR string to parse,
103     returning the resulting perl scalar. Croaks on error.
104    
105     =back
106    
107    
108     =head1 OBJECT-ORIENTED INTERFACE
109    
110     The object oriented interface lets you configure your own encoding or
111     decoding style, within the limits of supported formats.
112    
113     =over 4
114    
115     =item $cbor = new CBOR::XS
116    
117     Creates a new CBOR::XS object that can be used to de/encode CBOR
118     strings. All boolean flags described below are by default I<disabled>.
119    
120     The mutators for flags all return the CBOR object again and thus calls can
121     be chained:
122    
123     my $cbor = CBOR::XS->new->encode ({a => [1,2]});
124    
125     =item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
126    
127     =item $max_depth = $cbor->get_max_depth
128    
129     Sets the maximum nesting level (default C<512>) accepted while encoding
130     or decoding. If a higher nesting level is detected in CBOR data or a Perl
131     data structure, then the encoder and decoder will stop and croak at that
132     point.
133    
134     Nesting level is defined by number of hash- or arrayrefs that the encoder
135     needs to traverse to reach a given point or the number of C<{> or C<[>
136     characters without their matching closing parenthesis crossed to reach a
137     given character in a string.
138    
139     Setting the maximum depth to one disallows any nesting, so that ensures
140     that the object is only a single hash/object or array.
141    
142     If no argument is given, the highest possible setting will be used, which
143     is rarely useful.
144    
145     Note that nesting is implemented by recursion in C. The default value has
146     been chosen to be as large as typical operating systems allow without
147     crashing.
148    
149     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
150    
151     =item $cbor = $cbor->max_size ([$maximum_string_size])
152    
153     =item $max_size = $cbor->get_max_size
154    
155     Set the maximum length a CBOR string may have (in bytes) where decoding
156     is being attempted. The default is C<0>, meaning no limit. When C<decode>
157     is called on a string that is longer than this many bytes, it will not
158     attempt to decode the string but throw an exception. This setting has no
159     effect on C<encode> (yet).
160    
161     If no argument is given, the limit check will be deactivated (same as when
162     C<0> is specified).
163    
164     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
165    
166 root 1.19 =item $cbor = $cbor->allow_unknown ([$enable])
167    
168     =item $enabled = $cbor->get_allow_unknown
169    
170     If C<$enable> is true (or missing), then C<encode> will I<not> throw an
171     exception when it encounters values it cannot represent in CBOR (for
172     example, filehandles) but instead will encode a CBOR C<error> value.
173    
174     If C<$enable> is false (the default), then C<encode> will throw an
175     exception when it encounters anything it cannot encode as CBOR.
176    
177     This option does not affect C<decode> in any way, and it is recommended to
178     leave it off unless you know your communications partner.
179    
180 root 1.20 =item $cbor = $cbor->allow_sharing ([$enable])
181 root 1.19
182 root 1.20 =item $enabled = $cbor->get_allow_sharing
183 root 1.19
184     If C<$enable> is true (or missing), then C<encode> will not double-encode
185 root 1.20 values that have been referenced before (e.g. when the same object, such
186     as an array, is referenced multiple times), but instead will emit a
187     reference to the earlier value.
188 root 1.19
189     This means that such values will only be encoded once, and will not result
190     in a deep cloning of the value on decode, in decoders supporting the value
191     sharing extension.
192    
193 root 1.21 It is recommended to leave it off unless you know your
194     communication partner supports the value sharing extensions to CBOR
195     (http://cbor.schmorp.de/value-sharing).
196    
197 root 1.19 Detecting shared values incurs a runtime overhead when values are encoded
198     that have a reference counter larger than one, and might unnecessarily
199     increase the encoded size, as potentially shared values are encoded as
200     sharable whether or not they are actually shared.
201    
202 root 1.20 At the moment, only targets of references can be shared (e.g. scalars,
203     arrays or hashes pointed to by a reference). Weirder constructs, such as
204     an array with multiple "copies" of the I<same> string, which are hard but
205     not impossible to create in Perl, are not supported (this is the same as
206     for L<Storable>).
207 root 1.19
208     If C<$enable> is false (the default), then C<encode> will encode a value
209     again each time it is referenced, without emitting sharing information.
210    
211     This option does not affect C<decode> in any way - shared values and
212 root 1.21 references will always be decoded properly if present.
213    
214     =item $cbor = $cbor->allow_stringref ([$enable])
215    
216     =item $enabled = $cbor->get_allow_stringref
217    
218     If C<$enable> is true (or missing), then C<encode> will try not to encode
219     the same string twice, but will instead encode a reference to the earlier
220     string. Depending on your data format, this can save a lot of space, but
221     also results in a very large runtime overhead (expect encoding times to be
222     2-4 times as high as without).
223    
224     It is recommended to leave it off unless you know your
225     communications partner supports the stringref extension to CBOR
226     (http://cbor.schmorp.de/stringref).
227    
228     If C<$enable> is false (the default), then C<encode> will encode every
229     string in full, even if the same string has been encoded before.
230    
231     This option does not affect C<decode> in any way - string references will
232     always be decoded properly if present.
233 root 1.19
234 root 1.23 =item $cbor = $cbor->filter ([$cb->($tag, $value)])
235    
236     =item $cb_or_undef = $cbor->get_filter
237    
238 root 1.24 Sets or replaces the tagged value decoding filter (when C<$cb> is
239     specified) or clears the filter (if no argument or C<undef> is provided).
240    
241     The filter callback is called only during decoding, when a non-enforced
242     tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
243     list of enforced tags). For specific tags, it's often better to provide a
244     default converter using the C<%CBOR::XS::FILTER> hash (see below).
245    
246     The first argument is the numerical tag, the second is the (decoded) value
247     that has been tagged.
248    
249     The filter function should return either exactly one value, which will
250     replace the tagged value in the decoded data structure, or no values,
251     which will result in default handling, which currently means the decoder
252     creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
253    
254     When the filter is cleared (the default state), the default filter
255     function, C<CBOR::XS::default_filter>, is used. This function simply looks
256     up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists it must be
257     a code reference that is called with tag and value, and is responsible for
258     decoding the value. If no entry exists, it returns no values.
259    
260     Example: decode all tags not handled internally into CBOR::XS::Tagged
261     objects, with no other special handling (useful when working with
262     potentially "unsafe" CBOR data).
263    
264     CBOR::XS->new->filter (sub { })->decode ($cbor_data);
265    
266     Example: provide a global filter for tag 1347375694, converting the value
267     into some string form.
268    
269     $CBOR::XS::FILTER{1347375694} = sub {
270     my ($tag, $value) = @_;
271    
272     "tag 1347375694 value $value"
273     };
274 root 1.23
275 root 1.1 =item $cbor_data = $cbor->encode ($perl_scalar)
276    
277     Converts the given Perl data structure (a scalar value) to its CBOR
278     representation.
279    
280     =item $perl_scalar = $cbor->decode ($cbor_data)
281    
282     The opposite of C<encode>: expects CBOR data and tries to parse it,
283     returning the resulting simple scalar or reference. Croaks on error.
284    
285     =item ($perl_scalar, $octets) = $cbor->decode_prefix ($cbor_data)
286    
287     This works like the C<decode> method, but instead of raising an exception
288     when there is trailing garbage after the CBOR string, it will silently
289     stop parsing there and return the number of characters consumed so far.
290    
291     This is useful if your CBOR texts are not delimited by an outer protocol
292     and you need to know where the first CBOR string ends and the next one
293     starts.
294    
295     CBOR::XS->new->decode_prefix ("......")
296     => ("...", 3)
297    
298     =back
299    
300    
301     =head1 MAPPING
302    
303     This section describes how CBOR::XS maps Perl values to CBOR values and
304     vice versa. These mappings are designed to "do the right thing" in most
305     circumstances automatically, preserving round-tripping characteristics
306     (what you put in comes out as something equivalent).
307    
308     For the more enlightened: note that in the following descriptions,
309     lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
310     refers to the abstract Perl language itself.
311    
312    
313     =head2 CBOR -> PERL
314    
315     =over 4
316    
317 root 1.4 =item integers
318    
319     CBOR integers become (numeric) perl scalars. On perls without 64 bit
320     support, 64 bit integers will be truncated or otherwise corrupted.
321    
322     =item byte strings
323    
324     Byte strings will become octet strings in Perl (the byte values 0..255
325     will simply become characters of the same value in Perl).
326    
327     =item UTF-8 strings
328    
329     UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
330     decoded into proper Unicode code points. At the moment, the validity of
331     the UTF-8 octets will not be validated - corrupt input will result in
332     corrupted Perl strings.
333    
334     =item arrays, maps
335    
336     CBOR arrays and CBOR maps will be converted into references to a Perl
337     array or hash, respectively. The keys of the map will be stringified
338     during this process.
339    
340 root 1.6 =item null
341    
342     CBOR null becomes C<undef> in Perl.
343    
344     =item true, false, undefined
345 root 1.1
346 root 1.6 These CBOR values become C<Types::Serialiser::true>,
347     C<Types::Serialiser::false> and C<Types::Serialiser::error>,
348 root 1.1 respectively. They are overloaded to act almost exactly like the numbers
349 root 1.6 C<1> and C<0> (for true and false) or to throw an exception on access (for
350     error). See the L<Types::Serialiser> manpage for details.
351    
352 root 1.23 =item tagged values
353 root 1.1
354 root 1.23 Tagged items consist of a numeric tag and another CBOR value.
355 root 1.4
356 root 1.23 See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
357     for details.
358 root 1.4
359     =item anything else
360    
361     Anything else (e.g. unsupported simple values) will raise a decoding
362     error.
363 root 1.1
364     =back
365    
366    
367     =head2 PERL -> CBOR
368    
369     The mapping from Perl to CBOR is slightly more difficult, as Perl is a
370     truly typeless language, so we can only guess which CBOR type is meant by
371     a Perl value.
372    
373     =over 4
374    
375     =item hash references
376    
377 root 1.4 Perl hash references become CBOR maps. As there is no inherent ordering in
378     hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
379     order.
380    
381     Currently, tied hashes will use the indefinite-length format, while normal
382     hashes will use the fixed-length format.
383 root 1.1
384     =item array references
385    
386 root 1.4 Perl array references become fixed-length CBOR arrays.
387 root 1.1
388     =item other references
389    
390     Other unblessed references are generally not allowed and will cause an
391     exception to be thrown, except for references to the integers C<0> and
392 root 1.4 C<1>, which get turned into false and true in CBOR.
393    
394     =item CBOR::XS::Tagged objects
395    
396     Objects of this type must be arrays consisting of a single C<[tag, value]>
397 root 1.13 pair. The (numerical) tag will be encoded as a CBOR tag, the value will
398     be encoded as appropriate for the value. You can use C<CBOR::XS::tag> to
399     create such objects.
400 root 1.1
401 root 1.6 =item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
402 root 1.1
403 root 1.6 These special values become CBOR true, CBOR false and CBOR undefined
404     values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
405     if you want.
406 root 1.1
407 root 1.7 =item other blessed objects
408 root 1.1
409 root 1.7 Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
410 root 1.23 L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
411     module, and L<OBJECT SERIALISATION> for generic object serialisation.
412 root 1.1
413     =item simple scalars
414    
415     Simple Perl scalars (any scalar that is not a reference) are the most
416     difficult objects to encode: CBOR::XS will encode undefined scalars as
417 root 1.4 CBOR null values, scalars that have last been used in a string context
418 root 1.1 before encoding as CBOR strings, and anything else as number value:
419    
420     # dump as number
421     encode_cbor [2] # yields [2]
422     encode_cbor [-3.0e17] # yields [-3e+17]
423     my $value = 5; encode_cbor [$value] # yields [5]
424    
425     # used as string, so dump as string
426     print $value;
427     encode_cbor [$value] # yields ["5"]
428    
429     # undef becomes null
430     encode_cbor [undef] # yields [null]
431    
432     You can force the type to be a CBOR string by stringifying it:
433    
434     my $x = 3.1; # some variable containing a number
435     "$x"; # stringified
436     $x .= ""; # another, more awkward way to stringify
437     print $x; # perl does it for you, too, quite often
438    
439     You can force the type to be a CBOR number by numifying it:
440    
441     my $x = "3"; # some variable containing a string
442     $x += 0; # numify it, ensuring it will be dumped as a number
443     $x *= 1; # same thing, the choice is yours.
444    
445     You can not currently force the type in other, less obscure, ways. Tell me
446     if you need this capability (but don't forget to explain why it's needed
447     :).
448    
449 root 1.4 Perl values that seem to be integers generally use the shortest possible
450     representation. Floating-point values will use either the IEEE single
451     format if possible without loss of precision, otherwise the IEEE double
452     format will be used. Perls that use formats other than IEEE double to
453     represent numerical values are supported, but might suffer loss of
454     precision.
455 root 1.1
456     =back
457    
458 root 1.7 =head2 OBJECT SERIALISATION
459    
460     This module knows two ways to serialise a Perl object: The CBOR-specific
461     way, and the generic way.
462    
463     Whenever the encoder encounters a Perl object that it cannot serialise
464     directly (most of them), it will first look up the C<TO_CBOR> method on
465     it.
466    
467     If it has a C<TO_CBOR> method, it will call it with the object as only
468     argument, and expects exactly one return value, which it will then
469     substitute and encode it in the place of the object.
470    
471     Otherwise, it will look up the C<FREEZE> method. If it exists, it will
472     call it with the object as first argument, and the constant string C<CBOR>
473     as the second argument, to distinguish it from other serialisers.
474    
475     The C<FREEZE> method can return any number of values (i.e. zero or
476     more). These will be encoded as CBOR perl object, together with the
477     classname.
478    
479     If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
480     with an error.
481    
482     Objects encoded via C<TO_CBOR> cannot be automatically decoded, but
483     objects encoded via C<FREEZE> can be decoded using the following protocol:
484    
485     When an encoded CBOR perl object is encountered by the decoder, it will
486     look up the C<THAW> method, by using the stored classname, and will fail
487     if the method cannot be found.
488    
489     After the lookup it will call the C<THAW> method with the stored classname
490     as first argument, the constant string C<CBOR> as second argument, and all
491     values returned by C<FREEZE> as remaining arguments.
492    
493     =head4 EXAMPLES
494    
495     Here is an example C<TO_CBOR> method:
496    
497     sub My::Object::TO_CBOR {
498     my ($obj) = @_;
499    
500     ["this is a serialised My::Object object", $obj->{id}]
501     }
502    
503     When a C<My::Object> is encoded to CBOR, it will instead encode a simple
504     array with two members: a string, and the "object id". Decoding this CBOR
505     string will yield a normal perl array reference in place of the object.
506    
507     A more useful and practical example would be a serialisation method for
508     the URI module. CBOR has a custom tag value for URIs, namely 32:
509    
510     sub URI::TO_CBOR {
511     my ($self) = @_;
512     my $uri = "$self"; # stringify uri
513     utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
514     CBOR::XS::tag 32, $uri
515     }
516    
517     This will encode URIs as a UTF-8 string with tag 32, which indicates an
518     URI.
519    
520     Decoding such an URI will not (currently) give you an URI object, but
521     instead a CBOR::XS::Tagged object with tag number 32 and the string -
522     exactly what was returned by C<TO_CBOR>.
523    
524     To serialise an object so it can automatically be deserialised, you need
525     to use C<FREEZE> and C<THAW>. To take the URI module as example, this
526     would be a possible implementation:
527    
528     sub URI::FREEZE {
529     my ($self, $serialiser) = @_;
530     "$self" # encode url string
531     }
532    
533     sub URI::THAW {
534     my ($class, $serialiser, $uri) = @_;
535    
536     $class->new ($uri)
537     }
538    
539     Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
540     example, a C<FREEZE> method that returns "type", "id" and "variant" values
541     would cause an invocation of C<THAW> with 5 arguments:
542    
543     sub My::Object::FREEZE {
544     my ($self, $serialiser) = @_;
545    
546     ($self->{type}, $self->{id}, $self->{variant})
547     }
548    
549     sub My::Object::THAW {
550     my ($class, $serialiser, $type, $id, $variant) = @_;
551    
552     $class->new (type => $type, id => $id, variant => $variant)
553     }
554    
555 root 1.1
556 root 1.7 =head1 MAGIC HEADER
557 root 1.3
558     There is no way to distinguish CBOR from other formats
559     programmatically. To make it easier to distinguish CBOR from other
560     formats, the CBOR specification has a special "magic string" that can be
561 root 1.18 prepended to any CBOR string without changing its meaning.
562 root 1.3
563     This string is available as C<$CBOR::XS::MAGIC>. This module does not
564 root 1.18 prepend this string to the CBOR data it generates, but it will ignore it
565 root 1.3 if present, so users can prepend this string as a "file type" indicator as
566     required.
567    
568    
569 root 1.12 =head1 THE CBOR::XS::Tagged CLASS
570    
571     CBOR has the concept of tagged values - any CBOR value can be tagged with
572     a numeric 64 bit number, which are centrally administered.
573    
574     C<CBOR::XS> handles a few tags internally when en- or decoding. You can
575     also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
576     decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
577     unknown tag.
578    
579     These objects are simply blessed array references - the first member of
580     the array being the numerical tag, the second being the value.
581    
582     You can interact with C<CBOR::XS::Tagged> objects in the following ways:
583    
584     =over 4
585    
586     =item $tagged = CBOR::XS::tag $tag, $value
587    
588     This function(!) creates a new C<CBOR::XS::Tagged> object using the given
589     C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
590     value that can be encoded in CBOR, including serialisable Perl objects and
591     C<CBOR::XS::Tagged> objects).
592    
593     =item $tagged->[0]
594    
595     =item $tagged->[0] = $new_tag
596    
597     =item $tag = $tagged->tag
598    
599     =item $new_tag = $tagged->tag ($new_tag)
600    
601     Access/mutate the tag.
602    
603     =item $tagged->[1]
604    
605     =item $tagged->[1] = $new_value
606    
607     =item $value = $tagged->value
608    
609     =item $new_value = $tagged->value ($new_value)
610    
611     Access/mutate the tagged value.
612    
613     =back
614    
615     =cut
616    
617     sub tag($$) { # function (not a method): wraps ($tag, $value) in a CBOR::XS::Tagged object
618     bless [@_], CBOR::XS::Tagged::; # copies the two arguments into a new array: [0] = tag, [1] = value
619     }
620    
621     sub CBOR::XS::Tagged::tag { # combined getter/setter for the tag (array element 0); stores the argument when one is given
622     $_[0][0] = $_[1] if $#_;
623     $_[0][0] # returns the current (possibly just updated) tag
624     }
625    
626     sub CBOR::XS::Tagged::value { # combined getter/setter for the tagged value (array element 1); stores the argument when one is given
627     $_[0][1] = $_[1] if $#_;
628     $_[0][1] # returns the current (possibly just updated) value
629     }
630    
631 root 1.13 =head2 EXAMPLES
632    
633     Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
634    
635     You can look up CBOR tag values and meanings in the IANA registry at
636     L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
637    
638     Prepend a magic header (C<$CBOR::XS::MAGIC>):
639    
640     my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
641     # same as:
642     my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
643    
644     Serialise some URIs and a regex in an array:
645    
646     my $cbor = encode_cbor [
647     (CBOR::XS::tag 32, "http://www.nethype.de/"),
648     (CBOR::XS::tag 32, "http://software.schmorp.de/"),
649     (CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
650     ];
651    
652     Wrap CBOR data in CBOR:
653    
654     my $cbor_cbor = encode_cbor
655     CBOR::XS::tag 24,
656     encode_cbor [1, 2, 3];
657    
658 root 1.19 =head1 TAG HANDLING AND EXTENSIONS
659    
660 root 1.22 This section describes how this module handles specific tagged values
661     and extensions. If a tag is not mentioned here and no additional filters
662     are provided for it, then the default handling applies (creating a
663     CBOR::XS::Tagged object on decoding, and only encoding the tag when
664     explicitly requested).
665 root 1.19
666 root 1.23 Tags not handled specifically are currently converted into a
667     L<CBOR::XS::Tagged> object, which is simply a blessed array reference
668     consisting of the numeric tag value followed by the (decoded) CBOR value.
669    
670 root 1.19 Future versions of this module reserve the right to special case
671 root 1.22 additional tags (such as base64url).
672    
673     =head2 ENFORCED TAGS
674    
675     These tags are always handled when decoding, and their handling cannot be
676     overridden by the user.
677 root 1.19
678     =over 4
679    
680     =item <unassigned> (perl-object, L<http://cbor.schmorp.de/perl-object>)
681    
682 root 1.23 These tags are automatically created (and decoded) for serialisable
683     objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
684     serialisation protocol). See L<OBJECT SERIALISATION> for details.
685 root 1.19
686     =item <unassigned>, <unassigned> (sharable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
687    
688     These tags are automatically decoded when encountered, resulting in
689     shared values in the decoded object. They are only encoded, however, when
690     C<allow_sharing> is enabled.
691    
692 root 1.21 =item <unassigned>, <unassigned> (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
693    
694     These tags are automatically decoded when encountered. They are only
695     encoded, however, when C<allow_stringref> is enabled.
696    
697 root 1.19 =item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
698    
699     This tag is automatically generated when a reference is encountered (with
700     the exception of hash and array references). It is converted to a reference
701     when decoding.
702    
703     =item 55799 (self-describe CBOR, RFC 7049)
704    
705     This value is not generated on encoding (unless explicitly requested by
706     the user), and is simply ignored when decoding.
707    
708     =back
709    
710 root 1.24 =head2 NON-ENFORCED TAGS
711 root 1.22
712     These tags have default filters provided when decoding. Their handling can
713     be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
714 root 1.24 providing a custom C<filter> callback when decoding.
715 root 1.22
716     When they result in decoding into a specific Perl class, the module
717     usually provides a corresponding C<TO_CBOR> method as well.
718    
719     When any of these need to load additional modules that are not part of the
720     perl core distribution (e.g. L<URI>), it is (currently) up to the user to
721     provide these modules. The decoding usually fails with an exception if the
722     required module cannot be loaded.
723    
724     =over 4
725    
726     =item 2, 3 (positive/negative bignum)
727    
728     These tags are decoded into L<Math::BigInt> objects. The corresponding
729     C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
730     integers, and others into positive/negative CBOR bignums.
731    
732     =item 4, 5 (decimal fraction/bigfloat)
733    
734     Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
735     objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
736     encodes into a decimal fraction.
737    
738     CBOR cannot represent bigfloats with I<very> large exponents - conversion
739     of such big float objects is undefined.
740    
741     Also, NaN and infinities are not encoded properly.
742    
743     =item 21, 22, 23 (expected later JSON conversion)
744    
745     CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
746     tags.
747    
748     =item 32 (URI)
749    
750     These objects decode into L<URI> objects. The corresponding
751     C<URI::TO_CBOR> method again results in a CBOR URI value.
752    
753     =back
754    
755     =cut
756    
757     our %FILTER = ( # default decoders for non-enforced tags: tag number => code ref called with ($tag, $value)
758     # 0 # rfc4287 datetime, utf-8
759     # 1 # unix timestamp, any
760    
761     2 => sub { # pos bigint
762     require Math::BigInt;
763     Math::BigInt->new ("0x" . unpack "H*", pop) # pop yields the last argument (the value); its bytes are hex-encoded and parsed
764     },
765    
766     3 => sub { # neg bigint
767     require Math::BigInt;
768     -Math::BigInt->new ("0x" . unpack "H*", pop) # NOTE(review): RFC 7049 defines tag 3 as -1 - n, this yields -n - confirm against the encoder
769     },
770    
771     4 => sub { # decimal fraction, array of [exponent, mantissa]
772     require Math::BigFloat;
773     Math::BigFloat->new ($_[1][1] . "E" . $_[1][0]) # builds the string "<mantissa>E<exponent>"
774     },
775    
776     5 => sub { # bigfloat, array of [exponent, mantissa]
777     require Math::BigFloat;
778     scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2) # mantissa shifted left by exponent in base 2
779     },
780    
781     21 => sub { pop }, # expected conversion to base64url encoding; value is returned unchanged
782     22 => sub { pop }, # expected conversion to base64 encoding; value is returned unchanged
783     23 => sub { pop }, # expected conversion to base16 encoding; value is returned unchanged
784    
785     # 24 # embedded cbor, byte string
786    
787     32 => sub { # URI: the value is handed to the URI constructor
788     require URI;
789     URI->new (pop)
790     },
791    
792     # 33 # base64url rfc4648, utf-8
793     # 34 # base64 rfc4648, utf-8
794     # 35 # regex pcre/ecma262, utf-8
795     # 36 # mime message rfc2045, utf-8
796     );
797    
798 root 1.19
799 root 1.7 =head1 CBOR and JSON
800 root 1.1
801 root 1.4 CBOR is supposed to implement a superset of the JSON data model, and is,
802     with some coercion, able to represent all JSON texts (something that other
803     "binary JSON" formats such as BSON generally do not support).
804    
805     CBOR implements some extra hints and support for JSON interoperability,
806     and the spec offers further guidance for conversion between CBOR and
807     JSON. None of this is currently implemented in CBOR::XS, and the guidelines
808     in the spec do not result in correct round-tripping of data. If JSON
809     interoperability is improved in the future, then the goal will be to
810     ensure that decoded JSON data will round-trip encoding and decoding to
811     CBOR intact.
812 root 1.1
813    
814     =head1 SECURITY CONSIDERATIONS
815    
816     When you are using CBOR in a protocol, talking to untrusted potentially
817     hostile creatures requires relatively few measures.
818    
819     First of all, your CBOR decoder should be secure, that is, should not have
820     any buffer overflows. Obviously, this module should ensure that, and I am
821     trying hard to make that true, but you never know.
822    
823     Second, you need to avoid resource-starving attacks. That means you should
824     limit the size of CBOR data you accept, or make sure that when your
825     resources run out, that's just fine (e.g. by using a separate process that
826     can crash safely). The size of a CBOR string in octets is usually a good
827     indication of the size of the resources required to decode it into a Perl
828     structure. While CBOR::XS can check the size of the CBOR text, it might be
829     too late when you already have it in memory, so you might want to check
830     the size before you accept the string.
831    
832     Third, CBOR::XS recurses using the C stack when decoding objects and
833     arrays. The C stack is a limited resource: for instance, on my amd64
834     machine with 8MB of stack size I can decode around 180k nested arrays but
835     only 14k nested CBOR objects (due to perl itself recursing deeply on croak
836     to free the temporary). If that is exceeded, the program crashes. To be
837     conservative, the default nesting limit is set to 512. If your process
838     has a smaller stack, you should adjust this setting accordingly with the
839     C<max_depth> method.
840    
841     Something else could bomb you, too, that I forgot to think of. In that
842     case, you get to keep the pieces. I am always open for hints, though...
843    
844     Also keep in mind that CBOR::XS might leak contents of your Perl data
845     structures in its error messages, so when you serialise sensitive
846     information you might want to make sure that exceptions thrown by CBOR::XS
847     will not end up in front of untrusted eyes.
848    
849     =head1 CBOR IMPLEMENTATION NOTES
850    
851     This section contains some random implementation notes. They do not
852     describe guaranteed behaviour, but merely behaviour as it is implemented
853     right now.
854    
855     64 bit integers are only properly decoded when Perl was built with 64 bit
856     support.
857    
858     Strings and arrays are encoded with a definite length. Hashes as well,
859     unless they are tied (or otherwise magical).
860    
861     Only the double data type is supported for NV data types - when Perl uses
862     long double to represent floating point values, they might not be encoded
863     properly. Half precision types are accepted, but not encoded.
864    
865     Strict mode and canonical mode are not implemented.
866    
867    
868     =head1 THREADS
869    
870     This module is I<not> guaranteed to be thread safe and there are no
871     plans to change this until Perl gets thread support (as opposed to the
872     horribly slow so-called "threads" which are simply slow and bloated
873     process simulations - use fork, it's I<much> faster, cheaper, better).
874    
875     (It might actually work, but you have been warned).
876    
877    
878     =head1 BUGS
879    
880     While the goal of this module is to be correct, that unfortunately does
881     not mean it's bug-free, only that I think its design is bug-free. If you
882     keep reporting bugs they will be fixed swiftly, though.
883    
884     Please refrain from using rt.cpan.org or any other bug reporting
885     service. I put the contact address into my modules for a reason.
886    
887     =cut
888    
# Default decoders for the CBOR tags this module understands out of the
# box. Each entry maps a tag number to a callback that is invoked with
# ($tag, $value) and returns the Perl value to use for the tagged item.
#
# NOTE(review): an earlier "our %FILTER = (...)" assignment exists further
# up in this file; this later assignment is the one that takes effect.
our %FILTER = (
   # 0 # rfc4287 datetime, utf-8
   # 1 # unix timestamp, any

   2 => sub { # pos bigint
      require Math::BigInt;
      # the extra leading "0" makes an empty byte string decode to 0
      # instead of NaN, and is harmless otherwise
      Math::BigInt->new ("0x0" . unpack "H*", pop)
   },

   3 => sub { # neg bigint
      require Math::BigInt;
      # RFC 7049, 2.4.2: the byte string holds n, the value is -1 - n
      Math::BigInt->new ("0x0" . unpack "H*", pop)->bneg->bdec
   },

   4 => sub { # decimal fraction, array
      require Math::BigFloat;
      # value is [exponent, mantissa], meaning mantissa * 10**exponent
      Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
   },

   5 => sub { # bigfloat, array
      require Math::BigFloat;
      # value is [exponent, mantissa]; the mantissa is shifted by
      # "exponent" digits in base 2
      scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
   },

   # the "expected conversion" hints are ignored, the value is passed through
   21 => sub { pop }, # expected conversion to base64url encoding
   22 => sub { pop }, # expected conversion to base64 encoding
   23 => sub { pop }, # expected conversion to base16 encoding

   # 24 # embedded cbor, byte string

   32 => sub { # URI
      require URI;
      URI->new (pop)
   },

   # 33 # base64url rfc4648, utf-8
   # 34 # base64 rfc4648, utf-8
   # 35 # regex pcre/ecma262, utf-8
   # 36 # mime message rfc2045, utf-8
);
929    
# Look up the decoder registered in %FILTER for the tag in $_[0]. When
# there is none, return nothing; otherwise call it with the very same
# argument list this function was called with.
sub CBOR::XS::default_filter {
   my $decoder = $FILTER{$_[0]}
      or return;

   # "&$decoder" without parentheses hands our own @_ to the decoder
   &$decoder
}
933    
# Serialisation hook for URI objects: stringify the object, upgrade the
# string to perl's internal UTF-8 representation and wrap it in CBOR
# tag 32.
sub URI::TO_CBOR {
   my $self = $_[0];

   my $text = $self->as_string;
   utf8::upgrade ($text);

   CBOR::XS::tag (32, $text)
}
939    
# Serialisation hook for Math::BigInt objects.
#
# Values that fit into a signed 32 bit integer are returned as a plain
# perl number. Everything else is returned as a CBOR bignum: tag 2 for
# non-negative values, tag 3 for negative ones, with the magnitude as a
# big-endian byte string.
#
# NOTE(review): NaN and infinities presumably end up in the bignum branch
# and are not encoded meaningfully - confirm whether they should croak.
sub Math::BigInt::TO_CBOR {
   if ($_[0] >= -2147483648 && $_[0] <= 2147483647) {
      $_[0]->numify
   } else {
      my $negative = $_[0] < 0;

      # RFC 7049, 2.4.2: a negative bignum stores n with value = -1 - n, so
      # n = -value - 1. Work on a copy so the caller's object is untouched.
      # This also guarantees as_hex yields "0x..." without a leading "-".
      my $n = $negative ? $_[0]->copy->bneg->bdec : $_[0];

      my $hex = substr $n->as_hex, 2; # strip the "0x" prefix
      $hex = "0$hex" if 1 & length $hex; # pack "H*" needs whole octets

      CBOR::XS::tag ($negative ? 3 : 2, pack "H*", $hex)
   }
}
949    
# Serialisation hook for Math::BigFloat objects: split the number into
# mantissa and exponent and emit it as CBOR tag 4 wrapping the array
# [exponent, mantissa], with the exponent converted to a plain perl number.
sub Math::BigFloat::TO_CBOR {
   my $self = $_[0];

   my ($mantissa, $exponent) = $self->parts;
   my @fraction = ($exponent->numify, $mantissa);

   CBOR::XS::tag (4, \@fraction)
}
954    
955 root 1.1 XSLoader::load "CBOR::XS", $VERSION;
956    
957     =head1 SEE ALSO
958    
959     The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
960     serialisation.
961    
962 root 1.6 The L<Types::Serialiser> module provides the data model for true, false
963     and error values.
964    
965 root 1.1 =head1 AUTHOR
966    
967     Marc Lehmann <schmorp@schmorp.de>
968     http://home.schmorp.de/
969    
970     =cut
971    
972 root 1.6 1
973