ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.pm
Revision: 1.23
Committed: Fri Nov 22 16:00:30 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.22: +19 -22 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     CBOR::XS - Concise Binary Object Representation (CBOR, RFC7049)
4    
5     =encoding utf-8
6    
7     =head1 SYNOPSIS
8    
9     use CBOR::XS;
10    
11     $binary_cbor_data = encode_cbor $perl_value;
12     $perl_value = decode_cbor $binary_cbor_data;
13    
14     # OO-interface
15    
16     $coder = CBOR::XS->new;
17 root 1.6 $binary_cbor_data = $coder->encode ($perl_value);
18     $perl_value = $coder->decode ($binary_cbor_data);
19    
20     # prefix decoding
21    
22     my $many_cbor_strings = ...;
23     while (length $many_cbor_strings) {
24     my ($data, $length) = $coder->decode_prefix ($many_cbor_strings);
25     # data was decoded
26     substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
27     }
28 root 1.1
29     =head1 DESCRIPTION
30    
31 root 1.21 WARNING! This module is very new, and not very well tested (that's up
32     to you to do). Furthermore, details of the implementation might change
33     freely before version 1.0. And lastly, most extensions depend on an IANA
34     assignment, and until that assignment is official, this implementation is
35     not interoperable with other implementations (even future versions of this
36     module) until the assignment is done.
37 root 1.9
38     You are still invited to try out CBOR, and this module.
39 root 1.5
40     This module converts Perl data structures to the Concise Binary Object
41     Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
42     format that aims to use a superset of the JSON data model, i.e. when you
43     can represent something in JSON, you should be able to represent it in
44     CBOR.
45 root 1.1
46 root 1.9 In short, CBOR is a faster and very compact binary alternative to JSON,
47 root 1.10 with the added ability of supporting serialisation of Perl objects. (JSON
48     often compresses better than CBOR though, so if you plan to compress the
49     data later you might want to compare both formats first).
50 root 1.5
51 root 1.15 To give you a general idea about speed, with texts in the megabyte range,
52     C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
53     L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
54     data, the worse L<Storable> performs in comparison.
55    
56     As for compactness, C<CBOR::XS> encoded data structures are usually about
57     20% smaller than the same data encoded as (compact) JSON or L<Storable>.
58 root 1.14
59 root 1.21 In addition to the core CBOR data format, this module implements a number
60     of extensions, to support cyclic and self-referencing data structures
61     (see C<allow_sharing>), string deduplication (see C<allow_stringref>) and
62     scalar references (always enabled).
63    
64 root 1.5 The primary goal of this module is to be I<correct> and the secondary goal
65     is to be I<fast>. To reach the latter goal it was written in C.
66 root 1.1
67     See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
68     vice versa.
69    
70     =cut
71    
72     package CBOR::XS;
73    
74     use common::sense;
75    
76 root 1.17 our $VERSION = 0.08;
77 root 1.1 our @ISA = qw(Exporter);
78    
79     our @EXPORT = qw(encode_cbor decode_cbor); # the functional interface, exported by default
80    
81     use Exporter;
82     use XSLoader;
83    
84 root 1.6 use Types::Serialiser;
85    
86 root 1.3 our $MAGIC = "\xd9\xd9\xf7"; # CBOR "magic string": the same bytes that tagging a value with 55799 prepends
87    
88 root 1.1 =head1 FUNCTIONAL INTERFACE
89    
90     The following convenience methods are provided by this module. They are
91     exported by default:
92    
93     =over 4
94    
95     =item $cbor_data = encode_cbor $perl_scalar
96    
97     Converts the given Perl data structure to CBOR representation. Croaks on
98     error.
99    
100     =item $perl_scalar = decode_cbor $cbor_data
101    
102     The opposite of C<encode_cbor>: expects a valid CBOR string to parse,
103     returning the resulting perl scalar. Croaks on error.
104    
105     =back
106    
107    
108     =head1 OBJECT-ORIENTED INTERFACE
109    
110     The object oriented interface lets you configure your own encoding or
111     decoding style, within the limits of supported formats.
112    
113     =over 4
114    
115     =item $cbor = new CBOR::XS
116    
117     Creates a new CBOR::XS object that can be used to de/encode CBOR
118     strings. All boolean flags described below are by default I<disabled>.
119    
120     The mutators for flags all return the CBOR object again and thus calls can
121     be chained:
122    
123     my $cbor = CBOR::XS->new->encode ({a => [1,2]});
124    
125     =item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
126    
127     =item $max_depth = $cbor->get_max_depth
128    
129     Sets the maximum nesting level (default C<512>) accepted while encoding
130     or decoding. If a higher nesting level is detected in CBOR data or a Perl
131     data structure, then the encoder and decoder will stop and croak at that
132     point.
133    
134     Nesting level is defined by number of hash- or arrayrefs that the encoder
135     needs to traverse to reach a given point or the number of C<{> or C<[>
136     characters without their matching closing parenthesis crossed to reach a
137     given character in a string.
138    
139     Setting the maximum depth to one disallows any nesting, so that ensures
140     that the object is only a single hash/object or array.
141    
142     If no argument is given, the highest possible setting will be used, which
143     is rarely useful.
144    
145     Note that nesting is implemented by recursion in C. The default value has
146     been chosen to be as large as typical operating systems allow without
147     crashing.
148    
149     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
150    
151     =item $cbor = $cbor->max_size ([$maximum_string_size])
152    
153     =item $max_size = $cbor->get_max_size
154    
155     Set the maximum length a CBOR string may have (in bytes) where decoding
156     is being attempted. The default is C<0>, meaning no limit. When C<decode>
157     is called on a string that is longer than this many bytes, it will not
158     attempt to decode the string but throw an exception. This setting has no
159     effect on C<encode> (yet).
160    
161     If no argument is given, the limit check will be deactivated (same as when
162     C<0> is specified).
163    
164     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
165    
166 root 1.19 =item $cbor = $cbor->allow_unknown ([$enable])
167    
168     =item $enabled = $cbor->get_allow_unknown
169    
170     If C<$enable> is true (or missing), then C<encode> will I<not> throw an
171     exception when it encounters values it cannot represent in CBOR (for
172     example, filehandles) but instead will encode a CBOR C<error> value.
173    
174     If C<$enable> is false (the default), then C<encode> will throw an
175     exception when it encounters anything it cannot encode as CBOR.
176    
177     This option does not affect C<decode> in any way, and it is recommended to
178     leave it off unless you know your communications partner.
179    
180 root 1.20 =item $cbor = $cbor->allow_sharing ([$enable])
181 root 1.19
182 root 1.20 =item $enabled = $cbor->get_allow_sharing
183 root 1.19
184     If C<$enable> is true (or missing), then C<encode> will not double-encode
185 root 1.20 values that have been referenced before (e.g. when the same object, such
186     as an array, is referenced multiple times), but instead will emit a
187     reference to the earlier value.
188 root 1.19
189     This means that such values will only be encoded once, and will not result
190     in a deep cloning of the value on decode, in decoders supporting the value
191     sharing extension.
192    
193 root 1.21 It is recommended to leave it off unless you know your
194     communication partner supports the value sharing extensions to CBOR
195     (http://cbor.schmorp.de/value-sharing).
196    
197 root 1.19 Detecting shared values incurs a runtime overhead when values are encoded
198     that have a reference counter larger than one, and might unnecessarily
199     increase the encoded size, as potentially shared values are encoded as
200     sharable whether or not they are actually shared.
201    
202 root 1.20 At the moment, only targets of references can be shared (e.g. scalars,
203     arrays or hashes pointed to by a reference). Weirder constructs, such as
204     an array with multiple "copies" of the I<same> string, which are hard but
205     not impossible to create in Perl, are not supported (this is the same as
206     for L<Storable>).
207 root 1.19
208     If C<$enable> is false (the default), then C<encode> will encode shared
209     values repeatedly, once for each place they are referenced.
210    
211     This option does not affect C<decode> in any way - shared values and
212 root 1.21 references will always be decoded properly if present.
213    
214     =item $cbor = $cbor->allow_stringref ([$enable])
215    
216     =item $enabled = $cbor->get_allow_stringref
217    
218     If C<$enable> is true (or missing), then C<encode> will try not to encode
219     the same string twice, but will instead encode a reference to the
220     string. Depending on your data format, this can save a lot of space, but
221     also results in a very large runtime overhead (expect encoding times to be
222     2-4 times as high as without).
223    
224     It is recommended to leave it off unless you know your
225     communications partner supports the stringref extension to CBOR
226     (http://cbor.schmorp.de/stringref).
227    
228     If C<$enable> is false (the default), then C<encode> will encode each
229     string in full, every time it occurs.
230    
231     This option does not affect C<decode> in any way - string references will
232     always be decoded properly if present.
233 root 1.19
234 root 1.23 =item $cbor = $cbor->filter ([$cb->($tag, $value)])
235    
236     =item $cb_or_undef = $cbor->get_filter
237    
238     TODO
239    
240 root 1.1 =item $cbor_data = $cbor->encode ($perl_scalar)
241    
242     Converts the given Perl data structure (a scalar value) to its CBOR
243     representation.
244    
245     =item $perl_scalar = $cbor->decode ($cbor_data)
246    
247     The opposite of C<encode>: expects CBOR data and tries to parse it,
248     returning the resulting simple scalar or reference. Croaks on error.
249    
250     =item ($perl_scalar, $octets) = $cbor->decode_prefix ($cbor_data)
251    
252     This works like the C<decode> method, but instead of raising an exception
253     when there is trailing garbage after the CBOR string, it will silently
254     stop parsing there and return the number of characters consumed so far.
255    
256     This is useful if your CBOR texts are not delimited by an outer protocol
257     and you need to know where the first CBOR string ends and the next one
258     starts.
259    
260     CBOR::XS->new->decode_prefix ("......")
261     => ("...", 3)
262    
263     =back
264    
265    
266     =head1 MAPPING
267    
268     This section describes how CBOR::XS maps Perl values to CBOR values and
269     vice versa. These mappings are designed to "do the right thing" in most
270     circumstances automatically, preserving round-tripping characteristics
271     (what you put in comes out as something equivalent).
272    
273     For the more enlightened: note that in the following descriptions,
274     lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
275     refers to the abstract Perl language itself.
276    
277    
278     =head2 CBOR -> PERL
279    
280     =over 4
281    
282 root 1.4 =item integers
283    
284     CBOR integers become (numeric) perl scalars. On perls without 64 bit
285     support, 64 bit integers will be truncated or otherwise corrupted.
286    
287     =item byte strings
288    
289     Byte strings will become octet strings in Perl (the byte values 0..255
290     will simply become characters of the same value in Perl).
291    
292     =item UTF-8 strings
293    
294     UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
295     decoded into proper Unicode code points. At the moment, the validity of
296     the UTF-8 octets will not be validated - corrupt input will result in
297     corrupted Perl strings.
298    
299     =item arrays, maps
300    
301     CBOR arrays and CBOR maps will be converted into references to a Perl
302     array or hash, respectively. The keys of the map will be stringified
303     during this process.
304    
305 root 1.6 =item null
306    
307     CBOR null becomes C<undef> in Perl.
308    
309     =item true, false, undefined
310 root 1.1
311 root 1.6 These CBOR values become C<Types::Serialiser::true>,
312     C<Types::Serialiser::false> and C<Types::Serialiser::error>,
313 root 1.1 respectively. They are overloaded to act almost exactly like the numbers
314 root 1.6 C<1> and C<0> (for true and false) or to throw an exception on access (for
315     error). See the L<Types::Serialiser> manpage for details.
316    
317 root 1.23 =item tagged values
318 root 1.1
319 root 1.23 Tagged items consist of a numeric tag and another CBOR value.
320 root 1.4
321 root 1.23 See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
322     for details.
323 root 1.4
324     =item anything else
325    
326     Anything else (e.g. unsupported simple values) will raise a decoding
327     error.
328 root 1.1
329     =back
330    
331    
332     =head2 PERL -> CBOR
333    
334     The mapping from Perl to CBOR is slightly more difficult, as Perl is a
335     truly typeless language, so we can only guess which CBOR type is meant by
336     a Perl value.
337    
338     =over 4
339    
340     =item hash references
341    
342 root 1.4 Perl hash references become CBOR maps. As there is no inherent ordering in
343     hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
344     order.
345    
346     Currently, tied hashes will use the indefinite-length format, while normal
347     hashes will use the fixed-length format.
348 root 1.1
349     =item array references
350    
351 root 1.4 Perl array references become fixed-length CBOR arrays.
352 root 1.1
353     =item other references
354    
355     Other unblessed references are generally not allowed and will cause an
356     exception to be thrown, except for references to the integers C<0> and
357 root 1.4 C<1>, which get turned into false and true in CBOR.
358    
359     =item CBOR::XS::Tagged objects
360    
361     Objects of this type must be arrays consisting of a single C<[tag, value]>
362 root 1.13 pair. The (numerical) tag will be encoded as a CBOR tag, the value will
363     be encoded as appropriate for the value. You can use C<CBOR::XS::tag> to
364     create such objects.
365 root 1.1
366 root 1.6 =item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
367 root 1.1
368 root 1.6 These special values become CBOR true, CBOR false and CBOR undefined
369     values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
370     if you want.
371 root 1.1
372 root 1.7 =item other blessed objects
373 root 1.1
374 root 1.7 Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
375 root 1.23 L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
376     module, and L<OBJECT SERIALISATION> for generic object serialisation.
377 root 1.1
378     =item simple scalars
379    
380     Simple Perl scalars (any scalar that is not a reference) are the most
381     difficult objects to encode: CBOR::XS will encode undefined scalars as
382 root 1.4 CBOR null values, scalars that have last been used in a string context
383 root 1.1 before encoding as CBOR strings, and anything else as number value:
384    
385     # dump as number
386     encode_cbor [2] # yields [2]
387     encode_cbor [-3.0e17] # yields [-3e+17]
388     my $value = 5; encode_cbor [$value] # yields [5]
389    
390     # used as string, so dump as string
391     print $value;
392     encode_cbor [$value] # yields ["5"]
393    
394     # undef becomes null
395     encode_cbor [undef] # yields [null]
396    
397     You can force the type to be a CBOR string by stringifying it:
398    
399     my $x = 3.1; # some variable containing a number
400     "$x"; # stringified
401     $x .= ""; # another, more awkward way to stringify
402     print $x; # perl does it for you, too, quite often
403    
404     You can force the type to be a CBOR number by numifying it:
405    
406     my $x = "3"; # some variable containing a string
407     $x += 0; # numify it, ensuring it will be dumped as a number
408     $x *= 1; # same thing, the choice is yours.
409    
410     You can not currently force the type in other, less obscure, ways. Tell me
411     if you need this capability (but don't forget to explain why it's needed
412     :).
413    
414 root 1.4 Perl values that seem to be integers generally use the shortest possible
415     representation. Floating-point values will use either the IEEE single
416     format if possible without loss of precision, otherwise the IEEE double
417     format will be used. Perls that use formats other than IEEE double to
418     represent numerical values are supported, but might suffer loss of
419     precision.
420 root 1.1
421     =back
422    
423 root 1.7 =head2 OBJECT SERIALISATION
424    
425     This module knows two ways to serialise a Perl object: The CBOR-specific
426     way, and the generic way.
427    
428     Whenever the encoder encounters a Perl object that it cannot serialise
429     directly (most of them), it will first look up the C<TO_CBOR> method on
430     it.
431    
432     If it has a C<TO_CBOR> method, it will call it with the object as only
433     argument, and expects exactly one return value, which it will then
434     substitute and encode it in the place of the object.
435    
436     Otherwise, it will look up the C<FREEZE> method. If it exists, it will
437     call it with the object as first argument, and the constant string C<CBOR>
438     as the second argument, to distinguish it from other serialisers.
439    
440     The C<FREEZE> method can return any number of values (i.e. zero or
441     more). These will be encoded as CBOR perl object, together with the
442     classname.
443    
444     If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
445     with an error.
446    
447     Objects encoded via C<TO_CBOR> cannot be automatically decoded, but
448     objects encoded via C<FREEZE> can be decoded using the following protocol:
449    
450     When an encoded CBOR perl object is encountered by the decoder, it will
451     look up the C<THAW> method, by using the stored classname, and will fail
452     if the method cannot be found.
453    
454     After the lookup it will call the C<THAW> method with the stored classname
455     as first argument, the constant string C<CBOR> as second argument, and all
456     values returned by C<FREEZE> as remaining arguments.
457    
458     =head4 EXAMPLES
459    
460     Here is an example C<TO_CBOR> method:
461    
462     sub My::Object::TO_CBOR {
463     my ($obj) = @_;
464    
465     ["this is a serialised My::Object object", $obj->{id}]
466     }
467    
468     When a C<My::Object> is encoded to CBOR, it will instead encode a simple
469     array with two members: a string, and the "object id". Decoding this CBOR
470     string will yield a normal perl array reference in place of the object.
471    
472     A more useful and practical example would be a serialisation method for
473     the URI module. CBOR has a custom tag value for URIs, namely 32:
474    
475     sub URI::TO_CBOR {
476     my ($self) = @_;
477     my $uri = "$self"; # stringify uri
478     utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
479     CBOR::XS::tag 32, $uri
480     }
481    
482     This will encode URIs as a UTF-8 string with tag 32, which indicates an
483     URI.
484    
485     Decoding such an URI will not (currently) give you an URI object, but
486     instead a CBOR::XS::Tagged object with tag number 32 and the string -
487     exactly what was returned by C<TO_CBOR>.
488    
489     To serialise an object so it can automatically be deserialised, you need
490     to use C<FREEZE> and C<THAW>. To take the URI module as example, this
491     would be a possible implementation:
492    
493     sub URI::FREEZE {
494     my ($self, $serialiser) = @_;
495     "$self" # encode url string
496     }
497    
498     sub URI::THAW {
499     my ($class, $serialiser, $uri) = @_;
500    
501     $class->new ($uri)
502     }
503    
504     Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
505     example, a C<FREEZE> method that returns "type", "id" and "variant" values
506     would cause an invocation of C<THAW> with 5 arguments:
507    
508     sub My::Object::FREEZE {
509     my ($self, $serialiser) = @_;
510    
511     ($self->{type}, $self->{id}, $self->{variant})
512     }
513    
514     sub My::Object::THAW {
515     my ($class, $serialiser, $type, $id, $variant) = @_;
516    
517     $class->new (type => $type, id => $id, variant => $variant)
518     }
519    
520 root 1.1
521 root 1.7 =head1 MAGIC HEADER
522 root 1.3
523     There is no way to distinguish CBOR from other formats
524     programmatically. To make it easier to distinguish CBOR from other
525     formats, the CBOR specification has a special "magic string" that can be
526 root 1.18 prepended to any CBOR string without changing its meaning.
527 root 1.3
528     This string is available as C<$CBOR::XS::MAGIC>. This module does not
529 root 1.18 prepend this string to the CBOR data it generates, but it will ignore it
530 root 1.3 if present, so users can prepend this string as a "file type" indicator as
531     required.
532    
533    
534 root 1.12 =head1 THE CBOR::XS::Tagged CLASS
535    
536     CBOR has the concept of tagged values - any CBOR value can be tagged with
537     a numeric 64 bit number, which are centrally administered.
538    
539     C<CBOR::XS> handles a few tags internally when en- or decoding. You can
540     also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
541     decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
542     unknown tag.
543    
544     These objects are simply blessed array references - the first member of
545     the array being the numerical tag, the second being the value.
546    
547     You can interact with C<CBOR::XS::Tagged> objects in the following ways:
548    
549     =over 4
550    
551     =item $tagged = CBOR::XS::tag $tag, $value
552    
553     This function(!) creates a new C<CBOR::XS::Tagged> object using the given
554     C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
555     value that can be encoded in CBOR, including serialisable Perl objects and
556     C<CBOR::XS::Tagged> objects).
557    
558     =item $tagged->[0]
559    
560     =item $tagged->[0] = $new_tag
561    
562     =item $tag = $tagged->tag
563    
564     =item $new_tag = $tagged->tag ($new_tag)
565    
566     Access/mutate the tag.
567    
568     =item $tagged->[1]
569    
570     =item $tagged->[1] = $new_value
571    
572     =item $value = $tagged->value
573    
574     =item $new_value = $tagged->value ($new_value)
575    
576     Access/mutate the tagged value.
577    
578     =back
579    
580     =cut
581    
582     sub tag($$) { # plain function (not a method): wraps a tag and a value into a CBOR::XS::Tagged object
583     bless [@_], CBOR::XS::Tagged::; # the object is simply the blessed [tag, value] array
584     }
585    
586     sub CBOR::XS::Tagged::tag { # combined getter/setter for the tag, stored in element 0
587     $_[0][0] = $_[1] if $#_; # $#_ is non-zero when an argument follows the object: store it as the new tag
588     $_[0][0] # return the (possibly just updated) tag
589     }
590    
591     sub CBOR::XS::Tagged::value { # combined getter/setter for the tagged value, stored in element 1
592     $_[0][1] = $_[1] if $#_; # $#_ is non-zero when an argument follows the object: store it as the new value
593     $_[0][1] # return the (possibly just updated) value
594     }
595    
596 root 1.13 =head2 EXAMPLES
597    
598     Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
599    
600     You can look up CBOR tag values and meanings in the IANA registry at
601     L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
602    
603     Prepend a magic header (C<$CBOR::XS::MAGIC>):
604    
605     my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
606     # same as:
607     my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
608    
609     Serialise some URIs and a regex in an array:
610    
611     my $cbor = encode_cbor [
612     (CBOR::XS::tag 32, "http://www.nethype.de/"),
613     (CBOR::XS::tag 32, "http://software.schmorp.de/"),
614     (CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
615     ];
616    
617     Wrap CBOR data in CBOR:
618    
619     my $cbor_cbor = encode_cbor
620     CBOR::XS::tag 24,
621     encode_cbor [1, 2, 3];
622    
623 root 1.19 =head1 TAG HANDLING AND EXTENSIONS
624    
625 root 1.22 This section describes how this module handles specific tagged values
626     and extensions. If a tag is not mentioned here and no additional filters
627     are provided for it, then the default handling applies (creating a
628     CBOR::XS::Tagged object on decoding, and only encoding the tag when
629     explicitly requested).
630 root 1.19
631 root 1.23 Tags not handled specifically are currently converted into a
632     L<CBOR::XS::Tagged> object, which is simply a blessed array reference
633     consisting of the numeric tag value followed by the (decoded) CBOR value.
634    
635 root 1.19 Future versions of this module reserve the right to special case
636 root 1.22 additional tags (such as base64url).
637    
638     =head2 ENFORCED TAGS
639    
640     These tags are always handled when decoding, and their handling cannot be
641     overridden by the user.
642 root 1.19
643     =over 4
644    
645     =item <unassigned> (perl-object, L<http://cbor.schmorp.de/perl-object>)
646    
647 root 1.23 These tags are automatically created (and decoded) for serialisable
648     objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
649     serialisation protocol). See L<OBJECT SERIALISATION> for details.
650 root 1.19
651     =item <unassigned>, <unassigned> (sharable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
652    
653     These tags are automatically decoded when encountered, resulting in
654     shared values in the decoded object. They are only encoded, however, when
655     C<allow_sharing> is enabled.
656    
657 root 1.21 =item <unassigned>, <unassigned> (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
658    
659     These tags are automatically decoded when encountered. They are only
660     encoded, however, when C<allow_stringref> is enabled.
661    
662 root 1.19 =item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
663    
664     This tag is automatically generated when a reference is encountered (with
665     the exception of hash and array references). It is converted to a reference
666     when decoding.
667    
668     =item 55799 (self-describe CBOR, RFC 7049)
669    
670     This value is not generated on encoding (unless explicitly requested by
671     the user), and is simply ignored when decoding.
672    
673     =back
674    
675 root 1.22 =head2 OPTIONAL TAGS
676    
677     These tags have default filters provided when decoding. Their handling can
678     be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
679     providing a custom C<filter> function when decoding.
680    
681     When they result in decoding into a specific Perl class, the module
682     usually provides a corresponding C<TO_CBOR> method as well.
683    
684     When any of these need to load additional modules that are not part of the
685     perl core distribution (e.g. L<URI>), it is (currently) up to the user to
686     provide these modules. The decoding usually fails with an exception if the
687     required module cannot be loaded.
688    
689     =over 4
690    
691     =item 2, 3 (positive/negative bignum)
692    
693     These tags are decoded into L<Math::BigInt> objects. The corresponding
694     C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
695     integers, and others into positive/negative CBOR bignums.
696    
697     =item 4, 5 (decimal fraction/bigfloat)
698    
699     Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
700     objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
701     encodes into a decimal fraction.
702    
703     CBOR cannot represent bigfloats with I<very> large exponents - conversion
704     of such big float objects is undefined.
705    
706     Also, NaN and infinities are not encoded properly.
707    
708     =item 21, 22, 23 (expected later JSON conversion)
709    
710     CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
711     tags.
712    
713     =item 32 (URI)
714    
715     These objects decode into L<URI> objects. The corresponding
716     C<URI::TO_CBOR> method again results in a CBOR URI value.
717    
718     =back
719    
720     =cut
721    
722     our %FILTER = ( # default decoding filters, keyed by CBOR tag number; presumably each is called as ($tag, $value) - confirm against the XS decoder
723     # 0 # rfc4287 datetime, utf-8
724     # 1 # unix timestamp, any
725    
726     2 => sub { # pos bigint
727     require Math::BigInt; # loaded lazily, only when such a tag is actually decoded
728     Math::BigInt->new ("0x" . unpack "H*", pop) # last argument (the byte string) as hex digits
729     },
730    
731     3 => sub { # neg bigint
732     require Math::BigInt;
733     -Math::BigInt->new ("0x" . unpack "H*", pop) # NOTE(review): yields -n, while RFC 7049 section 2.4.2 defines tag 3 as -1 - n - confirm together with the encoder
734     },
735    
736     4 => sub { # decimal fraction, array
737     require Math::BigFloat;
738     Math::BigFloat->new ($_[1][1] . "E" . $_[1][0]) # builds "<element 1>E<element 0>", i.e. mantissa E exponent
739     },
740    
741     5 => sub { # bigfloat, array
742     require Math::BigFloat;
743     scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2) # element 1 shifted left by element 0 digits in base 2
744     },
745    
746     21 => sub { pop }, # expected conversion to base64url encoding (value returned unchanged)
747     22 => sub { pop }, # expected conversion to base64 encoding (value returned unchanged)
748     23 => sub { pop }, # expected conversion to base16 encoding (value returned unchanged)
749    
750     # 24 # embedded cbor, byte string
751    
752     32 => sub { # URI
753     require URI;
754     URI->new (pop)
755     },
756    
757     # 33 # base64url rfc4648, utf-8
758     # 34 # base64 rfc4648, utf-8
759     # 35 # regex pcre/ecma262, utf-8
760     # 36 # mime message rfc2045, utf-8
761     );
762    
763 root 1.19
764 root 1.7 =head1 CBOR and JSON
765 root 1.1
766 root 1.4 CBOR is supposed to implement a superset of the JSON data model, and is,
767     with some coercion, able to represent all JSON texts (something that other
768     "binary JSON" formats such as BSON generally do not support).
769    
770     CBOR implements some extra hints and support for JSON interoperability,
771     and the spec offers further guidance for conversion between CBOR and
772     JSON. None of this is currently implemented in CBOR::XS, and the guidelines
773     in the spec do not result in correct round-tripping of data. If JSON
774     interoperability is improved in the future, then the goal will be to
775     ensure that decoded JSON data will round-trip encoding and decoding to
776     CBOR intact.
777 root 1.1
778    
779     =head1 SECURITY CONSIDERATIONS
780    
781     When you are using CBOR in a protocol, talking to untrusted potentially
782     hostile creatures requires relatively few measures.
783    
784     First of all, your CBOR decoder should be secure, that is, should not have
785     any buffer overflows. Obviously, this module should ensure that and I am
786     trying hard on making that true, but you never know.
787    
788     Second, you need to avoid resource-starving attacks. That means you should
789     limit the size of CBOR data you accept, or make sure that when your
790     resources run out, that's just fine (e.g. by using a separate process that
791     can crash safely). The size of a CBOR string in octets is usually a good
792     indication of the size of the resources required to decode it into a Perl
793     structure. While CBOR::XS can check the size of the CBOR text, it might be
794     too late when you already have it in memory, so you might want to check
795     the size before you accept the string.
796    
797     Third, CBOR::XS recurses using the C stack when decoding objects and
798     arrays. The C stack is a limited resource: for instance, on my amd64
799     machine with 8MB of stack size I can decode around 180k nested arrays but
800     only 14k nested CBOR objects (due to perl itself recursing deeply on croak
801     to free the temporary). If that is exceeded, the program crashes. To be
802     conservative, the default nesting limit is set to 512. If your process
803     has a smaller stack, you should adjust this setting accordingly with the
804     C<max_depth> method.
805    
806     Something else could bomb you, too, that I forgot to think of. In that
807     case, you get to keep the pieces. I am always open for hints, though...
808    
809     Also keep in mind that CBOR::XS might leak contents of your Perl data
810     structures in its error messages, so when you serialise sensitive
811     information you might want to make sure that exceptions thrown by CBOR::XS
812     will not end up in front of untrusted eyes.
813    
814     =head1 CBOR IMPLEMENTATION NOTES
815    
816     This section contains some random implementation notes. They do not
817     describe guaranteed behaviour, but merely behaviour as it is implemented
818     right now.
819    
820     64 bit integers are only properly decoded when Perl was built with 64 bit
821     support.
822    
823     Strings and arrays are encoded with a definite length. Hashes as well,
824     unless they are tied (or otherwise magical).
825    
826     Only the double data type is supported for NV data types - when Perl uses
827     long double to represent floating point values, they might not be encoded
828     properly. Half precision types are accepted, but not encoded.
829    
830     Strict mode and canonical mode are not implemented.
831    
832    
833     =head1 THREADS
834    
835     This module is I<not> guaranteed to be thread safe and there are no
836     plans to change this until Perl gets thread support (as opposed to the
837     horribly slow so-called "threads" which are simply slow and bloated
838     process simulations - use fork, it's I<much> faster, cheaper, better).
839    
840     (It might actually work, but you have been warned).
841    
842    
843     =head1 BUGS
844    
845     While the goal of this module is to be correct, that unfortunately does
846     not mean it's bug-free, only that I think its design is bug-free. If you
847     keep reporting bugs they will be fixed swiftly, though.
848    
849     Please refrain from using rt.cpan.org or any other bug reporting
850     service. I put the contact address into my modules for a reason.
851    
852     =cut
853    
# Default decoders for tagged CBOR values, keyed by tag number. Every
# handler receives the tag number as its first argument and the already
# decoded tagged value as its last argument, and returns the Perl value to
# use instead. Tags that are only listed in comments have no handler.
our %FILTER = (
   # 0 # rfc4287 datetime, utf-8
   # 1 # unix timestamp, any

   2 => sub { # pos bigint, byte string
      require Math::BigInt;
      my $hex = unpack "H*", pop;
      # an empty byte string means zero - a bare "0x" would parse as NaN
      Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))
   },

   3 => sub { # neg bigint, byte string
      require Math::BigInt;
      my $hex = unpack "H*", pop;
      # RFC 7049, 2.4.2: the byte string holds n and the value is -1 - n,
      # so increment the magnitude before negating it
      Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))->binc->bneg
   },

   4 => sub { # decimal fraction, array [exponent, mantissa]
      require Math::BigFloat;
      # mantissa * 10**exponent, spelled as "<mantissa>E<exponent>"
      Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
   },

   5 => sub { # bigfloat, array [exponent, mantissa]
      require Math::BigFloat;
      # mantissa * 2**exponent; scalar keeps the result a single value
      scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
   },

   # the following tags only hint at a later text conversion, so the
   # tagged value is passed through unchanged
   21 => sub { pop }, # expected conversion to base64url encoding
   22 => sub { pop }, # expected conversion to base64 encoding
   23 => sub { pop }, # expected conversion to base16 encoding

   # 24 # embedded cbor, byte string

   32 => sub { # uri, utf-8
      require URI;
      URI->new (pop)
   },

   # 33 # base64url rfc4648, utf-8
   # 34 # base64 rfc4648, utf-8
   # 35 # regex pcre/ecma262, utf-8
   # 36 # mime message rfc2045, utf-8
);
894    
# Look up the handler registered in %FILTER for the tag number passed as
# the first argument. Returns nothing (empty list / undef) when no handler
# exists; otherwise calls the handler with the very same argument list and
# returns whatever it returns.
sub CBOR::XS::default_filter {
   my $handler = $FILTER{$_[0]};

   return unless $handler;

   # "&$handler;" without parentheses hands our own @_ to the handler
   return &$handler;
}
898    
# Serialisation hook for URI objects: wraps the URI's string form in CBOR
# tag 32.
sub URI::TO_CBOR {
   my ($self) = @_;

   my $text = $self->as_string;

   # switch the string to perl's internal utf-8 representation before
   # handing it over (presumably so it is emitted as a text string -
   # TODO confirm against the encoder)
   utf8::upgrade ($text);

   return CBOR::XS::tag (32, $text);
}
904    
# Serialisation hook for Math::BigInt objects.
#
# Values that fit into a signed 32 bit integer are returned as plain perl
# numbers. Everything else is returned as a tagged byte string holding the
# big-endian magnitude: tag 2 for non-negative values, tag 3 for negative
# ones.
sub Math::BigInt::TO_CBOR {
   my ($self) = @_;

   if ($self >= -2147483648 && $self <= 2147483647) {
      return $self->numify;
   }

   my $negative = $self < 0;

   # RFC 7049, 2.4.2: tag 3 carries n where the value is -1 - n. Working
   # on the non-negative n also avoids the "-0x" prefix as_hex produces
   # for negative numbers, which would otherwise corrupt the hex string.
   my $magnitude = $negative ? $self->copy->binc->bneg : $self;

   my $hex = substr $magnitude->as_hex, 2;  # strip the leading "0x"
   $hex = "0$hex" if 1 & length $hex;       # pack "H*" needs whole octets

   return CBOR::XS::tag ($negative ? 3 : 2, pack "H*", $hex);
}
914    
# Serialisation hook for Math::BigFloat objects: splits the number into
# the two values returned by ->parts and wraps them in CBOR tag 4 as the
# array [exponent, mantissa].
sub Math::BigFloat::TO_CBOR {
   my ($self) = @_;

   my ($mantissa, $exponent) = $self->parts;

   # the exponent goes out as a plain number, the mantissa stays an object
   return CBOR::XS::tag (4, [$exponent->numify, $mantissa]);
}
919    
# Load the compiled part of CBOR::XS, passing the module's $VERSION along.
XSLoader::load ("CBOR::XS", $VERSION);
921    
922     =head1 SEE ALSO
923    
924     The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
925     serialisation.
926    
927 root 1.6 The L<Types::Serialiser> module provides the data model for true, false
928     and error values.
929    
930 root 1.1 =head1 AUTHOR
931    
932     Marc Lehmann <schmorp@schmorp.de>
933     http://home.schmorp.de/
934    
935     =cut
936    
937 root 1.6 1
938