ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.pm
Revision: 1.27
Committed: Thu Nov 28 15:43:24 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.26: +12 -2 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     CBOR::XS - Concise Binary Object Representation (CBOR, RFC7049)
4    
5     =encoding utf-8
6    
7     =head1 SYNOPSIS
8    
9     use CBOR::XS;
10    
11     $binary_cbor_data = encode_cbor $perl_value;
12     $perl_value = decode_cbor $binary_cbor_data;
13    
14     # OO-interface
15    
16     $coder = CBOR::XS->new;
17 root 1.6 $binary_cbor_data = $coder->encode ($perl_value);
18     $perl_value = $coder->decode ($binary_cbor_data);
19    
20     # prefix decoding
21    
22     my $many_cbor_strings = ...;
23     while (length $many_cbor_strings) {
24     my ($data, $length) = $coder->decode_prefix ($many_cbor_strings);
25     # data was decoded
26     substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
27     }
28 root 1.1
29     =head1 DESCRIPTION
30    
31 root 1.21 WARNING! This module is very new, and not very well tested (that's up
32     to you to do). Furthermore, details of the implementation might change
33     freely before version 1.0. And lastly, most extensions depend on an IANA
34     assignment, and until that assignment is official, this implementation is
35     not interoperable with other implementations (even future versions of this
36     module) until the assignment is done.
37 root 1.9
38     You are still invited to try out CBOR, and this module.
39 root 1.5
40     This module converts Perl data structures to the Concise Binary Object
41     Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
42     format that aims to use a superset of the JSON data model, i.e. when you
43     can represent something in JSON, you should be able to represent it in
44     CBOR.
45 root 1.1
46 root 1.9 In short, CBOR is a faster and very compact binary alternative to JSON,
47 root 1.10 with the added ability of supporting serialisation of Perl objects. (JSON
48     often compresses better than CBOR though, so if you plan to compress the
49     data later you might want to compare both formats first).
50 root 1.5
51 root 1.15 To give you a general idea about speed, with texts in the megabyte range,
52     C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
53     L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
54     data, the worse L<Storable> performs in comparison.
55    
56     As for compactness, C<CBOR::XS> encoded data structures are usually about
57     20% smaller than the same data encoded as (compact) JSON or L<Storable>.
58 root 1.14
59 root 1.21 In addition to the core CBOR data format, this module implements a number
60     of extensions, to support cyclic and self-referencing data structures
61 root 1.25 (see C<allow_sharing>), string deduplication (see C<pack_strings>) and
62 root 1.21 scalar references (always enabled).
63    
64 root 1.5 The primary goal of this module is to be I<correct> and the secondary goal
65     is to be I<fast>. To reach the latter goal it was written in C.
66 root 1.1
67     See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
68     vice versa.
69    
70     =cut
71    
72     package CBOR::XS;
73    
74     use common::sense;
75    
76 root 1.24 our $VERSION = 0.09;
77 root 1.1 our @ISA = qw(Exporter);
78    
79     our @EXPORT = qw(encode_cbor decode_cbor);
80    
81     use Exporter;
82     use XSLoader;
83    
84 root 1.6 use Types::Serialiser;
85    
86 root 1.3 our $MAGIC = "\xd9\xd9\xf7";
87    
88 root 1.1 =head1 FUNCTIONAL INTERFACE
89    
90     The following convenience methods are provided by this module. They are
91     exported by default:
92    
93     =over 4
94    
95     =item $cbor_data = encode_cbor $perl_scalar
96    
97     Converts the given Perl data structure to CBOR representation. Croaks on
98     error.
99    
100     =item $perl_scalar = decode_cbor $cbor_data
101    
102     The opposite of C<encode_cbor>: expects a valid CBOR string to parse,
103     returning the resulting perl scalar. Croaks on error.
104    
105     =back
106    
107    
108     =head1 OBJECT-ORIENTED INTERFACE
109    
110     The object oriented interface lets you configure your own encoding or
111     decoding style, within the limits of supported formats.
112    
113     =over 4
114    
115     =item $cbor = new CBOR::XS
116    
117     Creates a new CBOR::XS object that can be used to de/encode CBOR
118     strings. All boolean flags described below are by default I<disabled>.
119    
120     The mutators for flags all return the CBOR object again and thus calls can
121     be chained:
122    
123     my $cbor = CBOR::XS->new->encode ({a => [1,2]});
124    
125     =item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
126    
127     =item $max_depth = $cbor->get_max_depth
128    
129     Sets the maximum nesting level (default C<512>) accepted while encoding
130     or decoding. If a higher nesting level is detected in CBOR data or a Perl
131     data structure, then the encoder and decoder will stop and croak at that
132     point.
133    
134     Nesting level is defined by number of hash- or arrayrefs that the encoder
135     needs to traverse to reach a given point or the number of C<{> or C<[>
136     characters without their matching closing parenthesis crossed to reach a
137     given character in a string.
138    
139     Setting the maximum depth to one disallows any nesting, so that ensures
140     that the object is only a single hash/object or array.
141    
142     If no argument is given, the highest possible setting will be used, which
143     is rarely useful.
144    
145     Note that nesting is implemented by recursion in C. The default value has
146     been chosen to be as large as typical operating systems allow without
147     crashing.
148    
149     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
150    
151     =item $cbor = $cbor->max_size ([$maximum_string_size])
152    
153     =item $max_size = $cbor->get_max_size
154    
155     Set the maximum length a CBOR string may have (in bytes) where decoding
156     is being attempted. The default is C<0>, meaning no limit. When C<decode>
157     is called on a string that is longer than this many bytes, it will not
158     attempt to decode the string but throw an exception. This setting has no
159     effect on C<encode> (yet).
160    
161     If no argument is given, the limit check will be deactivated (same as when
162     C<0> is specified).
163    
164     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
165    
166 root 1.19 =item $cbor = $cbor->allow_unknown ([$enable])
167    
168     =item $enabled = $cbor->get_allow_unknown
169    
170     If C<$enable> is true (or missing), then C<encode> will I<not> throw an
171     exception when it encounters values it cannot represent in CBOR (for
172     example, filehandles) but instead will encode a CBOR C<error> value.
173    
174     If C<$enable> is false (the default), then C<encode> will throw an
175     exception when it encounters anything it cannot encode as CBOR.
176    
177     This option does not affect C<decode> in any way, and it is recommended to
178     leave it off unless you know your communications partner.
179    
180 root 1.20 =item $cbor = $cbor->allow_sharing ([$enable])
181 root 1.19
182 root 1.20 =item $enabled = $cbor->get_allow_sharing
183 root 1.19
184     If C<$enable> is true (or missing), then C<encode> will not double-encode
185 root 1.20 values that have been referenced before (e.g. when the same object, such
186     as an array, is referenced multiple times), but instead will emit a
187     reference to the earlier value.
188 root 1.19
189     This means that such values will only be encoded once, and will not result
190     in a deep cloning of the value on decode, in decoders supporting the value
191 root 1.25 sharing extension. This also makes it possible to encode cyclic data
192     structures.
193 root 1.19
194 root 1.21 It is recommended to leave it off unless you know your
195     communication partner supports the value sharing extensions to CBOR
196 root 1.26 (L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
197 root 1.25 resulting data structure might be unusable.
198 root 1.21
199 root 1.19 Detecting shared values incurs a runtime overhead when values are encoded
200     that have a reference counter larger than one, and might unnecessarily
201     increase the encoded size, as potentially shared values are encoded as
202     sharable whether or not they are actually shared.
203    
204 root 1.20 At the moment, only targets of references can be shared (e.g. scalars,
205     arrays or hashes pointed to by a reference). Weirder constructs, such as
206     an array with multiple "copies" of the I<same> string, which are hard but
207     not impossible to create in Perl, are not supported (this is the same as
208 root 1.25 with L<Storable>).
209 root 1.19
210 root 1.25 If C<$enable> is false (the default), then C<encode> will encode shared
211     data structures repeatedly, unsharing them in the process. Cyclic data
212     structures cannot be encoded in this mode.
213 root 1.19
214     This option does not affect C<decode> in any way - shared values and
215 root 1.21 references will always be decoded properly if present.
216    
217 root 1.25 =item $cbor = $cbor->pack_strings ([$enable])
218 root 1.21
219 root 1.25 =item $enabled = $cbor->get_pack_strings
220 root 1.21
221     If C<$enable> is true (or missing), then C<encode> will try not to encode
222     the same string twice, but will instead encode a reference to the string
223 root 1.25 instead. Depending on your data format, this can save a lot of space, but
224 root 1.21 also results in a very large runtime overhead (expect encoding times to be
225     2-4 times as high as without).
226    
227     It is recommended to leave it off unless you know your
228     communications partner supports the stringref extension to CBOR
229 root 1.26 (L<http://cbor.schmorp.de/stringref>), as without decoder support, the
230 root 1.25 resulting data structure might not be usable.
231 root 1.21
232 root 1.25 If C<$enable> is false (the default), then C<encode> will encode strings
233     the standard CBOR way.
234 root 1.21
235     This option does not affect C<decode> in any way - string references will
236     always be decoded properly if present.
237 root 1.19
238 root 1.23 =item $cbor = $cbor->filter ([$cb->($tag, $value)])
239    
240     =item $cb_or_undef = $cbor->get_filter
241    
242 root 1.24 Sets or replaces the tagged value decoding filter (when C<$cb> is
243     specified) or clears the filter (if no argument or C<undef> is provided).
244    
245     The filter callback is called only during decoding, when a non-enforced
246     tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
247     list of enforced tags). For specific tags, it's often better to provide a
248     default converter using the C<%CBOR::XS::FILTER> hash (see below).
249    
250     The first argument is the numerical tag, the second is the (decoded) value
251     that has been tagged.
252    
253     The filter function should return either exactly one value, which will
254     replace the tagged value in the decoded data structure, or no values,
255     which will result in default handling, which currently means the decoder
256     creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
257    
258     When the filter is cleared (the default state), the default filter
259     function, C<CBOR::XS::default_filter>, is used. This function simply looks
260     up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists it must be
261     a code reference that is called with tag and value, and is responsible for
262     decoding the value. If no entry exists, it returns no values.
263    
264     Example: decode all tags not handled internally into CBOR::XS::Tagged
265     objects, with no other special handling (useful when working with
266     potentially "unsafe" CBOR data).
267    
268     CBOR::XS->new->filter (sub { })->decode ($cbor_data);
269    
270     Example: provide a global filter for tag 1347375694, converting the value
271     into some string form.
272    
273     $CBOR::XS::FILTER{1347375694} = sub {
274     my ($tag, $value) = @_;
275    
276     "tag 1347375694 value $value"
277     };
278 root 1.23
279 root 1.1 =item $cbor_data = $cbor->encode ($perl_scalar)
280    
281     Converts the given Perl data structure (a scalar value) to its CBOR
282     representation.
283    
284     =item $perl_scalar = $cbor->decode ($cbor_data)
285    
286     The opposite of C<encode>: expects CBOR data and tries to parse it,
287     returning the resulting simple scalar or reference. Croaks on error.
288    
289     =item ($perl_scalar, $octets) = $cbor->decode_prefix ($cbor_data)
290    
291     This works like the C<decode> method, but instead of raising an exception
292     when there is trailing garbage after the CBOR string, it will silently
293     stop parsing there and return the number of characters consumed so far.
294    
295     This is useful if your CBOR texts are not delimited by an outer protocol
296     and you need to know where the first CBOR string ends and the next one
297     starts.
298    
299     CBOR::XS->new->decode_prefix ("......")
300     => ("...", 3)
301    
302     =back
303    
304    
305     =head1 MAPPING
306    
307     This section describes how CBOR::XS maps Perl values to CBOR values and
308     vice versa. These mappings are designed to "do the right thing" in most
309     circumstances automatically, preserving round-tripping characteristics
310     (what you put in comes out as something equivalent).
311    
312     For the more enlightened: note that in the following descriptions,
313     lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
314     refers to the abstract Perl language itself.
315    
316    
317     =head2 CBOR -> PERL
318    
319     =over 4
320    
321 root 1.4 =item integers
322    
323     CBOR integers become (numeric) perl scalars. On perls without 64 bit
324     support, 64 bit integers will be truncated or otherwise corrupted.
325    
326     =item byte strings
327    
328 root 1.27 Byte strings will become octet strings in Perl (the Byte values 0..255
329 root 1.4 will simply become characters of the same value in Perl).
330    
331     =item UTF-8 strings
332    
333     UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
334     decoded into proper Unicode code points. At the moment, the validity of
335     the UTF-8 octets will not be validated - corrupt input will result in
336     corrupted Perl strings.
337    
338     =item arrays, maps
339    
340     CBOR arrays and CBOR maps will be converted into references to a Perl
341     array or hash, respectively. The keys of the map will be stringified
342     during this process.
343    
344 root 1.6 =item null
345    
346     CBOR null becomes C<undef> in Perl.
347    
348     =item true, false, undefined
349 root 1.1
350 root 1.6 These CBOR values become C<Types::Serialiser::true>,
351     C<Types::Serialiser::false> and C<Types::Serialiser::error>,
352 root 1.1 respectively. They are overloaded to act almost exactly like the numbers
353 root 1.6 C<1> and C<0> (for true and false) or to throw an exception on access (for
354     error). See the L<Types::Serialiser> manpage for details.
355    
356 root 1.23 =item tagged values
357 root 1.1
358 root 1.23 Tagged items consist of a numeric tag and another CBOR value.
359 root 1.4
360 root 1.23 See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
361     for details.
362 root 1.4
363     =item anything else
364    
365     Anything else (e.g. unsupported simple values) will raise a decoding
366     error.
367 root 1.1
368     =back
369    
370    
371     =head2 PERL -> CBOR
372    
373     The mapping from Perl to CBOR is slightly more difficult, as Perl is a
374     truly typeless language, so we can only guess which CBOR type is meant by
375     a Perl value.
376    
377     =over 4
378    
379     =item hash references
380    
381 root 1.4 Perl hash references become CBOR maps. As there is no inherent ordering in
382     hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
383     order.
384    
385     Currently, tied hashes will use the indefinite-length format, while normal
386     hashes will use the fixed-length format.
387 root 1.1
388     =item array references
389    
390 root 1.4 Perl array references become fixed-length CBOR arrays.
391 root 1.1
392     =item other references
393    
394     Other unblessed references are generally not allowed and will cause an
395     exception to be thrown, except for references to the integers C<0> and
396 root 1.4 C<1>, which get turned into false and true in CBOR.
397    
398     =item CBOR::XS::Tagged objects
399    
400     Objects of this type must be arrays consisting of a single C<[tag, value]>
401 root 1.13 pair. The (numerical) tag will be encoded as a CBOR tag, the value will
402     be encoded as appropriate for the value. You can use C<CBOR::XS::tag> to
403     create such objects.
404 root 1.1
405 root 1.6 =item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
406 root 1.1
407 root 1.6 These special values become CBOR true, CBOR false and CBOR undefined
408     values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
409     if you want.
410 root 1.1
411 root 1.7 =item other blessed objects
412 root 1.1
413 root 1.7 Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
414 root 1.23 L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
415     module, and L<OBJECT SERIALISATION> for generic object serialisation.
416 root 1.1
417     =item simple scalars
418    
419     Simple Perl scalars (any scalar that is not a reference) are the most
420     difficult objects to encode: CBOR::XS will encode undefined scalars as
421 root 1.4 CBOR null values, scalars that have last been used in a string context
422 root 1.1 before encoding as CBOR strings, and anything else as number value:
423    
424     # dump as number
425     encode_cbor [2] # yields [2]
426     encode_cbor [-3.0e17] # yields [-3e+17]
427     my $value = 5; encode_cbor [$value] # yields [5]
428    
429 root 1.27 # used as string, so dump as string (either byte or text)
430 root 1.1 print $value;
431     encode_cbor [$value] # yields ["5"]
432    
433     # undef becomes null
434     encode_cbor [undef] # yields [null]
435    
436     You can force the type to be a CBOR string by stringifying it:
437    
438     my $x = 3.1; # some variable containing a number
439     "$x"; # stringified
440     $x .= ""; # another, more awkward way to stringify
441     print $x; # perl does it for you, too, quite often
442    
443 root 1.27 You can force whether a string is encoded as byte or text string by using
444     C<utf8::upgrade> and C<utf8::downgrade>:
445    
446     utf8::upgrade $x; # encode $x as text string
447     utf8::downgrade $x; # encode $x as byte string
448    
449     Perl doesn't define what operations up- and downgrade strings, so if the
450     difference between byte and text is important, you should up- or downgrade
451     your string as late as possible before encoding.
452    
453 root 1.1 You can force the type to be a CBOR number by numifying it:
454    
455     my $x = "3"; # some variable containing a string
456     $x += 0; # numify it, ensuring it will be dumped as a number
457     $x *= 1; # same thing, the choice is yours.
458    
459     You can not currently force the type in other, less obscure, ways. Tell me
460     if you need this capability (but don't forget to explain why it's needed
461     :).
462    
463 root 1.4 Perl values that seem to be integers generally use the shortest possible
464     representation. Floating-point values will use either the IEEE single
465     format if possible without loss of precision, otherwise the IEEE double
466     format will be used. Perls that use formats other than IEEE double to
467     represent numerical values are supported, but might suffer loss of
468     precision.
469 root 1.1
470     =back
471    
472 root 1.7 =head2 OBJECT SERIALISATION
473    
474     This module knows two ways to serialise a Perl object: The CBOR-specific
475     way, and the generic way.
476    
477     Whenever the encoder encounters a Perl object that it cannot serialise
478     directly (most of them), it will first look up the C<TO_CBOR> method on
479     it.
480    
481     If it has a C<TO_CBOR> method, it will call it with the object as only
482     argument, and expects exactly one return value, which it will then
483     substitute and encode it in the place of the object.
484    
485     Otherwise, it will look up the C<FREEZE> method. If it exists, it will
486     call it with the object as first argument, and the constant string C<CBOR>
487     as the second argument, to distinguish it from other serialisers.
488    
489     The C<FREEZE> method can return any number of values (i.e. zero or
490     more). These will be encoded as CBOR perl object, together with the
491     classname.
492    
493     If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
494     with an error.
495    
496     Objects encoded via C<TO_CBOR> cannot be automatically decoded, but
497     objects encoded via C<FREEZE> can be decoded using the following protocol:
498    
499     When an encoded CBOR perl object is encountered by the decoder, it will
500     look up the C<THAW> method, by using the stored classname, and will fail
501     if the method cannot be found.
502    
503     After the lookup it will call the C<THAW> method with the stored classname
504     as first argument, the constant string C<CBOR> as second argument, and all
505     values returned by C<FREEZE> as remaining arguments.
506    
507     =head4 EXAMPLES
508    
509     Here is an example C<TO_CBOR> method:
510    
511     sub My::Object::TO_CBOR {
512     my ($obj) = @_;
513    
514     ["this is a serialised My::Object object", $obj->{id}]
515     }
516    
517     When a C<My::Object> is encoded to CBOR, it will instead encode a simple
518     array with two members: a string, and the "object id". Decoding this CBOR
519     string will yield a normal perl array reference in place of the object.
520    
521     A more useful and practical example would be a serialisation method for
522     the URI module. CBOR has a custom tag value for URIs, namely 32:
523    
524     sub URI::TO_CBOR {
525     my ($self) = @_;
526     my $uri = "$self"; # stringify uri
527     utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
528     CBOR::XS::tag 32, $uri
529     }
530    
531     This will encode URIs as a UTF-8 string with tag 32, which indicates an
532     URI.
533    
534     Decoding such an URI will not (currently) give you an URI object, but
535     instead a CBOR::XS::Tagged object with tag number 32 and the string -
536     exactly what was returned by C<TO_CBOR>.
537    
538     To serialise an object so it can automatically be deserialised, you need
539     to use C<FREEZE> and C<THAW>. To take the URI module as example, this
540     would be a possible implementation:
541    
542     sub URI::FREEZE {
543     my ($self, $serialiser) = @_;
544     "$self" # encode url string
545     }
546    
547     sub URI::THAW {
548     my ($class, $serialiser, $uri) = @_;
549    
550     $class->new ($uri)
551     }
552    
553     Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
554     example, a C<FREEZE> method that returns "type", "id" and "variant" values
555     would cause an invocation of C<THAW> with 5 arguments:
556    
557     sub My::Object::FREEZE {
558     my ($self, $serialiser) = @_;
559    
560     ($self->{type}, $self->{id}, $self->{variant})
561     }
562    
563     sub My::Object::THAW {
564     my ($class, $serialiser, $type, $id, $variant) = @_;
565    
566     $class->new (type => $type, id => $id, variant => $variant)
567     }
568    
569 root 1.1
570 root 1.7 =head1 MAGIC HEADER
571 root 1.3
572     There is no way to distinguish CBOR from other formats
573     programmatically. To make it easier to distinguish CBOR from other
574     formats, the CBOR specification has a special "magic string" that can be
575 root 1.18 prepended to any CBOR string without changing its meaning.
576 root 1.3
577     This string is available as C<$CBOR::XS::MAGIC>. This module does not
578 root 1.18 prepend this string to the CBOR data it generates, but it will ignore it
579 root 1.3 if present, so users can prepend this string as a "file type" indicator as
580     required.
581    
582    
583 root 1.12 =head1 THE CBOR::XS::Tagged CLASS
584    
585     CBOR has the concept of tagged values - any CBOR value can be tagged with
586     a numeric 64 bit number, which are centrally administered.
587    
588     C<CBOR::XS> handles a few tags internally when en- or decoding. You can
589     also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
590     decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
591     unknown tag.
592    
593     These objects are simply blessed array references - the first member of
594     the array being the numerical tag, the second being the value.
595    
596     You can interact with C<CBOR::XS::Tagged> objects in the following ways:
597    
598     =over 4
599    
600     =item $tagged = CBOR::XS::tag $tag, $value
601    
602     This function(!) creates a new C<CBOR::XS::Tagged> object using the given
603     C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
604     value that can be encoded in CBOR, including serialisable Perl objects and
605     C<CBOR::XS::Tagged> objects).
606    
607     =item $tagged->[0]
608    
609     =item $tagged->[0] = $new_tag
610    
611     =item $tag = $tagged->tag
612    
613     =item $new_tag = $tagged->tag ($new_tag)
614    
615     Access/mutate the tag.
616    
617     =item $tagged->[1]
618    
619     =item $tagged->[1] = $new_value
620    
621     =item $value = $tagged->value
622    
623     =item $new_value = $tagged->value ($new_value)
624    
625     Access/mutate the tagged value.
626    
627     =back
628    
629     =cut
630    
# Build a CBOR::XS::Tagged object from a tag and a value.
#
# This is a plain function, not a method. The result is a blessed
# two-element array reference: element 0 is the tag, element 1 the value.
sub tag($$) {
   my @pair = @_;

   return bless \@pair, "CBOR::XS::Tagged";
}
634    
# Combined getter/setter for the tag, stored in element 0 of the object.
#
# When called with an argument, the tag is replaced by it first. In both
# cases the (possibly new) tag is returned.
sub CBOR::XS::Tagged::tag {
   my ($self, @new) = @_;

   $self->[0] = $new[0] if @new;

   return $self->[0];
}
639    
# Combined getter/setter for the tagged value, stored in element 1 of the
# object.
#
# When called with an argument, the value is replaced by it first. In both
# cases the (possibly new) value is returned.
sub CBOR::XS::Tagged::value {
   my ($self, @new) = @_;

   $self->[1] = $new[0] if @new;

   return $self->[1];
}
644    
645 root 1.13 =head2 EXAMPLES
646    
647     Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
648    
649     You can look up CBOR tag values and meanings in the IANA registry at
650     L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
651    
652     Prepend a magic header (C<$CBOR::XS::MAGIC>):
653    
654     my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
655     # same as:
656     my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
657    
658     Serialise some URIs and a regex in an array:
659    
660     my $cbor = encode_cbor [
661     (CBOR::XS::tag 32, "http://www.nethype.de/"),
662     (CBOR::XS::tag 32, "http://software.schmorp.de/"),
663     (CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
664     ];
665    
666     Wrap CBOR data in CBOR:
667    
668     my $cbor_cbor = encode_cbor
669     CBOR::XS::tag 24,
670     encode_cbor [1, 2, 3];
671    
672 root 1.19 =head1 TAG HANDLING AND EXTENSIONS
673    
674 root 1.22 This section describes how this module handles specific tagged values
675     and extensions. If a tag is not mentioned here and no additional filters
676     are provided for it, then the default handling applies (creating a
677     CBOR::XS::Tagged object on decoding, and only encoding the tag when
678     explicitly requested).
679 root 1.19
680 root 1.23 Tags not handled specifically are currently converted into a
681     L<CBOR::XS::Tagged> object, which is simply a blessed array reference
682     consisting of the numeric tag value followed by the (decoded) CBOR value.
683    
684 root 1.19 Future versions of this module reserve the right to special case
685 root 1.22 additional tags (such as base64url).
686    
687     =head2 ENFORCED TAGS
688    
689     These tags are always handled when decoding, and their handling cannot be
690     overridden by the user.
691 root 1.19
692     =over 4
693    
694 root 1.26 =item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
695 root 1.19
696 root 1.23 These tags are automatically created (and decoded) for serialisable
697     objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
698     serialisation protocol). See L<OBJECT SERIALISATION> for details.
699 root 1.19
700 root 1.26 =item 28, 29 (sharable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
701 root 1.19
702     These tags are automatically decoded when encountered, resulting in
703     shared values in the decoded object. They are only encoded, however, when
704     C<allow_sharing> is enabled.
705    
706 root 1.26 =item 256, 25 (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
707 root 1.21
708     These tags are automatically decoded when encountered. They are only
709 root 1.25 encoded, however, when C<pack_strings> is enabled.
710 root 1.21
711 root 1.19 =item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
712    
713     This tag is automatically generated when a reference is encountered (with
714     the exception of hash and array references). It is converted to a reference
715     when decoding.
716    
717     =item 55799 (self-describe CBOR, RFC 7049)
718    
719     This value is not generated on encoding (unless explicitly requested by
720     the user), and is simply ignored when decoding.
721    
722     =back
723    
724 root 1.24 =head2 NON-ENFORCED TAGS
725 root 1.22
726     These tags have default filters provided when decoding. Their handling can
727     be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
728 root 1.24 providing a custom C<filter> callback when decoding.
729 root 1.22
730     When they result in decoding into a specific Perl class, the module
731     usually provides a corresponding C<TO_CBOR> method as well.
732    
733     When any of these need to load additional modules that are not part of the
734     perl core distribution (e.g. L<URI>), it is (currently) up to the user to
735     provide these modules. The decoding usually fails with an exception if the
736     required module cannot be loaded.
737    
738     =over 4
739    
740     =item 2, 3 (positive/negative bignum)
741    
742     These tags are decoded into L<Math::BigInt> objects. The corresponding
743     C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
744     integers, and others into positive/negative CBOR bignums.
745    
746     =item 4, 5 (decimal fraction/bigfloat)
747    
748     Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
749     objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
750     encodes into a decimal fraction.
751    
752     CBOR cannot represent bigfloats with I<very> large exponents - conversion
753     of such big float objects is undefined.
754    
755     Also, NaN and infinities are not encoded properly.
756    
757     =item 21, 22, 23 (expected later JSON conversion)
758    
759     CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
760     tags.
761    
762     =item 32 (URI)
763    
764     These objects decode into L<URI> objects. The corresponding
765     C<URI::TO_CBOR> method again results in a CBOR URI value.
766    
767     =back
768    
769     =cut
770    
# Default decode filters, keyed by CBOR tag number.
#
# Every entry is a code reference that receives the tag number as first
# and the tagged value as second argument and returns the Perl value that
# replaces the tagged item. Tags that only appear in comments below have
# no default filter.
our %FILTER = (
    # 0 # rfc4287 datetime, utf-8
    # 1 # unix timestamp, any

    2 => sub { # pos bigint
        require Math::BigInt;
        my $hex = unpack "H*", pop;
        # an empty byte string stands for n = 0; a bare "0x" would be NaN
        Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))
    },

    3 => sub { # neg bigint
        require Math::BigInt;
        my $hex = unpack "H*", pop;
        # RFC 7049, section 2.4.2: the payload n encodes the value -1 - n,
        # not -n, so negate first and then subtract one
        Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))->bneg->bdec
    },

    4 => sub { # decimal fraction, array
        require Math::BigFloat;
        # value is [exponent, mantissa], meaning mantissa * 10 ** exponent
        Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
    },

    5 => sub { # bigfloat, array
        require Math::BigFloat;
        # value is [exponent, mantissa], meaning mantissa * 2 ** exponent
        scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
    },

    # the "expected conversion" hints are dropped, the value passes through
    21 => sub { pop }, # expected conversion to base64url encoding
    22 => sub { pop }, # expected conversion to base64 encoding
    23 => sub { pop }, # expected conversion to base16 encoding

    # 24 # embedded cbor, byte string

    32 => sub {
        require URI;
        URI->new (pop)
    },

    # 33 # base64url rfc4648, utf-8
    # 34 # base64 rfc4648, utf-8
    # 35 # regex pcre/ecma262, utf-8
    # 36 # mime message rfc2045, utf-8
);
811    
812 root 1.19
813 root 1.7 =head1 CBOR and JSON
814 root 1.1
815 root 1.4 CBOR is supposed to implement a superset of the JSON data model, and is,
816     with some coercion, able to represent all JSON texts (something that other
817     "binary JSON" formats such as BSON generally do not support).
818    
819     CBOR implements some extra hints and support for JSON interoperability,
820     and the spec offers further guidance for conversion between CBOR and
821     JSON. None of this is currently implemented in CBOR::XS, and the guidelines
822     in the spec do not result in correct round-tripping of data. If JSON
823     interoperability is improved in the future, then the goal will be to
824     ensure that decoded JSON data will round-trip encoding and decoding to
825     CBOR intact.
826 root 1.1
827    
828     =head1 SECURITY CONSIDERATIONS
829    
830     When you are using CBOR in a protocol, talking to untrusted potentially
831     hostile creatures requires relatively few measures.
832    
833     First of all, your CBOR decoder should be secure, that is, should not have
834     any buffer overflows. Obviously, this module should ensure that and I am
835     trying hard on making that true, but you never know.
836    
837     Second, you need to avoid resource-starving attacks. That means you should
838     limit the size of CBOR data you accept, or make sure that when your
839     resources run out, that's just fine (e.g. by using a separate process that
840     can crash safely). The size of a CBOR string in octets is usually a good
841     indication of the size of the resources required to decode it into a Perl
842     structure. While CBOR::XS can check the size of the CBOR text, it might be
843     too late when you already have it in memory, so you might want to check
844     the size before you accept the string.
845    
846     Third, CBOR::XS recurses using the C stack when decoding objects and
847     arrays. The C stack is a limited resource: for instance, on my amd64
848     machine with 8MB of stack size I can decode around 180k nested arrays but
849     only 14k nested CBOR objects (due to perl itself recursing deeply on croak
850     to free the temporary). If that is exceeded, the program crashes. To be
851     conservative, the default nesting limit is set to 512. If your process
852     has a smaller stack, you should adjust this setting accordingly with the
853     C<max_depth> method.
854    
855     Something else could bomb you, too, that I forgot to think of. In that
856     case, you get to keep the pieces. I am always open for hints, though...
857    
858     Also keep in mind that CBOR::XS might leak contents of your Perl data
859     structures in its error messages, so when you serialise sensitive
860     information you might want to make sure that exceptions thrown by CBOR::XS
861     will not end up in front of untrusted eyes.
862    
863     =head1 CBOR IMPLEMENTATION NOTES
864    
865     This section contains some random implementation notes. They do not
866     describe guaranteed behaviour, but merely the behaviour as it is implemented
867     right now.
868    
869     64 bit integers are only properly decoded when Perl was built with 64 bit
870     support.
871    
872     Strings and arrays are encoded with a definite length. Hashes as well,
873     unless they are tied (or otherwise magical).
874    
875     Only the double data type is supported for NV data types - when Perl uses
876     long double to represent floating point values, they might not be encoded
877     properly. Half precision types are accepted, but not encoded.
878    
879     Strict mode and canonical mode are not implemented.
880    
881    
882     =head1 THREADS
883    
884     This module is I<not> guaranteed to be thread safe and there are no
885     plans to change this until Perl gets thread support (as opposed to the
886     horribly slow so-called "threads" which are simply slow and bloated
887     process simulations - use fork, it's I<much> faster, cheaper, better).
888    
889     (It might actually work, but you have been warned).
890    
891    
892     =head1 BUGS
893    
894     While the goal of this module is to be correct, that unfortunately does
895     not mean it's bug-free, only that I think its design is bug-free. If you
896     keep reporting bugs they will be fixed swiftly, though.
897    
898     Please refrain from using rt.cpan.org or any other bug reporting
899     service. I put the contact address into my modules for a reason.
900    
901     =cut
902    
# Default decode filters, keyed by CBOR tag number.
#
# Every entry is a code reference that receives the tag number as first
# and the tagged value as second argument and returns the Perl value that
# replaces the tagged item. Tags that only appear in comments below have
# no default filter.
our %FILTER = (
    # 0 # rfc4287 datetime, utf-8
    # 1 # unix timestamp, any

    2 => sub { # pos bigint
        require Math::BigInt;
        my $hex = unpack "H*", pop;
        # an empty byte string stands for n = 0; a bare "0x" would be NaN
        Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))
    },

    3 => sub { # neg bigint
        require Math::BigInt;
        my $hex = unpack "H*", pop;
        # RFC 7049, section 2.4.2: the payload n encodes the value -1 - n,
        # not -n, so negate first and then subtract one
        Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))->bneg->bdec
    },

    4 => sub { # decimal fraction, array
        require Math::BigFloat;
        # value is [exponent, mantissa], meaning mantissa * 10 ** exponent
        Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
    },

    5 => sub { # bigfloat, array
        require Math::BigFloat;
        # value is [exponent, mantissa], meaning mantissa * 2 ** exponent
        scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
    },

    # the "expected conversion" hints are dropped, the value passes through
    21 => sub { pop }, # expected conversion to base64url encoding
    22 => sub { pop }, # expected conversion to base64 encoding
    23 => sub { pop }, # expected conversion to base16 encoding

    # 24 # embedded cbor, byte string

    32 => sub {
        require URI;
        URI->new (pop)
    },

    # 33 # base64url rfc4648, utf-8
    # 34 # base64 rfc4648, utf-8
    # 35 # regex pcre/ecma262, utf-8
    # 36 # mime message rfc2045, utf-8
);
943    
# Dispatches a tagged value to the matching entry in %FILTER.
#
# $_[0] is the tag number used as the lookup key. When %FILTER has no
# (true) entry for it, the sub returns nothing (empty list / undef).
# Otherwise the handler is invoked with this sub's own argument list and
# its result is returned.
sub CBOR::XS::default_filter {
    my $handler = $FILTER{$_[0]}
        or return;

    # "&$handler" without parentheses hands the current @_ to the handler
    &$handler
}
947    
# Serialises a URI object as a CBOR tag 32 value.
#
# The object's string form is upgraded to Perl's internal UTF-8
# representation before being handed to CBOR::XS::tag together with the
# tag number 32; whatever CBOR::XS::tag returns is the result.
sub URI::TO_CBOR {
    my ($self) = @_;

    my $text = $self->as_string;
    utf8::upgrade $text;

    CBOR::XS::tag (32, $text)
}
953    
# Serialises a Math::BigInt for CBOR.
#
# Values within the signed 32 bit range are returned as plain Perl
# numbers. Anything larger is handed to CBOR::XS::tag as a bignum: tag 2
# with the big-endian magnitude for non-negative values, tag 3 for
# negative ones. The invocant is never modified.
sub Math::BigInt::TO_CBOR {
    my ($self) = @_;

    return $self->numify
        if $self >= -2147483648 && $self <= 2147483647;

    my $negative = $self->is_neg;

    # RFC 7049, section 2.4.2: tag 3 carries n for the value -1 - n, so
    # n = |value + 1|. Working on the non-negative n also guarantees that
    # as_hex yields "0x..." and never "-0x...", which the substr below
    # would otherwise mangle.
    my $magnitude = $negative ? $self->copy->binc->babs : $self;

    my $hex = substr $magnitude->as_hex, 2;
    $hex = "0$hex" if 1 & length $hex; # pack "H*" needs whole octets

    CBOR::XS::tag (($negative ? 3 : 2), pack "H*", $hex)
}
963    
# Serialises a Math::BigFloat as a CBOR tag 4 (decimal fraction) value.
#
# The number is split into mantissa and exponent via ->parts; the pair is
# passed to CBOR::XS::tag as [exponent, mantissa], with the exponent
# converted to a plain Perl number and the mantissa left as returned.
sub Math::BigFloat::TO_CBOR {
    my ($mantissa, $exponent) = $_[0]->parts;

    my @fraction = ($exponent->numify, $mantissa);

    CBOR::XS::tag (4, \@fraction)
}
968    
969 root 1.1 XSLoader::load "CBOR::XS", $VERSION;
970    
971     =head1 SEE ALSO
972    
973     The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
974     serialisation.
975    
976 root 1.6 The L<Types::Serialiser> module provides the data model for true, false
977     and error values.
978    
979 root 1.1 =head1 AUTHOR
980    
981     Marc Lehmann <schmorp@schmorp.de>
982     http://home.schmorp.de/
983    
984     =cut
985    
986 root 1.6 1
987