ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.pm
Revision: 1.31
Committed: Sat Nov 30 18:13:53 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.30: +38 -8 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     CBOR::XS - Concise Binary Object Representation (CBOR, RFC7049)
4    
5     =encoding utf-8
6    
7     =head1 SYNOPSIS
8    
9     use CBOR::XS;
10    
11     $binary_cbor_data = encode_cbor $perl_value;
12     $perl_value = decode_cbor $binary_cbor_data;
13    
14     # OO-interface
15    
16     $coder = CBOR::XS->new;
17 root 1.6 $binary_cbor_data = $coder->encode ($perl_value);
18     $perl_value = $coder->decode ($binary_cbor_data);
19    
20     # prefix decoding
21    
22     my $many_cbor_strings = ...;
23     while (length $many_cbor_strings) {
24     my ($data, $length) = $coder->decode_prefix ($many_cbor_strings);
25     # data was decoded
26     substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
27     }
28 root 1.1
29     =head1 DESCRIPTION
30    
31 root 1.5 This module converts Perl data structures to the Concise Binary Object
32     Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
33 root 1.28 format that aims to use an (almost) superset of the JSON data model, i.e.
34     when you can represent something useful in JSON, you should be able to
35     represent it in CBOR.
36 root 1.1
37 root 1.28 In short, CBOR is a faster and quite compact binary alternative to JSON,
38 root 1.10 with the added ability of supporting serialisation of Perl objects. (JSON
39     often compresses better than CBOR though, so if you plan to compress the
40 root 1.28 data later and speed is less important you might want to compare both
41     formats first).
42 root 1.5
43 root 1.15 To give you a general idea about speed, with texts in the megabyte range,
44     C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
45     L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
46     data, the worse L<Storable> performs in comparison.
47    
48 root 1.28 Regarding compactness, C<CBOR::XS>-encoded data structures are usually
49     about 20% smaller than the same data encoded as (compact) JSON or
50     L<Storable>.
51    
52     In addition to the core CBOR data format, this module implements a
53 root 1.31 number of extensions, to support cyclic and shared data structures
54     (see C<allow_sharing> and C<allow_cycles>), string deduplication (see
55     C<pack_strings>) and scalar references (always enabled).
56 root 1.21
57 root 1.5 The primary goal of this module is to be I<correct> and the secondary goal
58     is to be I<fast>. To reach the latter goal it was written in C.
59 root 1.1
60     See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
61     vice versa.
62    
63     =cut
64    
65     package CBOR::XS; # module preamble: namespace, version, exports and dependencies
66    
67     use common::sense;
68    
69 root 1.28 our $VERSION = '1.0';
70 root 1.1 our @ISA = qw(Exporter); # inherit import() from Exporter so @EXPORT takes effect
71    
72     our @EXPORT = qw(encode_cbor decode_cbor); # the functional interface, exported by default
73    
74     use Exporter;
75     use XSLoader; # NOTE(review): presumably loads the compiled XS part further down - not visible in this chunk
76    
77 root 1.6 use Types::Serialiser; # provides the true/false/error values described under MAPPING
78    
79 root 1.3 our $MAGIC = "\xd9\xd9\xf7"; # CBOR "magic string" (see MAGIC HEADER): same bytes as tagging a value with 55799
80    
81 root 1.1 =head1 FUNCTIONAL INTERFACE
82    
83     The following convenience methods are provided by this module. They are
84     exported by default:
85    
86     =over 4
87    
88     =item $cbor_data = encode_cbor $perl_scalar
89    
90     Converts the given Perl data structure to CBOR representation. Croaks on
91     error.
92    
93     =item $perl_scalar = decode_cbor $cbor_data
94    
95     The opposite of C<encode_cbor>: expects a valid CBOR string to parse,
96     returning the resulting perl scalar. Croaks on error.
97    
98     =back
99    
100    
101     =head1 OBJECT-ORIENTED INTERFACE
102    
103     The object oriented interface lets you configure your own encoding or
104     decoding style, within the limits of supported formats.
105    
106     =over 4
107    
108     =item $cbor = new CBOR::XS
109    
110     Creates a new CBOR::XS object that can be used to de/encode CBOR
111     strings. All boolean flags described below are by default I<disabled>.
112    
113     The mutators for flags all return the CBOR object again and thus calls can
114     be chained:
115    
116     my $cbor_data = CBOR::XS->new->allow_unknown->encode ({a => [1,2]});
117    
118     =item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
119    
120     =item $max_depth = $cbor->get_max_depth
121    
122     Sets the maximum nesting level (default C<512>) accepted while encoding
123     or decoding. If a higher nesting level is detected in CBOR data or a Perl
124     data structure, then the encoder and decoder will stop and croak at that
125     point.
126    
127     Nesting level is defined by number of hash- or arrayrefs that the encoder
128     needs to traverse to reach a given point or the number of C<{> or C<[>
129     characters without their matching closing parenthesis crossed to reach a
130     given character in a string.
131    
132     Setting the maximum depth to one disallows any nesting, so that ensures
133     that the object is only a single hash/object or array.
134    
135     If no argument is given, the highest possible setting will be used, which
136     is rarely useful.
137    
138     Note that nesting is implemented by recursion in C. The default value has
139     been chosen to be as large as typical operating systems allow without
140     crashing.
141    
142     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
143    
144     =item $cbor = $cbor->max_size ([$maximum_string_size])
145    
146     =item $max_size = $cbor->get_max_size
147    
148     Set the maximum length a CBOR string may have (in bytes) where decoding
149     is being attempted. The default is C<0>, meaning no limit. When C<decode>
150     is called on a string that is longer than this many bytes, it will not
151     attempt to decode the string but throw an exception. This setting has no
152     effect on C<encode> (yet).
153    
154     If no argument is given, the limit check will be deactivated (same as when
155     C<0> is specified).
156    
157     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
158    
159 root 1.19 =item $cbor = $cbor->allow_unknown ([$enable])
160    
161     =item $enabled = $cbor->get_allow_unknown
162    
163     If C<$enable> is true (or missing), then C<encode> will I<not> throw an
164     exception when it encounters values it cannot represent in CBOR (for
165     example, filehandles) but instead will encode a CBOR C<error> value.
166    
167     If C<$enable> is false (the default), then C<encode> will throw an
168     exception when it encounters anything it cannot encode as CBOR.
169    
170     This option does not affect C<decode> in any way, and it is recommended to
171     leave it off unless you know your communications partner.
172    
173 root 1.20 =item $cbor = $cbor->allow_sharing ([$enable])
174 root 1.19
175 root 1.20 =item $enabled = $cbor->get_allow_sharing
176 root 1.19
177     If C<$enable> is true (or missing), then C<encode> will not double-encode
178 root 1.20 values that have been referenced before (e.g. when the same object, such
179     as an array, is referenced multiple times), but instead will emit a
180     reference to the earlier value.
181 root 1.19
182     This means that such values will only be encoded once, and will not result
183     in a deep cloning of the value on decode, in decoders supporting the value
184 root 1.25 sharing extension. This also makes it possible to encode cyclic data
185 root 1.31 structures (which need C<allow_cycles> to be enabled to be decoded by this
186     module).
187 root 1.19
188 root 1.21 It is recommended to leave it off unless you know your
189     communication partner supports the value sharing extensions to CBOR
190 root 1.26 (L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
191 root 1.25 resulting data structure might be unusable.
192 root 1.21
193 root 1.19 Detecting shared values incurs a runtime overhead when values are encoded
194     that have a reference counter larger than one, and might unnecessarily
195     increase the encoded size, as potentially shared values are encoded as
196 root 1.31 shareable whether or not they are actually shared.
197 root 1.19
198 root 1.20 At the moment, only targets of references can be shared (e.g. scalars,
199     arrays or hashes pointed to by a reference). Weirder constructs, such as
200     an array with multiple "copies" of the I<same> string, which are hard but
201     not impossible to create in Perl, are not supported (this is the same as
202 root 1.25 with L<Storable>).
203 root 1.19
204 root 1.25 If C<$enable> is false (the default), then C<encode> will encode shared
205     data structures repeatedly, unsharing them in the process. Cyclic data
206     structures cannot be encoded in this mode.
207 root 1.19
208     This option does not affect C<decode> in any way - shared values and
209 root 1.21 references will always be decoded properly if present.
210    
211 root 1.31 =item $cbor = $cbor->allow_cycles ([$enable])
212    
213     =item $enabled = $cbor->get_allow_cycles
214    
215     If C<$enable> is true (or missing), then C<decode> will happily decode
216     self-referential (cyclic) data structures. By default these will not be
217     decoded, as they need manual cleanup to avoid memory leaks, so code that
218     isn't prepared for this will not leak memory.
219    
220     If C<$enable> is false (the default), then C<decode> will throw an error
221     when it encounters a self-referential/cyclic data structure.
222    
223     This option does not affect C<encode> in any way - shared values and
224     references will always be encoded properly if present.
225    
226 root 1.25 =item $cbor = $cbor->pack_strings ([$enable])
227 root 1.21
228 root 1.25 =item $enabled = $cbor->get_pack_strings
229 root 1.21
230     If C<$enable> is true (or missing), then C<encode> will try not to encode
231     the same string twice, but will encode a reference to the string
232 root 1.25 instead. Depending on your data format, this can save a lot of space, but
233 root 1.21 also results in a very large runtime overhead (expect encoding times to be
234     2-4 times as high as without).
235    
236     It is recommended to leave it off unless you know your
237     communications partner supports the stringref extension to CBOR
238 root 1.26 (L<http://cbor.schmorp.de/stringref>), as without decoder support, the
239 root 1.25 resulting data structure might not be usable.
240 root 1.21
241 root 1.25 If C<$enable> is false (the default), then C<encode> will encode strings
242     the standard CBOR way.
243 root 1.21
244     This option does not affect C<decode> in any way - string references will
245     always be decoded properly if present.
246 root 1.19
247 root 1.23 =item $cbor = $cbor->filter ([$cb->($tag, $value)])
248    
249     =item $cb_or_undef = $cbor->get_filter
250    
251 root 1.24 Sets or replaces the tagged value decoding filter (when C<$cb> is
252     specified) or clears the filter (if no argument or C<undef> is provided).
253    
254     The filter callback is called only during decoding, when a non-enforced
255     tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
256     list of enforced tags). For specific tags, it's often better to provide a
257     default converter using the C<%CBOR::XS::FILTER> hash (see below).
258    
259     The first argument is the numerical tag, the second is the (decoded) value
260     that has been tagged.
261    
262     The filter function should return either exactly one value, which will
263     replace the tagged value in the decoded data structure, or no values,
264     which will result in default handling, which currently means the decoder
265     creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
266    
267     When the filter is cleared (the default state), the default filter
268     function, C<CBOR::XS::default_filter>, is used. This function simply looks
269     up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists it must be
270     a code reference that is called with tag and value, and is responsible for
271     decoding the value. If no entry exists, it returns no values.
272    
273 root 1.28 Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
274 root 1.24 objects, with no other special handling (useful when working with
275     potentially "unsafe" CBOR data).
276    
277     CBOR::XS->new->filter (sub { })->decode ($cbor_data);
278    
279     Example: provide a global filter for tag 1347375694, converting the value
280     into some string form.
281    
282     $CBOR::XS::FILTER{1347375694} = sub {
283     my ($tag, $value) = @_;
284    
285     "tag 1347375694 value $value"
286     };
287 root 1.23
288 root 1.1 =item $cbor_data = $cbor->encode ($perl_scalar)
289    
290     Converts the given Perl data structure (a scalar value) to its CBOR
291     representation.
292    
293     =item $perl_scalar = $cbor->decode ($cbor_data)
294    
295     The opposite of C<encode>: expects CBOR data and tries to parse it,
296     returning the resulting simple scalar or reference. Croaks on error.
297    
298     =item ($perl_scalar, $octets) = $cbor->decode_prefix ($cbor_data)
299    
300     This works like the C<decode> method, but instead of raising an exception
301     when there is trailing garbage after the CBOR string, it will silently
302     stop parsing there and return the number of characters consumed so far.
303    
304     This is useful if your CBOR texts are not delimited by an outer protocol
305     and you need to know where the first CBOR string ends and the next one
306     starts.
307    
308     CBOR::XS->new->decode_prefix ("......")
309     => ("...", 3)
310    
311     =back
312    
313    
314     =head1 MAPPING
315    
316     This section describes how CBOR::XS maps Perl values to CBOR values and
317     vice versa. These mappings are designed to "do the right thing" in most
318     circumstances automatically, preserving round-tripping characteristics
319     (what you put in comes out as something equivalent).
320    
321     For the more enlightened: note that in the following descriptions,
322     lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
323     refers to the abstract Perl language itself.
324    
325    
326     =head2 CBOR -> PERL
327    
328     =over 4
329    
330 root 1.4 =item integers
331    
332     CBOR integers become (numeric) perl scalars. On perls without 64 bit
333     support, 64 bit integers will be truncated or otherwise corrupted.
334    
335     =item byte strings
336    
337 root 1.27 Byte strings will become octet strings in Perl (the byte values 0..255
338 root 1.4 will simply become characters of the same value in Perl).
339    
340     =item UTF-8 strings
341    
342     UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
343     decoded into proper Unicode code points. At the moment, the validity of
344     the UTF-8 octets will not be checked - corrupt input will result in
345     corrupted Perl strings.
346    
347     =item arrays, maps
348    
349     CBOR arrays and CBOR maps will be converted into references to a Perl
350     array or hash, respectively. The keys of the map will be stringified
351     during this process.
352    
353 root 1.6 =item null
354    
355     CBOR null becomes C<undef> in Perl.
356    
357     =item true, false, undefined
358 root 1.1
359 root 1.6 These CBOR values become C<Types::Serialiser::true>,
360     C<Types::Serialiser::false> and C<Types::Serialiser::error>,
361 root 1.1 respectively. They are overloaded to act almost exactly like the numbers
362 root 1.6 C<1> and C<0> (for true and false) or to throw an exception on access (for
363     error). See the L<Types::Serialiser> manpage for details.
364    
365 root 1.23 =item tagged values
366 root 1.1
367 root 1.23 Tagged items consist of a numeric tag and another CBOR value.
368 root 1.4
369 root 1.23 See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
370 root 1.28 for details on which tags are handled how.
371 root 1.4
372     =item anything else
373    
374     Anything else (e.g. unsupported simple values) will raise a decoding
375     error.
376 root 1.1
377     =back
378    
379    
380     =head2 PERL -> CBOR
381    
382     The mapping from Perl to CBOR is slightly more difficult, as Perl is a
383 root 1.28 typeless language. That means this module can only guess which CBOR type
384     is meant by a perl value.
385 root 1.1
386     =over 4
387    
388     =item hash references
389    
390 root 1.4 Perl hash references become CBOR maps. As there is no inherent ordering in
391     hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
392 root 1.28 order. This order can be different each time a hash is encoded.
393 root 1.4
394     Currently, tied hashes will use the indefinite-length format, while normal
395     hashes will use the fixed-length format.
396 root 1.1
397     =item array references
398    
399 root 1.4 Perl array references become fixed-length CBOR arrays.
400 root 1.1
401     =item other references
402    
403 root 1.28 Other unblessed references will be represented using
404     the indirection tag extension (tag value C<22098>,
405     L<http://cbor.schmorp.de/indirection>). CBOR decoders are guaranteed
406     to be able to decode these values somehow, by either "doing the right
407     thing", decoding into a generic tagged object, simply ignoring the tag, or
408     something else.
409 root 1.4
410     =item CBOR::XS::Tagged objects
411    
412     Objects of this type must be arrays consisting of a single C<[tag, value]>
413 root 1.13 pair. The (numerical) tag will be encoded as a CBOR tag, the value will
414 root 1.28 be encoded as appropriate for the value. You must use C<CBOR::XS::tag> to
415 root 1.13 create such objects.
416 root 1.1
417 root 1.6 =item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
418 root 1.1
419 root 1.6 These special values become CBOR true, CBOR false and CBOR undefined
420     values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
421     if you want.
422 root 1.1
423 root 1.7 =item other blessed objects
424 root 1.1
425 root 1.7 Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
426 root 1.23 L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
427     module, and L<OBJECT SERIALISATION> for generic object serialisation.
428 root 1.1
429     =item simple scalars
430    
431     Simple Perl scalars (any scalar that is not a reference) are the most
432     difficult objects to encode: CBOR::XS will encode undefined scalars as
433 root 1.4 CBOR null values, scalars that have last been used in a string context
434 root 1.1 before encoding as CBOR strings, and anything else as number value:
435    
436     # dump as number
437     encode_cbor [2] # yields [2]
438     encode_cbor [-3.0e17] # yields [-3e+17]
439     my $value = 5; encode_cbor [$value] # yields [5]
440    
441 root 1.27 # used as string, so dump as string (either byte or text)
442 root 1.1 print $value;
443     encode_cbor [$value] # yields ["5"]
444    
445     # undef becomes null
446     encode_cbor [undef] # yields [null]
447    
448     You can force the type to be a CBOR string by stringifying it:
449    
450     my $x = 3.1; # some variable containing a number
451     "$x"; # stringified
452     $x .= ""; # another, more awkward way to stringify
453     print $x; # perl does it for you, too, quite often
454    
455 root 1.27 You can force whether a string is encoded as byte or text string by using
456     C<utf8::upgrade> and C<utf8::downgrade>:
457    
458     utf8::upgrade $x; # encode $x as text string
459     utf8::downgrade $x; # encode $x as byte string
460    
461     Perl doesn't define what operations up- and downgrade strings, so if the
462     difference between byte and text is important, you should up- or downgrade
463     your string as late as possible before encoding.
464    
465 root 1.1 You can force the type to be a CBOR number by numifying it:
466    
467     my $x = "3"; # some variable containing a string
468     $x += 0; # numify it, ensuring it will be dumped as a number
469     $x *= 1; # same thing, the choice is yours.
470    
471     You can not currently force the type in other, less obscure, ways. Tell me
472     if you need this capability (but don't forget to explain why it's needed
473     :).
474    
475 root 1.4 Perl values that seem to be integers generally use the shortest possible
476     representation. Floating-point values will use either the IEEE single
477     format if possible without loss of precision, otherwise the IEEE double
478     format will be used. Perls that use formats other than IEEE double to
479     represent numerical values are supported, but might suffer loss of
480     precision.
481 root 1.1
482     =back
483    
484 root 1.7 =head2 OBJECT SERIALISATION
485    
486 root 1.29 This module implements both a CBOR-specific and the generic
487     L<Types::Serialiser> object serialisation protocol. The following
488     subsections explain both methods.
489    
490     =head3 ENCODING
491    
492 root 1.7 This module knows two ways to serialise a Perl object: The CBOR-specific
493     way, and the generic way.
494    
495 root 1.29 Whenever the encoder encounters a Perl object that it cannot serialise
496 root 1.7 directly (most of them), it will first look up the C<TO_CBOR> method on
497     it.
498    
499     If it has a C<TO_CBOR> method, it will call it with the object as only
500     argument, and expects exactly one return value, which it will then
501     substitute and encode it in the place of the object.
502    
503     Otherwise, it will look up the C<FREEZE> method. If it exists, it will
504     call it with the object as first argument, and the constant string C<CBOR>
505     as the second argument, to distinguish it from other serialisers.
506    
507     The C<FREEZE> method can return any number of values (i.e. zero or
508     more). These will be encoded as CBOR perl object, together with the
509     classname.
510    
511 root 1.29 These methods I<MUST NOT> change the data structure that is being
512     serialised. Failure to comply with this can result in memory corruption -
513     and worse.
514    
515 root 1.7 If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
516     with an error.
517    
518 root 1.29 =head3 DECODING
519    
520     Objects encoded via C<TO_CBOR> cannot (normally) be automatically decoded,
521     but objects encoded via C<FREEZE> can be decoded using the following
522     protocol:
523 root 1.7
524     When an encoded CBOR perl object is encountered by the decoder, it will
525     look up the C<THAW> method, by using the stored classname, and will fail
526     if the method cannot be found.
527    
528     After the lookup it will call the C<THAW> method with the stored classname
529     as first argument, the constant string C<CBOR> as second argument, and all
530     values returned by C<FREEZE> as remaining arguments.
531    
532 root 1.29 =head3 EXAMPLES
533 root 1.7
534     Here is an example C<TO_CBOR> method:
535    
536     sub My::Object::TO_CBOR {
537     my ($obj) = @_;
538    
539     ["this is a serialised My::Object object", $obj->{id}]
540     }
541    
542     When a C<My::Object> is encoded to CBOR, it will instead encode a simple
543     array with two members: a string, and the "object id". Decoding this CBOR
544     string will yield a normal perl array reference in place of the object.
545    
546     A more useful and practical example would be a serialisation method for
547     the URI module. CBOR has a custom tag value for URIs, namely 32:
548    
549     sub URI::TO_CBOR {
550     my ($self) = @_;
551     my $uri = "$self"; # stringify uri
552     utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
553 root 1.28 CBOR::XS::tag 32, $uri
554 root 1.7 }
555    
556     This will encode URIs as a UTF-8 string with tag 32, which indicates an
557     URI.
558    
559     Decoding such an URI will not (currently) give you an URI object, but
560     instead a CBOR::XS::Tagged object with tag number 32 and the string -
561     exactly what was returned by C<TO_CBOR>.
562    
563     To serialise an object so it can automatically be deserialised, you need
564     to use C<FREEZE> and C<THAW>. To take the URI module as example, this
565     would be a possible implementation:
566    
567     sub URI::FREEZE {
568     my ($self, $serialiser) = @_;
569     "$self" # encode url string
570     }
571    
572     sub URI::THAW {
573     my ($class, $serialiser, $uri) = @_;
574    
575     $class->new ($uri)
576     }
577    
578     Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
579     example, a C<FREEZE> method that returns "type", "id" and "variant" values
580     would cause an invocation of C<THAW> with 5 arguments:
581    
582     sub My::Object::FREEZE {
583     my ($self, $serialiser) = @_;
584    
585     ($self->{type}, $self->{id}, $self->{variant})
586     }
587    
588     sub My::Object::THAW {
589     my ($class, $serialiser, $type, $id, $variant) = @_;
590    
591     $class->new (type => $type, id => $id, variant => $variant)
592     }
593    
594 root 1.1
595 root 1.7 =head1 MAGIC HEADER
596 root 1.3
597     There is no way to distinguish CBOR from other formats
598     programmatically. To make it easier to distinguish CBOR from other
599     formats, the CBOR specification has a special "magic string" that can be
600 root 1.18 prepended to any CBOR string without changing its meaning.
601 root 1.3
602     This string is available as C<$CBOR::XS::MAGIC>. This module does not
603 root 1.18 prepend this string to the CBOR data it generates, but it will ignore it
604 root 1.3 if present, so users can prepend this string as a "file type" indicator as
605     required.
606    
607    
608 root 1.12 =head1 THE CBOR::XS::Tagged CLASS
609    
610     CBOR has the concept of tagged values - any CBOR value can be tagged with
611     a numeric 64 bit number; these tag numbers are centrally administered.
612    
613     C<CBOR::XS> handles a few tags internally when en- or decoding. You can
614     also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
615     decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
616     unknown tag.
617    
618     These objects are simply blessed array references - the first member of
619     the array being the numerical tag, the second being the value.
620    
621     You can interact with C<CBOR::XS::Tagged> objects in the following ways:
622    
623     =over 4
624    
625     =item $tagged = CBOR::XS::tag $tag, $value
626    
627     This function(!) creates a new C<CBOR::XS::Tagged> object using the given
628     C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
629     value that can be encoded in CBOR, including serialisable Perl objects and
630     C<CBOR::XS::Tagged> objects).
631    
632     =item $tagged->[0]
633    
634     =item $tagged->[0] = $new_tag
635    
636     =item $tag = $tagged->tag
637    
638     =item $new_tag = $tagged->tag ($new_tag)
639    
640     Access/mutate the tag.
641    
642     =item $tagged->[1]
643    
644     =item $tagged->[1] = $new_value
645    
646     =item $value = $tagged->value
647    
648     =item $new_value = $tagged->value ($new_value)
649    
650     Access/mutate the tagged value.
651    
652     =back
653    
654     =cut
655    
656     sub tag($$) { # plain function (not a method): wraps ($tag, $value) into a CBOR::XS::Tagged object
657     bless [@_], CBOR::XS::Tagged::; # copy both arguments into a fresh array ref and bless it
658     }
659    
660     sub CBOR::XS::Tagged::tag { # combined getter/setter for element 0 of the object, the numeric tag
661     $_[0][0] = $_[1] if $#_; # $#_ is non-zero when an argument follows the object: store it as the new tag
662     $_[0][0] # return the (possibly just updated) tag
663     }
664    
665     sub CBOR::XS::Tagged::value { # combined getter/setter for element 1 of the object, the tagged value
666     $_[0][1] = $_[1] if $#_; # $#_ is non-zero when an argument follows the object: store it as the new value
667     $_[0][1] # return the (possibly just updated) value
668     }
669    
670 root 1.13 =head2 EXAMPLES
671    
672     Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
673    
674     You can look up CBOR tag values and meanings in the IANA registry at
675     L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
676    
677     Prepend a magic header (C<$CBOR::XS::MAGIC>):
678    
679     my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
680     # same as:
681     my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
682    
683     Serialise some URIs and a regex in an array:
684    
685     my $cbor = encode_cbor [
686     (CBOR::XS::tag 32, "http://www.nethype.de/"),
687     (CBOR::XS::tag 32, "http://software.schmorp.de/"),
688     (CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
689     ];
690    
691     Wrap CBOR data in CBOR:
692    
693     my $cbor_cbor = encode_cbor
694     CBOR::XS::tag 24,
695     encode_cbor [1, 2, 3];
696    
697 root 1.19 =head1 TAG HANDLING AND EXTENSIONS
698    
699 root 1.22 This section describes how this module handles specific tagged values
700     and extensions. If a tag is not mentioned here and no additional filters
701     are provided for it, then the default handling applies (creating a
702     CBOR::XS::Tagged object on decoding, and only encoding the tag when
703     explicitly requested).
704 root 1.19
705 root 1.23 Tags not handled specifically are currently converted into a
706     L<CBOR::XS::Tagged> object, which is simply a blessed array reference
707     consisting of the numeric tag value followed by the (decoded) CBOR value.
708    
709 root 1.19 Future versions of this module reserve the right to special case
710 root 1.22 additional tags (such as base64url).
711    
712     =head2 ENFORCED TAGS
713    
714     These tags are always handled when decoding, and their handling cannot be
715     overridden by the user.
716 root 1.19
717     =over 4
718    
719 root 1.26 =item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
720 root 1.19
721 root 1.23 These tags are automatically created (and decoded) for serialisable
722     objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
723     serialisation protocol). See L<OBJECT SERIALISATION> for details.
724 root 1.19
725 root 1.31 =item 28, 29 (shareable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
726 root 1.19
727 root 1.31 These tags are automatically decoded when encountered (and they do not
728     result in a cyclic data structure, see C<allow_cycles>), resulting in
729 root 1.19 shared values in the decoded object. They are only encoded, however, when
730 root 1.31 C<allow_sharing> is enabled.
731    
732     Not all shared values can be successfully decoded: values that reference
733     themselves will I<currently> decode as C<undef> (this is not the same
734     as a reference pointing to itself, which will be represented as a value
735     that contains an indirect reference to itself - these will be decoded
736     properly).
737    
738     Note that considerably more shared value data structures can be decoded
739     than will be encoded - currently, only values pointed to by references
740     will be shared, others will not. While non-reference shared values can be
741     generated in Perl with some effort, they were considered too unimportant
742     to be supported in the encoder. The decoder, however, will decode these
743     values as shared values.
744 root 1.19
745 root 1.26 =item 256, 25 (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
746 root 1.21
747     These tags are automatically decoded when encountered. They are only
748 root 1.25 encoded, however, when C<pack_strings> is enabled.
749 root 1.21
750 root 1.19 =item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
751    
752     This tag is automatically generated when a reference is encountered (with
753     the exception of hash and array references). It is converted to a reference
754     when decoding.
755    
756     =item 55799 (self-describe CBOR, RFC 7049)
757    
758     This value is not generated on encoding (unless explicitly requested by
759     the user), and is simply ignored when decoding.
760    
761     =back
762    
763 root 1.24 =head2 NON-ENFORCED TAGS
764 root 1.22
765     These tags have default filters provided when decoding. Their handling can
766     be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
767 root 1.24 providing a custom C<filter> callback when decoding.
768 root 1.22
769     When they result in decoding into a specific Perl class, the module
770     usually provides a corresponding C<TO_CBOR> method as well.
771    
772     When any of these need to load additional modules that are not part of the
773     perl core distribution (e.g. L<URI>), it is (currently) up to the user to
774     provide these modules. The decoding usually fails with an exception if the
775     required module cannot be loaded.
776    
777     =over 4
778    
779     =item 2, 3 (positive/negative bignum)
780    
781     These tags are decoded into L<Math::BigInt> objects. The corresponding
782     C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
783     integers, and others into positive/negative CBOR bignums.
784    
785     =item 4, 5 (decimal fraction/bigfloat)
786    
787     Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
788     objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
789     encodes into a decimal fraction.
790    
791     CBOR cannot represent bigfloats with I<very> large exponents - conversion
792     of such big float objects is undefined.
793    
794     Also, NaN and infinities are not encoded properly.
795    
796     =item 21, 22, 23 (expected later JSON conversion)
797    
798     CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
799     tags.
800    
801     =item 32 (URI)
802    
803     These objects decode into L<URI> objects. The corresponding
804     C<URI::TO_CBOR> method again results in a CBOR URI value.
805    
806     =back
807    
808     =cut
809    
# Default decoders for the non-enforced tags, keyed by CBOR tag number.
# Each filter receives the tag number and the decoded tag value as its
# arguments and returns the Perl value that replaces the tagged item.
our %FILTER = (
   # 0 # rfc4287 datetime, utf-8
   # 1 # unix timestamp, any

   2 => sub { # pos bigint
      require Math::BigInt;
      my $hex = unpack "H*", pop;
      # an empty byte string means zero, but a bare "0x" would parse as NaN
      Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))
   },

   3 => sub { # neg bigint
      require Math::BigInt;
      my $hex = unpack "H*", pop;
      # RFC 7049, 2.4.2: the byte string holds n, the value is -1 - n
      Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))->bneg->bdec
   },

   4 => sub { # decimal fraction, array
      require Math::BigFloat;
      # value is [exponent, mantissa], meaning mantissa * 10**exponent
      Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
   },

   5 => sub { # bigfloat, array
      require Math::BigFloat;
      # value is [exponent, mantissa], meaning mantissa * 2**exponent
      scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
   },

   # the conversion hints are dropped, the tagged value is passed through
   21 => sub { pop }, # expected conversion to base64url encoding
   22 => sub { pop }, # expected conversion to base64 encoding
   23 => sub { pop }, # expected conversion to base16 encoding

   # 24 # embedded cbor, byte string

   32 => sub {
      require URI;
      URI->new (pop)
   },

   # 33 # base64url rfc4648, utf-8
   # 34 # base64 rfc4648, utf-8
   # 35 # regex pcre/ecma262, utf-8
   # 36 # mime message rfc2045, utf-8
);
850    
851 root 1.19
852 root 1.7 =head1 CBOR and JSON
853 root 1.1
854 root 1.4 CBOR is supposed to implement a superset of the JSON data model, and is,
855     with some coercion, able to represent all JSON texts (something that other
856     "binary JSON" formats such as BSON generally do not support).
857    
858     CBOR implements some extra hints and support for JSON interoperability,
859     and the spec offers further guidance for conversion between CBOR and
860     JSON. None of this is currently implemented in CBOR::XS, and the guidelines
861     in the spec do not result in correct round-tripping of data. If JSON
862     interoperability is improved in the future, then the goal will be to
863     ensure that decoded JSON data will round-trip encoding and decoding to
864     CBOR intact.
865 root 1.1
866    
867     =head1 SECURITY CONSIDERATIONS
868    
869     When you are using CBOR in a protocol, talking to untrusted potentially
870     hostile creatures requires relatively few measures.
871    
872     First of all, your CBOR decoder should be secure, that is, should not have
873     any buffer overflows. Obviously, this module should ensure that and I am
874     trying hard on making that true, but you never know.
875    
876     Second, you need to avoid resource-starving attacks. That means you should
877     limit the size of CBOR data you accept, or make sure that when your
878     resources run out, that's just fine (e.g. by using a separate process that
879     can crash safely). The size of a CBOR string in octets is usually a good
880     indication of the size of the resources required to decode it into a Perl
881     structure. While CBOR::XS can check the size of the CBOR text, it might be
882     too late when you already have it in memory, so you might want to check
883     the size before you accept the string.
884    
885     Third, CBOR::XS recurses using the C stack when decoding objects and
886     arrays. The C stack is a limited resource: for instance, on my amd64
887     machine with 8MB of stack size I can decode around 180k nested arrays but
888     only 14k nested CBOR objects (due to perl itself recursing deeply on croak
889     to free the temporary). If that is exceeded, the program crashes. To be
890     conservative, the default nesting limit is set to 512. If your process
891     has a smaller stack, you should adjust this setting accordingly with the
892     C<max_depth> method.
893    
894     Something else could bomb you, too, that I forgot to think of. In that
895     case, you get to keep the pieces. I am always open for hints, though...
896    
897     Also keep in mind that CBOR::XS might leak contents of your Perl data
898     structures in its error messages, so when you serialise sensitive
899     information you might want to make sure that exceptions thrown by CBOR::XS
900     will not end up in front of untrusted eyes.
901    
902     =head1 CBOR IMPLEMENTATION NOTES
903    
904     This section contains some random implementation notes. They do not
905     describe guaranteed behaviour, but merely behaviour as-is implemented
906     right now.
907    
908     64 bit integers are only properly decoded when Perl was built with 64 bit
909     support.
910    
911     Strings and arrays are encoded with a definite length. Hashes as well,
912     unless they are tied (or otherwise magical).
913    
914     Only the double data type is supported for NV data types - when Perl uses
915     long double to represent floating point values, they might not be encoded
916     properly. Half precision types are accepted, but not encoded.
917    
918     Strict mode and canonical mode are not implemented.
919    
920    
921 root 1.30 =head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT
922    
923     On perls that were built without 64 bit integer support (these are rare
924     nowadays, even on 32 bit architectures), support for any kind of 64 bit
925     integer in CBOR is very limited - most likely, these 64 bit values will
926     be truncated, corrupted, or otherwise not decoded correctly. This also
927     includes string, array and map sizes that are stored as 64 bit integers.
928    
929    
930 root 1.1 =head1 THREADS
931    
932     This module is I<not> guaranteed to be thread safe and there are no
933     plans to change this until Perl gets thread support (as opposed to the
934     horribly slow so-called "threads" which are simply slow and bloated
935     process simulations - use fork, it's I<much> faster, cheaper, better).
936    
937     (It might actually work, but you have been warned).
938    
939    
940     =head1 BUGS
941    
942     While the goal of this module is to be correct, that unfortunately does
943     not mean it's bug-free, only that I think its design is bug-free. If you
944     keep reporting bugs they will be fixed swiftly, though.
945    
946     Please refrain from using rt.cpan.org or any other bug reporting
947     service. I put the contact address into my modules for a reason.
948    
949     =cut
950    
# Default decoders for the non-enforced tags, keyed by CBOR tag number.
# Each filter receives the tag number and the decoded tag value as its
# arguments and returns the Perl value that replaces the tagged item.
our %FILTER = (
   # 0 # rfc4287 datetime, utf-8
   # 1 # unix timestamp, any

   2 => sub { # pos bigint
      require Math::BigInt;
      my $hex = unpack "H*", pop;
      # an empty byte string means zero, but a bare "0x" would parse as NaN
      Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))
   },

   3 => sub { # neg bigint
      require Math::BigInt;
      my $hex = unpack "H*", pop;
      # RFC 7049, 2.4.2: the byte string holds n, the value is -1 - n
      Math::BigInt->new ("0x" . (length $hex ? $hex : "0"))->bneg->bdec
   },

   4 => sub { # decimal fraction, array
      require Math::BigFloat;
      # value is [exponent, mantissa], meaning mantissa * 10**exponent
      Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
   },

   5 => sub { # bigfloat, array
      require Math::BigFloat;
      # value is [exponent, mantissa], meaning mantissa * 2**exponent
      scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
   },

   # the conversion hints are dropped, the tagged value is passed through
   21 => sub { pop }, # expected conversion to base64url encoding
   22 => sub { pop }, # expected conversion to base64 encoding
   23 => sub { pop }, # expected conversion to base16 encoding

   # 24 # embedded cbor, byte string

   32 => sub {
      require URI;
      URI->new (pop)
   },

   # 33 # base64url rfc4648, utf-8
   # 34 # base64 rfc4648, utf-8
   # 35 # regex pcre/ecma262, utf-8
   # 36 # mime message rfc2045, utf-8
);
991    
# Run the default filter registered for a tag.
#
# The first argument is used as the key into %FILTER; the matching filter
# is called with the very same argument list and its result is handed back.
# When no filter is registered for the key, nothing is returned.
sub CBOR::XS::default_filter {
   my $filter = $FILTER{$_[0]}
      or return;

   $filter->(@_)
}
995    
# Serialise a URI object as a CBOR tag 32 value wrapping its string form.
sub URI::TO_CBOR {
   my ($self) = @_;

   my $text = $self->as_string;
   # switch the string to perl's internal UTF-8 representation before
   # it is tagged
   utf8::upgrade ($text);

   CBOR::XS::tag (32, $text)
}
1001    
# Serialise a Math::BigInt for CBOR.
#
# Values that fit into a signed 32 bit integer are returned as plain perl
# numbers. Everything else becomes a tagged byte string holding the
# big-endian magnitude: tag 2 for non-negative values, tag 3 for negative
# ones.
sub Math::BigInt::TO_CBOR {
   my $value = $_[0];

   return $value->numify
      if $value >= -2147483648 && $value <= 2147483647;

   my $negative = $value < 0;

   # RFC 7049, 2.4.2: a tag 3 payload n stands for -1 - n, so the number
   # stored for a negative value v is -v - 1. Working on a copy keeps the
   # caller's object untouched and keeps the "-" sign out of as_hex.
   my $payload = $negative ? $value->copy->bneg->bdec : $value;

   my $hex = substr $payload->as_hex, 2; # strip the leading "0x"
   $hex = "0$hex" if 1 & length $hex; # pack "H*" wants whole octets

   CBOR::XS::tag (($negative ? 3 : 2), pack "H*", $hex)
}
1011    
# Serialise a Math::BigFloat as a CBOR decimal fraction (tag 4): a two
# element array of the numified exponent followed by the mantissa, both
# taken from ->parts.
sub Math::BigFloat::TO_CBOR {
   my ($mantissa, $exponent) = $_[0]->parts;
   my @fraction = ($exponent->numify, $mantissa);

   CBOR::XS::tag (4, \@fraction)
}
1016    
# Load the compiled (XS) part of CBOR::XS, handing $VERSION to the loader.
XSLoader::load ("CBOR::XS", $VERSION);
1018    
1019     =head1 SEE ALSO
1020    
1021     The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
1022     serialisation.
1023    
1024 root 1.6 The L<Types::Serialiser> module provides the data model for true, false
1025     and error values.
1026    
1027 root 1.1 =head1 AUTHOR
1028    
1029     Marc Lehmann <schmorp@schmorp.de>
1030     http://home.schmorp.de/
1031    
1032     =cut
1033    
1034 root 1.6 1
1035