ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.pm
Revision: 1.30
Committed: Sat Nov 30 16:19:59 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
Changes since 1.29: +9 -0 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     CBOR::XS - Concise Binary Object Representation (CBOR, RFC7049)
4    
5     =encoding utf-8
6    
7     =head1 SYNOPSIS
8    
9     use CBOR::XS;
10    
11     $binary_cbor_data = encode_cbor $perl_value;
12     $perl_value = decode_cbor $binary_cbor_data;
13    
14     # OO-interface
15    
16     $coder = CBOR::XS->new;
17 root 1.6 $binary_cbor_data = $coder->encode ($perl_value);
18     $perl_value = $coder->decode ($binary_cbor_data);
19    
20     # prefix decoding
21    
22     my $many_cbor_strings = ...;
23     while (length $many_cbor_strings) {
24     my ($data, $length) = $cbor->decode_prefix ($many_cbor_strings);
25     # data was decoded
26     substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
27     }
28 root 1.1
29     =head1 DESCRIPTION
30    
31 root 1.5 This module converts Perl data structures to the Concise Binary Object
32     Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
33 root 1.28 format that aims to use an (almost) superset of the JSON data model, i.e.
34     when you can represent something useful in JSON, you should be able to
35     represent it in CBOR.
36 root 1.1
37 root 1.28 In short, CBOR is a faster and quite compact binary alternative to JSON,
38 root 1.10 with the added ability of supporting serialisation of Perl objects. (JSON
39     often compresses better than CBOR though, so if you plan to compress the
40 root 1.28 data later and speed is less important you might want to compare both
41     formats first).
42 root 1.5
43 root 1.15 To give you a general idea about speed, with texts in the megabyte range,
44     C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
45     L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
46     data, the worse L<Storable> performs in comparison.
47    
48 root 1.28 Regarding compactness, C<CBOR::XS>-encoded data structures are usually
49     about 20% smaller than the same data encoded as (compact) JSON or
50     L<Storable>.
51    
52     In addition to the core CBOR data format, this module implements a
53     number of extensions, to support cyclic and shared data structures (see
54     C<allow_sharing>), string deduplication (see C<pack_strings>) and scalar
55     references (always enabled).
56 root 1.21
57 root 1.5 The primary goal of this module is to be I<correct> and the secondary goal
58     is to be I<fast>. To reach the latter goal it was written in C.
59 root 1.1
60     See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
61     vice versa.
62    
63     =cut
64    
65     package CBOR::XS;
66    
67     use common::sense;
68    
69 root 1.28 our $VERSION = '1.0';
70 root 1.1 our @ISA = qw(Exporter);
71    
72     our @EXPORT = qw(encode_cbor decode_cbor); # the functional interface, exported by default
73    
74     use Exporter;
75     use XSLoader;
76    
77 root 1.6 use Types::Serialiser;
78    
79 root 1.3 our $MAGIC = "\xd9\xd9\xf7"; # tag 55799 (0xd9f7) header bytes, see MAGIC HEADER in the POD
80    
81 root 1.1 =head1 FUNCTIONAL INTERFACE
82    
83     The following convenience methods are provided by this module. They are
84     exported by default:
85    
86     =over 4
87    
88     =item $cbor_data = encode_cbor $perl_scalar
89    
90     Converts the given Perl data structure to CBOR representation. Croaks on
91     error.
92    
93     =item $perl_scalar = decode_cbor $cbor_data
94    
95     The opposite of C<encode_cbor>: expects a valid CBOR string to parse,
96     returning the resulting perl scalar. Croaks on error.
97    
98     =back
99    
100    
101     =head1 OBJECT-ORIENTED INTERFACE
102    
103     The object oriented interface lets you configure your own encoding or
104     decoding style, within the limits of supported formats.
105    
106     =over 4
107    
108     =item $cbor = new CBOR::XS
109    
110     Creates a new CBOR::XS object that can be used to de/encode CBOR
111     strings. All boolean flags described below are by default I<disabled>.
112    
113     The mutators for flags all return the CBOR object again and thus calls can
114     be chained:
115    
116     my $cbor = CBOR::XS->new->encode ({a => [1,2]});
117    
118     =item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
119    
120     =item $max_depth = $cbor->get_max_depth
121    
122     Sets the maximum nesting level (default C<512>) accepted while encoding
123     or decoding. If a higher nesting level is detected in CBOR data or a Perl
124     data structure, then the encoder and decoder will stop and croak at that
125     point.
126    
127     Nesting level is defined by number of hash- or arrayrefs that the encoder
128     needs to traverse to reach a given point or the number of C<{> or C<[>
129     characters without their matching closing parenthesis crossed to reach a
130     given character in a string.
131    
132     Setting the maximum depth to one disallows any nesting, so that ensures
133     that the object is only a single hash/object or array.
134    
135     If no argument is given, the highest possible setting will be used, which
136     is rarely useful.
137    
138     Note that nesting is implemented by recursion in C. The default value has
139     been chosen to be as large as typical operating systems allow without
140     crashing.
141    
142     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
143    
144     =item $cbor = $cbor->max_size ([$maximum_string_size])
145    
146     =item $max_size = $cbor->get_max_size
147    
148     Set the maximum length a CBOR string may have (in bytes) where decoding
149     is being attempted. The default is C<0>, meaning no limit. When C<decode>
150     is called on a string that is longer than this many bytes, it will not
151     attempt to decode the string but throw an exception. This setting has no
152     effect on C<encode> (yet).
153    
154     If no argument is given, the limit check will be deactivated (same as when
155     C<0> is specified).
156    
157     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
158    
159 root 1.19 =item $cbor = $cbor->allow_unknown ([$enable])
160    
161     =item $enabled = $cbor->get_allow_unknown
162    
163     If C<$enable> is true (or missing), then C<encode> will I<not> throw an
164     exception when it encounters values it cannot represent in CBOR (for
165     example, filehandles) but instead will encode a CBOR C<error> value.
166    
167     If C<$enable> is false (the default), then C<encode> will throw an
168     exception when it encounters anything it cannot encode as CBOR.
169    
170     This option does not affect C<decode> in any way, and it is recommended to
171     leave it off unless you know your communications partner.
172    
173 root 1.20 =item $cbor = $cbor->allow_sharing ([$enable])
174 root 1.19
175 root 1.20 =item $enabled = $cbor->get_allow_sharing
176 root 1.19
177     If C<$enable> is true (or missing), then C<encode> will not double-encode
178 root 1.20 values that have been referenced before (e.g. when the same object, such
179     as an array, is referenced multiple times), but instead will emit a
180     reference to the earlier value.
181 root 1.19
182     This means that such values will only be encoded once, and will not result
183     in a deep cloning of the value on decode, in decoders supporting the value
184 root 1.25 sharing extension. This also makes it possible to encode cyclic data
185     structures.
186 root 1.19
187 root 1.21 It is recommended to leave it off unless you know your
188     communication partner supports the value sharing extensions to CBOR
189 root 1.26 (L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
190 root 1.25 resulting data structure might be unusable.
191 root 1.21
192 root 1.19 Detecting shared values incurs a runtime overhead when values are encoded
193     that have a reference counter larger than one, and might unnecessarily
194     increase the encoded size, as potentially shared values are encoded as
195     sharable whether or not they are actually shared.
196    
197 root 1.20 At the moment, only targets of references can be shared (e.g. scalars,
198     arrays or hashes pointed to by a reference). Weirder constructs, such as
199     an array with multiple "copies" of the I<same> string, which are hard but
200     not impossible to create in Perl, are not supported (this is the same as
201 root 1.25 with L<Storable>).
202 root 1.19
203 root 1.25 If C<$enable> is false (the default), then C<encode> will encode shared
204     data structures repeatedly, unsharing them in the process. Cyclic data
205     structures cannot be encoded in this mode.
206 root 1.19
207     This option does not affect C<decode> in any way - shared values and
208 root 1.21 references will always be decoded properly if present.
209    
210 root 1.25 =item $cbor = $cbor->pack_strings ([$enable])
211 root 1.21
212 root 1.25 =item $enabled = $cbor->get_pack_strings
213 root 1.21
214     If C<$enable> is true (or missing), then C<encode> will try not to encode
215     the same string twice, but will encode a reference to the string
216 root 1.25 instead. Depending on your data format, this can save a lot of space, but
217 root 1.21 also results in a very large runtime overhead (expect encoding times to be
218     2-4 times as high as without).
219    
220     It is recommended to leave it off unless you know your
221     communications partner supports the stringref extension to CBOR
222 root 1.26 (L<http://cbor.schmorp.de/stringref>), as without decoder support, the
223 root 1.25 resulting data structure might not be usable.
224 root 1.21
225 root 1.25 If C<$enable> is false (the default), then C<encode> will encode strings
226     the standard CBOR way.
227 root 1.21
228     This option does not affect C<decode> in any way - string references will
229     always be decoded properly if present.
230 root 1.19
231 root 1.23 =item $cbor = $cbor->filter ([$cb->($tag, $value)])
232    
233     =item $cb_or_undef = $cbor->get_filter
234    
235 root 1.24 Sets or replaces the tagged value decoding filter (when C<$cb> is
236     specified) or clears the filter (if no argument or C<undef> is provided).
237    
238     The filter callback is called only during decoding, when a non-enforced
239     tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
240     list of enforced tags). For specific tags, it's often better to provide a
241     default converter using the C<%CBOR::XS::FILTER> hash (see below).
242    
243     The first argument is the numerical tag, the second is the (decoded) value
244     that has been tagged.
245    
246     The filter function should return either exactly one value, which will
247     replace the tagged value in the decoded data structure, or no values,
248     which will result in default handling, which currently means the decoder
249     creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
250    
251     When the filter is cleared (the default state), the default filter
252     function, C<CBOR::XS::default_filter>, is used. This function simply looks
253     up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists it must be
254     a code reference that is called with tag and value, and is responsible for
255     decoding the value. If no entry exists, it returns no values.
256    
257 root 1.28 Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
258 root 1.24 objects, with no other special handling (useful when working with
259     potentially "unsafe" CBOR data).
260    
261     CBOR::XS->new->filter (sub { })->decode ($cbor_data);
262    
263     Example: provide a global filter for tag 1347375694, converting the value
264     into some string form.
265    
266     $CBOR::XS::FILTER{1347375694} = sub {
267     my ($tag, $value) = @_;
268    
269     "tag 1347375694 value $value"
270     };
271 root 1.23
272 root 1.1 =item $cbor_data = $cbor->encode ($perl_scalar)
273    
274     Converts the given Perl data structure (a scalar value) to its CBOR
275     representation.
276    
277     =item $perl_scalar = $cbor->decode ($cbor_data)
278    
279     The opposite of C<encode>: expects CBOR data and tries to parse it,
280     returning the resulting simple scalar or reference. Croaks on error.
281    
282     =item ($perl_scalar, $octets) = $cbor->decode_prefix ($cbor_data)
283    
284     This works like the C<decode> method, but instead of raising an exception
285     when there is trailing garbage after the CBOR string, it will silently
286     stop parsing there and return the number of characters consumed so far.
287    
288     This is useful if your CBOR texts are not delimited by an outer protocol
289     and you need to know where the first CBOR string ends and the next one
290     starts.
291    
292     CBOR::XS->new->decode_prefix ("......")
293     => ("...", 3)
294    
295     =back
296    
297    
298     =head1 MAPPING
299    
300     This section describes how CBOR::XS maps Perl values to CBOR values and
301     vice versa. These mappings are designed to "do the right thing" in most
302     circumstances automatically, preserving round-tripping characteristics
303     (what you put in comes out as something equivalent).
304    
305     For the more enlightened: note that in the following descriptions,
306     lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
307     refers to the abstract Perl language itself.
308    
309    
310     =head2 CBOR -> PERL
311    
312     =over 4
313    
314 root 1.4 =item integers
315    
316     CBOR integers become (numeric) perl scalars. On perls without 64 bit
317     support, 64 bit integers will be truncated or otherwise corrupted.
318    
319     =item byte strings
320    
321 root 1.27 Byte strings will become octet strings in Perl (the Byte values 0..255
322 root 1.4 will simply become characters of the same value in Perl).
323    
324     =item UTF-8 strings
325    
326     UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
327     decoded into proper Unicode code points. At the moment, the validity of
328     the UTF-8 octets will not be validated - corrupt input will result in
329     corrupted Perl strings.
330    
331     =item arrays, maps
332    
333     CBOR arrays and CBOR maps will be converted into references to a Perl
334     array or hash, respectively. The keys of the map will be stringified
335     during this process.
336    
337 root 1.6 =item null
338    
339     CBOR null becomes C<undef> in Perl.
340    
341     =item true, false, undefined
342 root 1.1
343 root 1.6 These CBOR values become C<Types::Serialiser::true>,
344     C<Types::Serialiser::false> and C<Types::Serialiser::error>,
345 root 1.1 respectively. They are overloaded to act almost exactly like the numbers
346 root 1.6 C<1> and C<0> (for true and false) or to throw an exception on access (for
347     error). See the L<Types::Serialiser> manpage for details.
348    
349 root 1.23 =item tagged values
350 root 1.1
351 root 1.23 Tagged items consist of a numeric tag and another CBOR value.
352 root 1.4
353 root 1.23 See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
354 root 1.28 for details on which tags are handled how.
355 root 1.4
356     =item anything else
357    
358     Anything else (e.g. unsupported simple values) will raise a decoding
359     error.
360 root 1.1
361     =back
362    
363    
364     =head2 PERL -> CBOR
365    
366     The mapping from Perl to CBOR is slightly more difficult, as Perl is a
367 root 1.28 typeless language. That means this module can only guess which CBOR type
368     is meant by a perl value.
369 root 1.1
370     =over 4
371    
372     =item hash references
373    
374 root 1.4 Perl hash references become CBOR maps. As there is no inherent ordering in
375     hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
376 root 1.28 order. This order can be different each time a hash is encoded.
377 root 1.4
378     Currently, tied hashes will use the indefinite-length format, while normal
379     hashes will use the fixed-length format.
380 root 1.1
381     =item array references
382    
383 root 1.4 Perl array references become fixed-length CBOR arrays.
384 root 1.1
385     =item other references
386    
387 root 1.28 Other unblessed references will be represented using
388     the indirection tag extension (tag value C<22098>,
389     L<http://cbor.schmorp.de/indirection>). CBOR decoders are guaranteed
390     to be able to decode these values somehow, by either "doing the right
391     thing", decoding into a generic tagged object, simply ignoring the tag, or
392     something else.
393 root 1.4
394     =item CBOR::XS::Tagged objects
395    
396     Objects of this type must be arrays consisting of a single C<[tag, value]>
397 root 1.13 pair. The (numerical) tag will be encoded as a CBOR tag, the value will
398 root 1.28 be encoded as appropriate for the value. You must use C<CBOR::XS::tag> to
399 root 1.13 create such objects.
400 root 1.1
401 root 1.6 =item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
402 root 1.1
403 root 1.6 These special values become CBOR true, CBOR false and CBOR undefined
404     values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
405     if you want.
406 root 1.1
407 root 1.7 =item other blessed objects
408 root 1.1
409 root 1.7 Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
410 root 1.23 L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
411     module, and L<OBJECT SERIALISATION> for generic object serialisation.
412 root 1.1
413     =item simple scalars
414    
415     Simple Perl scalars (any scalar that is not a reference) are the most
416     difficult objects to encode: CBOR::XS will encode undefined scalars as
417 root 1.4 CBOR null values, scalars that have last been used in a string context
418 root 1.1 before encoding as CBOR strings, and anything else as number value:
419    
420     # dump as number
421     encode_cbor [2] # yields [2]
422     encode_cbor [-3.0e17] # yields [-3e+17]
423     my $value = 5; encode_cbor [$value] # yields [5]
424    
425 root 1.27 # used as string, so dump as string (either byte or text)
426 root 1.1 print $value;
427     encode_cbor [$value] # yields ["5"]
428    
429     # undef becomes null
430     encode_cbor [undef] # yields [null]
431    
432     You can force the type to be a CBOR string by stringifying it:
433    
434     my $x = 3.1; # some variable containing a number
435     "$x"; # stringified
436     $x .= ""; # another, more awkward way to stringify
437     print $x; # perl does it for you, too, quite often
438    
439 root 1.27 You can force whether a string is encoded as byte or text string by using
440     C<utf8::upgrade> and C<utf8::downgrade>:
441    
442     utf8::upgrade $x; # encode $x as text string
443     utf8::downgrade $x; # encode $x as byte string
444    
445     Perl doesn't define what operations up- and downgrade strings, so if the
446     difference between byte and text is important, you should up- or downgrade
447     your string as late as possible before encoding.
448    
449 root 1.1 You can force the type to be a CBOR number by numifying it:
450    
451     my $x = "3"; # some variable containing a string
452     $x += 0; # numify it, ensuring it will be dumped as a number
453     $x *= 1; # same thing, the choice is yours.
454    
455     You can not currently force the type in other, less obscure, ways. Tell me
456     if you need this capability (but don't forget to explain why it's needed
457     :).
458    
459 root 1.4 Perl values that seem to be integers generally use the shortest possible
460     representation. Floating-point values will use either the IEEE single
461     format if possible without loss of precision, otherwise the IEEE double
462     format will be used. Perls that use formats other than IEEE double to
463     represent numerical values are supported, but might suffer loss of
464     precision.
465 root 1.1
466     =back
467    
468 root 1.7 =head2 OBJECT SERIALISATION
469    
470 root 1.29 This module implements both a CBOR-specific and the generic
471     L<Types::Serialiser> object serialisation protocol. The following
472     subsections explain both methods.
473    
474     =head3 ENCODING
475    
476 root 1.7 This module knows two ways to serialise a Perl object: The CBOR-specific
477     way, and the generic way.
478    
479 root 1.29 Whenever the encoder encounters a Perl object that it cannot serialise
480 root 1.7 directly (most of them), it will first look up the C<TO_CBOR> method on
481     it.
482    
483     If it has a C<TO_CBOR> method, it will call it with the object as only
484     argument, and expects exactly one return value, which it will then
485     substitute and encode it in the place of the object.
486    
487     Otherwise, it will look up the C<FREEZE> method. If it exists, it will
488     call it with the object as first argument, and the constant string C<CBOR>
489     as the second argument, to distinguish it from other serialisers.
490    
491     The C<FREEZE> method can return any number of values (i.e. zero or
492     more). These will be encoded as CBOR perl object, together with the
493     classname.
494    
495 root 1.29 These methods I<MUST NOT> change the data structure that is being
496     serialised. Failure to comply to this can result in memory corruption -
497     and worse.
498    
499 root 1.7 If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
500     with an error.
501    
502 root 1.29 =head3 DECODING
503    
504     Objects encoded via C<TO_CBOR> cannot (normally) be automatically decoded,
505     but objects encoded via C<FREEZE> can be decoded using the following
506     protocol:
507 root 1.7
508     When an encoded CBOR perl object is encountered by the decoder, it will
509     look up the C<THAW> method, by using the stored classname, and will fail
510     if the method cannot be found.
511    
512     After the lookup it will call the C<THAW> method with the stored classname
513     as first argument, the constant string C<CBOR> as second argument, and all
514     values returned by C<FREEZE> as remaining arguments.
515    
516 root 1.29 =head3 EXAMPLES
517 root 1.7
518     Here is an example C<TO_CBOR> method:
519    
520     sub My::Object::TO_CBOR {
521     my ($obj) = @_;
522    
523     ["this is a serialised My::Object object", $obj->{id}]
524     }
525    
526     When a C<My::Object> is encoded to CBOR, it will instead encode a simple
527     array with two members: a string, and the "object id". Decoding this CBOR
528     string will yield a normal perl array reference in place of the object.
529    
530     A more useful and practical example would be a serialisation method for
531     the URI module. CBOR has a custom tag value for URIs, namely 32:
532    
533     sub URI::TO_CBOR {
534     my ($self) = @_;
535     my $uri = "$self"; # stringify uri
536     utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
537 root 1.28 CBOR::XS::tag 32, $uri
538 root 1.7 }
539    
540     This will encode URIs as a UTF-8 string with tag 32, which indicates an
541     URI.
542    
543     Decoding such an URI will not (currently) give you an URI object, but
544     instead a CBOR::XS::Tagged object with tag number 32 and the string -
545     exactly what was returned by C<TO_CBOR>.
546    
547     To serialise an object so it can automatically be deserialised, you need
548     to use C<FREEZE> and C<THAW>. To take the URI module as example, this
549     would be a possible implementation:
550    
551     sub URI::FREEZE {
552     my ($self, $serialiser) = @_;
553     "$self" # encode url string
554     }
555    
556     sub URI::THAW {
557     my ($class, $serialiser, $uri) = @_;
558    
559     $class->new ($uri)
560     }
561    
562     Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
563     example, a C<FREEZE> method that returns "type", "id" and "variant" values
564     would cause an invocation of C<THAW> with 5 arguments:
565    
566     sub My::Object::FREEZE {
567     my ($self, $serialiser) = @_;
568    
569     ($self->{type}, $self->{id}, $self->{variant})
570     }
571    
572     sub My::Object::THAW {
573     my ($class, $serialiser, $type, $id, $variant) = @_;
574    
575     $class->new (type => $type, id => $id, variant => $variant)
576     }
577    
578 root 1.1
579 root 1.7 =head1 MAGIC HEADER
580 root 1.3
581     There is no way to distinguish CBOR from other formats
582     programmatically. To make it easier to distinguish CBOR from other
583     formats, the CBOR specification has a special "magic string" that can be
584 root 1.18 prepended to any CBOR string without changing its meaning.
585 root 1.3
586     This string is available as C<$CBOR::XS::MAGIC>. This module does not
587 root 1.18 prepend this string to the CBOR data it generates, but it will ignore it
588 root 1.3 if present, so users can prepend this string as a "file type" indicator as
589     required.
590    
591    
592 root 1.12 =head1 THE CBOR::XS::Tagged CLASS
593    
594     CBOR has the concept of tagged values - any CBOR value can be tagged with
595     a numeric 64 bit number; these tag numbers are centrally administered.
596    
597     C<CBOR::XS> handles a few tags internally when en- or decoding. You can
598     also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
599     decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
600     unknown tag.
601    
602     These objects are simply blessed array references - the first member of
603     the array being the numerical tag, the second being the value.
604    
605     You can interact with C<CBOR::XS::Tagged> objects in the following ways:
606    
607     =over 4
608    
609     =item $tagged = CBOR::XS::tag $tag, $value
610    
611     This function(!) creates a new C<CBOR::XS::Tagged> object using the given
612     C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
613     value that can be encoded in CBOR, including serialisable Perl objects and
614     C<CBOR::XS::Tagged> objects).
615    
616     =item $tagged->[0]
617    
618     =item $tagged->[0] = $new_tag
619    
620     =item $tag = $tagged->tag
621    
622     =item $new_tag = $tagged->tag ($new_tag)
623    
624     Access/mutate the tag.
625    
626     =item $tagged->[1]
627    
628     =item $tagged->[1] = $new_value
629    
630     =item $value = $tagged->value
631    
632     =item $new_value = $tagged->value ($new_value)
633    
634     Access/mutate the tagged value.
635    
636     =back
637    
638     =cut
639    
# Build a CBOR::XS::Tagged object from a ($tag, $value) pair. This is a plain
# function, not a method; the result is a blessed array reference whose
# elements are the arguments, in order.
sub tag($$) {
   my @pair = @_;

   return bless \@pair, 'CBOR::XS::Tagged';
}
643    
# Combined accessor/mutator for the tag (array slot 0) of a CBOR::XS::Tagged
# object. When called with an argument besides the object, that argument is
# stored first; the current tag is returned either way.
sub CBOR::XS::Tagged::tag {
   if (@_ != 1) {
      $_[0]->[0] = $_[1];
   }

   return $_[0]->[0];
}
648    
# Combined accessor/mutator for the tagged value (array slot 1) of a
# CBOR::XS::Tagged object. When called with an argument besides the object,
# that argument is stored first; the current value is returned either way.
sub CBOR::XS::Tagged::value {
   if (@_ != 1) {
      $_[0]->[1] = $_[1];
   }

   return $_[0]->[1];
}
653    
654 root 1.13 =head2 EXAMPLES
655    
656     Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
657    
658     You can look up CBOR tag values and meanings in the IANA registry at
659     L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
660    
661     Prepend a magic header (C<$CBOR::XS::MAGIC>):
662    
663     my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
664     # same as:
665     my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
666    
667     Serialise some URIs and a regex in an array:
668    
669     my $cbor = encode_cbor [
670     (CBOR::XS::tag 32, "http://www.nethype.de/"),
671     (CBOR::XS::tag 32, "http://software.schmorp.de/"),
672     (CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
673     ];
674    
675     Wrap CBOR data in CBOR:
676    
677     my $cbor_cbor = encode_cbor
678     CBOR::XS::tag 24,
679     encode_cbor [1, 2, 3];
680    
681 root 1.19 =head1 TAG HANDLING AND EXTENSIONS
682    
683 root 1.22 This section describes how this module handles specific tagged values
684     and extensions. If a tag is not mentioned here and no additional filters
685     are provided for it, then the default handling applies (creating a
686     CBOR::XS::Tagged object on decoding, and only encoding the tag when
687     explicitly requested).
688 root 1.19
689 root 1.23 Tags not handled specifically are currently converted into a
690     L<CBOR::XS::Tagged> object, which is simply a blessed array reference
691     consisting of the numeric tag value followed by the (decoded) CBOR value.
692    
693 root 1.19 Future versions of this module reserve the right to special case
694 root 1.22 additional tags (such as base64url).
695    
696     =head2 ENFORCED TAGS
697    
698     These tags are always handled when decoding, and their handling cannot be
699     overridden by the user.
700 root 1.19
701     =over 4
702    
703 root 1.26 =item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
704 root 1.19
705 root 1.23 These tags are automatically created (and decoded) for serialisable
706     objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
707     serialisation protocol). See L<OBJECT SERIALISATION> for details.
708 root 1.19
709 root 1.26 =item 28, 29 (sharable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
710 root 1.19
711     These tags are automatically decoded when encountered, resulting in
712     shared values in the decoded object. They are only encoded, however, when
713     C<allow_sharing> is enabled.
714    
715 root 1.26 =item 256, 25 (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
716 root 1.21
717     These tags are automatically decoded when encountered. They are only
718 root 1.25 encoded, however, when C<pack_strings> is enabled.
719 root 1.21
720 root 1.19 =item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
721    
722     This tag is automatically generated when a reference is encountered (with
723     the exception of hash and array references). It is converted to a reference
724     when decoding.
725    
726     =item 55799 (self-describe CBOR, RFC 7049)
727    
728     This value is not generated on encoding (unless explicitly requested by
729     the user), and is simply ignored when decoding.
730    
731     =back
732    
733 root 1.24 =head2 NON-ENFORCED TAGS
734 root 1.22
735     These tags have default filters provided when decoding. Their handling can
736     be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
737 root 1.24 providing a custom C<filter> callback when decoding.
738 root 1.22
739     When they result in decoding into a specific Perl class, the module
740     usually provides a corresponding C<TO_CBOR> method as well.
741    
742     When any of these need to load additional modules that are not part of the
743     perl core distribution (e.g. L<URI>), it is (currently) up to the user to
744     provide these modules. The decoding usually fails with an exception if the
745     required module cannot be loaded.
746    
747     =over 4
748    
749     =item 2, 3 (positive/negative bignum)
750    
751     These tags are decoded into L<Math::BigInt> objects. The corresponding
752     C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
753     integers, and others into positive/negative CBOR bignums.
754    
755     =item 4, 5 (decimal fraction/bigfloat)
756    
757     Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
758     objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
759     encodes into a decimal fraction.
760    
761     CBOR cannot represent bigfloats with I<very> large exponents - conversion
762     of such big float objects is undefined.
763    
764     Also, NaN and infinities are not encoded properly.
765    
766     =item 21, 22, 23 (expected later JSON conversion)
767    
768     CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
769     tags.
770    
771     =item 32 (URI)
772    
773     These objects decode into L<URI> objects. The corresponding
774     C<URI::TO_CBOR> method again results in a CBOR URI value.
775    
776     =back
777    
778     =cut
779    
# Default decoders for the "non-enforced" tags, keyed by CBOR tag number.
# Each callback receives the tag number as its first and the tagged value
# as its last argument, and returns the decoded Perl value. Helper modules
# are only loaded (via require) when the corresponding tag is first seen.
#
# NOTE(review): %FILTER is assigned a second time further down in this
# file. The later assignment is the one in effect at runtime, so the two
# tables must be kept in sync (or one of them removed).
our %FILTER = (
   # 0 # rfc4287 datetime, utf-8
   # 1 # unix timestamp, any

   2 => sub { # pos bigint
      require Math::BigInt;
      # the extra leading zero keeps an empty byte string (value 0) parseable
      Math::BigInt->new ("0x0" . unpack ("H*", pop))
   },

   3 => sub { # neg bigint
      require Math::BigInt;
      # RFC 7049, section 2.4.2: the encoded value is -1 - n, not -n
      Math::BigInt->new ("0x0" . unpack ("H*", pop))->binc->bneg
   },

   4 => sub { # decimal fraction, array [exponent, mantissa]
      require Math::BigFloat;
      Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
   },

   5 => sub { # bigfloat, array [exponent, mantissa], base 2
      require Math::BigFloat;
      # scalar: only the shifted value itself is wanted
      scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
   },

   # the "expected conversion" hints are dropped, the value passes through
   21 => sub { pop }, # expected conversion to base64url encoding
   22 => sub { pop }, # expected conversion to base64 encoding
   23 => sub { pop }, # expected conversion to base16 encoding

   # 24 # embedded cbor, byte string

   32 => sub { # uri
      require URI;
      URI->new (pop)
   },

   # 33 # base64url rfc4648, utf-8
   # 34 # base64 rfc4648, utf-8
   # 35 # regex pcre/ecma262, utf-8
   # 36 # mime message rfc2045, utf-8
);
820    
821 root 1.19
822 root 1.7 =head1 CBOR and JSON
823 root 1.1
824 root 1.4 CBOR is supposed to implement a superset of the JSON data model, and is,
825     with some coercion, able to represent all JSON texts (something that other
826     "binary JSON" formats such as BSON generally do not support).
827    
828     CBOR implements some extra hints and support for JSON interoperability,
829     and the spec offers further guidance for conversion between CBOR and
830     JSON. None of this is currently implemented in CBOR::XS, and the guidelines
831     in the spec do not result in correct round-tripping of data. If JSON
832     interoperability is improved in the future, then the goal will be to
833     ensure that decoded JSON data will round-trip encoding and decoding to
834     CBOR intact.
835 root 1.1
836    
837     =head1 SECURITY CONSIDERATIONS
838    
839     When you are using CBOR in a protocol, talking to untrusted potentially
840     hostile creatures requires relatively few measures.
841    
842     First of all, your CBOR decoder should be secure, that is, should not have
843     any buffer overflows. Obviously, this module should ensure that and I am
844     trying hard on making that true, but you never know.
845    
846     Second, you need to avoid resource-starving attacks. That means you should
847     limit the size of CBOR data you accept, or make sure that when your
848     resources run out, that's just fine (e.g. by using a separate process that
849     can crash safely). The size of a CBOR string in octets is usually a good
850     indication of the size of the resources required to decode it into a Perl
851     structure. While CBOR::XS can check the size of the CBOR text, it might be
852     too late when you already have it in memory, so you might want to check
853     the size before you accept the string.
854    
855     Third, CBOR::XS recurses using the C stack when decoding objects and
856     arrays. The C stack is a limited resource: for instance, on my amd64
857     machine with 8MB of stack size I can decode around 180k nested arrays but
858     only 14k nested CBOR objects (due to perl itself recursing deeply on croak
859     to free the temporary). If that is exceeded, the program crashes. To be
860     conservative, the default nesting limit is set to 512. If your process
861     has a smaller stack, you should adjust this setting accordingly with the
862     C<max_depth> method.
863    
864     Something else could bomb you, too, that I forgot to think of. In that
865     case, you get to keep the pieces. I am always open for hints, though...
866    
867     Also keep in mind that CBOR::XS might leak contents of your Perl data
868     structures in its error messages, so when you serialise sensitive
869     information you might want to make sure that exceptions thrown by CBOR::XS
870     will not end up in front of untrusted eyes.
871    
872     =head1 CBOR IMPLEMENTATION NOTES
873    
874     This section contains some random implementation notes. They do not
875     describe guaranteed behaviour, but merely behaviour as-is implemented
876     right now.
877    
878     64 bit integers are only properly decoded when Perl was built with 64 bit
879     support.
880    
881     Strings and arrays are encoded with a definite length. Hashes as well,
882     unless they are tied (or otherwise magical).
883    
884     Only the double data type is supported for NV data types - when Perl uses
885     long double to represent floating point values, they might not be encoded
886     properly. Half precision types are accepted, but not encoded.
887    
888     Strict mode and canonical mode are not implemented.
889    
890    
891 root 1.30 =head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT
892    
893     On perls that were built without 64 bit integer support (these are rare
894     nowadays, even on 32 bit architectures), support for any kind of 64 bit
895     integer in CBOR is very limited - most likely, these 64 bit values will
896     be truncated, corrupted, or otherwise not decoded correctly. This also
897     includes string, array and map sizes that are stored as 64 bit integers.
898    
899    
900 root 1.1 =head1 THREADS
901    
902     This module is I<not> guaranteed to be thread safe and there are no
903     plans to change this until Perl gets thread support (as opposed to the
904     horribly slow so-called "threads" which are simply slow and bloated
905     process simulations - use fork, it's I<much> faster, cheaper, better).
906    
907     (It might actually work, but you have been warned).
908    
909    
910     =head1 BUGS
911    
912     While the goal of this module is to be correct, that unfortunately does
913     not mean it's bug-free, only that I think its design is bug-free. If you
914     keep reporting bugs they will be fixed swiftly, though.
915    
916     Please refrain from using rt.cpan.org or any other bug reporting
917     service. I put the contact address into my modules for a reason.
918    
919     =cut
920    
# Default decoders for the "non-enforced" tags, keyed by CBOR tag number.
# Each callback receives the tag number as its first and the tagged value
# as its last argument, and returns the decoded Perl value. Helper modules
# are only loaded (via require) when the corresponding tag is first seen.
#
# NOTE(review): %FILTER is also assigned earlier in this file; this later
# assignment replaces that table wholesale and is the one in effect at
# runtime.
our %FILTER = (
   # 0 # rfc4287 datetime, utf-8
   # 1 # unix timestamp, any

   2 => sub { # pos bigint
      require Math::BigInt;
      # the extra leading zero keeps an empty byte string (value 0) parseable
      Math::BigInt->new ("0x0" . unpack ("H*", pop))
   },

   3 => sub { # neg bigint
      require Math::BigInt;
      # RFC 7049, section 2.4.2: the encoded value is -1 - n, not -n
      Math::BigInt->new ("0x0" . unpack ("H*", pop))->binc->bneg
   },

   4 => sub { # decimal fraction, array [exponent, mantissa]
      require Math::BigFloat;
      Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
   },

   5 => sub { # bigfloat, array [exponent, mantissa], base 2
      require Math::BigFloat;
      # scalar: only the shifted value itself is wanted
      scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
   },

   # the "expected conversion" hints are dropped, the value passes through
   21 => sub { pop }, # expected conversion to base64url encoding
   22 => sub { pop }, # expected conversion to base64 encoding
   23 => sub { pop }, # expected conversion to base16 encoding

   # 24 # embedded cbor, byte string

   32 => sub { # uri
      require URI;
      URI->new (pop)
   },

   # 33 # base64url rfc4648, utf-8
   # 34 # base64 rfc4648, utf-8
   # 35 # regex pcre/ecma262, utf-8
   # 36 # mime message rfc2045, utf-8
);
961    
# Look up the decoder registered in %FILTER for the tag number in $_[0].
# Without a registered decoder, return nothing; otherwise call it with
# this sub's own argument list and hand back whatever it returns.
sub CBOR::XS::default_filter {
   my $decoder = $FILTER{$_[0]}
      or return;

   # "&$decoder;" without parens passes the current @_ straight through
   &$decoder
}
965    
# Serialise a URI object as a CBOR tag 32 value wrapping its string form.
sub URI::TO_CBOR {
   my ($self) = @_;

   my $text = $self->as_string;
   # NOTE(review): presumably upgraded so the encoder emits a CBOR text
   # string rather than a byte string - confirm against the XS encoder
   utf8::upgrade ($text);

   CBOR::XS::tag (32, $text)
}
971    
# Serialise a Math::BigInt for CBOR.
#
# Values that fit a signed 32 bit integer are returned as plain Perl
# numbers. Everything else becomes a tagged byte string: tag 2 for
# non-negative values, tag 3 for negative ones, with the magnitude in
# big-endian order. The object passed in is not modified.
sub Math::BigInt::TO_CBOR {
   my $n = $_[0];

   return $n->numify
      if $n >= -2147483648 && $n <= 2147483647;

   my $negative = $n < 0;

   # Tag 3 stores -1 - n (RFC 7049, section 2.4.2). Converting to that
   # non-negative magnitude first also keeps as_hex from emitting a
   # leading "-", which would otherwise corrupt the hex string below.
   my $magnitude = $negative ? $n->copy->binc->bneg : $n->copy;

   my $hex = substr $magnitude->as_hex, 2; # strip the "0x" prefix
   $hex = "0$hex" if 1 & length $hex;      # pack "H*" wants whole octets

   CBOR::XS::tag (($negative ? 3 : 2), pack ("H*", $hex))
}
981    
# Serialise a Math::BigFloat as a CBOR decimal fraction (tag 4): a two
# element array holding the numified exponent followed by the mantissa
# object, both as returned by ->parts.
sub Math::BigFloat::TO_CBOR {
   my ($mantissa, $exponent) = $_[0]->parts;

   CBOR::XS::tag (4, [$exponent->numify, $mantissa])
}
986    
# Load the compiled (XS) part of the module, passing this module's $VERSION.
XSLoader::load ("CBOR::XS", $VERSION);
988    
989     =head1 SEE ALSO
990    
991     The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
992     serialisation.
993    
994 root 1.6 The L<Types::Serialiser> module provides the data model for true, false
995     and error values.
996    
997 root 1.1 =head1 AUTHOR
998    
999     Marc Lehmann <schmorp@schmorp.de>
1000     http://home.schmorp.de/
1001    
1002     =cut
1003    
1004 root 1.6 1
1005