ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.pm
Revision: 1.28
Committed: Thu Nov 28 16:09:04 2013 UTC (10 years, 5 months ago) by root
Branch: MAIN
CVS Tags: rel-1_0
Changes since 1.27: +28 -32 lines
Log Message:
1.0

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     CBOR::XS - Concise Binary Object Representation (CBOR, RFC7049)
4    
5     =encoding utf-8
6    
7     =head1 SYNOPSIS
8    
9     use CBOR::XS;
10    
11     $binary_cbor_data = encode_cbor $perl_value;
12     $perl_value = decode_cbor $binary_cbor_data;
13    
14     # OO-interface
15    
16     $coder = CBOR::XS->new;
17 root 1.6 $binary_cbor_data = $coder->encode ($perl_value);
18     $perl_value = $coder->decode ($binary_cbor_data);
19    
20     # prefix decoding
21    
22     my $many_cbor_strings = ...;
23     while (length $many_cbor_strings) {
24     my ($data, $length) = $coder->decode_prefix ($many_cbor_strings);
25     # data was decoded
26     substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
27     }
28 root 1.1
29     =head1 DESCRIPTION
30    
31 root 1.5 This module converts Perl data structures to the Concise Binary Object
32     Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
33 root 1.28 format that aims to use an (almost) superset of the JSON data model, i.e.
34     when you can represent something useful in JSON, you should be able to
35     represent it in CBOR.
36 root 1.1
37 root 1.28 In short, CBOR is a faster and quite compact binary alternative to JSON,
38 root 1.10 with the added ability of supporting serialisation of Perl objects. (JSON
39     often compresses better than CBOR though, so if you plan to compress the
40 root 1.28 data later and speed is less important you might want to compare both
41     formats first).
42 root 1.5
43 root 1.15 To give you a general idea about speed, with texts in the megabyte range,
44     C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
45     L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
46     data, the worse L<Storable> performs in comparison.
47    
48 root 1.28 Regarding compactness, C<CBOR::XS>-encoded data structures are usually
49     about 20% smaller than the same data encoded as (compact) JSON or
50     L<Storable>.
51    
52     In addition to the core CBOR data format, this module implements a
53     number of extensions, to support cyclic and shared data structures (see
54     C<allow_sharing>), string deduplication (see C<pack_strings>) and scalar
55     references (always enabled).
56 root 1.21
57 root 1.5 The primary goal of this module is to be I<correct> and the secondary goal
58     is to be I<fast>. To reach the latter goal it was written in C.
59 root 1.1
60     See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
61     vice versa.
62    
63     =cut
64    
65     package CBOR::XS;
66    
67     use common::sense;
68    
69 root 1.28 our $VERSION = '1.0';
70 root 1.1 our @ISA = qw(Exporter);
71    
72     our @EXPORT = qw(encode_cbor decode_cbor);
73    
74     use Exporter;
75     use XSLoader;
76    
77 root 1.6 use Types::Serialiser;
78    
79 root 1.3 our $MAGIC = "\xd9\xd9\xf7"; # encoded CBOR tag 55799 (self-describe CBOR); can be prepended to CBOR data as a magic header
80    
81 root 1.1 =head1 FUNCTIONAL INTERFACE
82    
83     The following convenience methods are provided by this module. They are
84     exported by default:
85    
86     =over 4
87    
88     =item $cbor_data = encode_cbor $perl_scalar
89    
90     Converts the given Perl data structure to CBOR representation. Croaks on
91     error.
92    
93     =item $perl_scalar = decode_cbor $cbor_data
94    
95     The opposite of C<encode_cbor>: expects a valid CBOR string to parse,
96     returning the resulting perl scalar. Croaks on error.
97    
98     =back
99    
100    
101     =head1 OBJECT-ORIENTED INTERFACE
102    
103     The object oriented interface lets you configure your own encoding or
104     decoding style, within the limits of supported formats.
105    
106     =over 4
107    
108     =item $cbor = new CBOR::XS
109    
110     Creates a new CBOR::XS object that can be used to de/encode CBOR
111     strings. All boolean flags described below are by default I<disabled>.
112    
113     The mutators for flags all return the CBOR object again and thus calls can
114     be chained:
115    
116     my $cbor = CBOR::XS->new->encode ({a => [1,2]});
117    
118     =item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
119    
120     =item $max_depth = $cbor->get_max_depth
121    
122     Sets the maximum nesting level (default C<512>) accepted while encoding
123     or decoding. If a higher nesting level is detected in CBOR data or a Perl
124     data structure, then the encoder and decoder will stop and croak at that
125     point.
126    
127     Nesting level is defined by number of hash- or arrayrefs that the encoder
128     needs to traverse to reach a given point or the number of C<{> or C<[>
129     characters without their matching closing parenthesis crossed to reach a
130     given character in a string.
131    
132     Setting the maximum depth to one disallows any nesting, so that ensures
133     that the object is only a single hash/object or array.
134    
135     If no argument is given, the highest possible setting will be used, which
136     is rarely useful.
137    
138     Note that nesting is implemented by recursion in C. The default value has
139     been chosen to be as large as typical operating systems allow without
140     crashing.
141    
142     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
143    
144     =item $cbor = $cbor->max_size ([$maximum_string_size])
145    
146     =item $max_size = $cbor->get_max_size
147    
148     Set the maximum length a CBOR string may have (in bytes) where decoding
149     is being attempted. The default is C<0>, meaning no limit. When C<decode>
150     is called on a string that is longer than this many bytes, it will not
151     attempt to decode the string but throw an exception. This setting has no
152     effect on C<encode> (yet).
153    
154     If no argument is given, the limit check will be deactivated (same as when
155     C<0> is specified).
156    
157     See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
158    
159 root 1.19 =item $cbor = $cbor->allow_unknown ([$enable])
160    
161     =item $enabled = $cbor->get_allow_unknown
162    
163     If C<$enable> is true (or missing), then C<encode> will I<not> throw an
164     exception when it encounters values it cannot represent in CBOR (for
165     example, filehandles) but instead will encode a CBOR C<error> value.
166    
167     If C<$enable> is false (the default), then C<encode> will throw an
168     exception when it encounters anything it cannot encode as CBOR.
169    
170     This option does not affect C<decode> in any way, and it is recommended to
171     leave it off unless you know your communications partner.
172    
173 root 1.20 =item $cbor = $cbor->allow_sharing ([$enable])
174 root 1.19
175 root 1.20 =item $enabled = $cbor->get_allow_sharing
176 root 1.19
177     If C<$enable> is true (or missing), then C<encode> will not double-encode
178 root 1.20 values that have been referenced before (e.g. when the same object, such
179     as an array, is referenced multiple times), but instead will emit a
180     reference to the earlier value.
181 root 1.19
182     This means that such values will only be encoded once, and will not result
183     in a deep cloning of the value on decode, in decoders supporting the value
184 root 1.25 sharing extension. This also makes it possible to encode cyclic data
185     structures.
186 root 1.19
187 root 1.21 It is recommended to leave it off unless you know your
188     communication partner supports the value sharing extensions to CBOR
189 root 1.26 (L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
190 root 1.25 resulting data structure might be unusable.
191 root 1.21
192 root 1.19 Detecting shared values incurs a runtime overhead when values are encoded
193     that have a reference counter larger than one, and might unnecessarily
194     increase the encoded size, as potentially shared values are encoded as
195     sharable whether or not they are actually shared.
196    
197 root 1.20 At the moment, only targets of references can be shared (e.g. scalars,
198     arrays or hashes pointed to by a reference). Weirder constructs, such as
199     an array with multiple "copies" of the I<same> string, which are hard but
200     not impossible to create in Perl, are not supported (this is the same as
201 root 1.25 with L<Storable>).
202 root 1.19
203 root 1.25 If C<$enable> is false (the default), then C<encode> will encode shared
204     data structures repeatedly, unsharing them in the process. Cyclic data
205     structures cannot be encoded in this mode.
206 root 1.19
207     This option does not affect C<decode> in any way - shared values and
208 root 1.21 references will always be decoded properly if present.
209    
210 root 1.25 =item $cbor = $cbor->pack_strings ([$enable])
211 root 1.21
212 root 1.25 =item $enabled = $cbor->get_pack_strings
213 root 1.21
214     If C<$enable> is true (or missing), then C<encode> will try not to encode
215     the same string twice, but will instead encode a reference to the string
216 root 1.25 instead. Depending on your data format, this can save a lot of space, but
217 root 1.21 also results in a very large runtime overhead (expect encoding times to be
218     2-4 times as high as without).
219    
220     It is recommended to leave it off unless you know your
221     communications partner supports the stringref extension to CBOR
222 root 1.26 (L<http://cbor.schmorp.de/stringref>), as without decoder support, the
223 root 1.25 resulting data structure might not be usable.
224 root 1.21
225 root 1.25 If C<$enable> is false (the default), then C<encode> will encode strings
226     the standard CBOR way.
227 root 1.21
228     This option does not affect C<decode> in any way - string references will
229     always be decoded properly if present.
230 root 1.19
231 root 1.23 =item $cbor = $cbor->filter ([$cb->($tag, $value)])
232    
233     =item $cb_or_undef = $cbor->get_filter
234    
235 root 1.24 Sets or replaces the tagged value decoding filter (when C<$cb> is
236     specified) or clears the filter (if no argument or C<undef> is provided).
237    
238     The filter callback is called only during decoding, when a non-enforced
239     tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
240     list of enforced tags). For specific tags, it's often better to provide a
241     default converter using the C<%CBOR::XS::FILTER> hash (see below).
242    
243     The first argument is the numerical tag, the second is the (decoded) value
244     that has been tagged.
245    
246     The filter function should return either exactly one value, which will
247     replace the tagged value in the decoded data structure, or no values,
248     which will result in default handling, which currently means the decoder
249     creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
250    
251     When the filter is cleared (the default state), the default filter
252     function, C<CBOR::XS::default_filter>, is used. This function simply looks
253     up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists it must be
254     a code reference that is called with tag and value, and is responsible for
255     decoding the value. If no entry exists, it returns no values.
256    
257 root 1.28 Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
258 root 1.24 objects, with no other special handling (useful when working with
259     potentially "unsafe" CBOR data).
260    
261     CBOR::XS->new->filter (sub { })->decode ($cbor_data);
262    
263     Example: provide a global filter for tag 1347375694, converting the value
264     into some string form.
265    
266     $CBOR::XS::FILTER{1347375694} = sub {
267     my ($tag, $value) = @_;
268    
269     "tag 1347375694 value $value"
270     };
271 root 1.23
272 root 1.1 =item $cbor_data = $cbor->encode ($perl_scalar)
273    
274     Converts the given Perl data structure (a scalar value) to its CBOR
275     representation.
276    
277     =item $perl_scalar = $cbor->decode ($cbor_data)
278    
279     The opposite of C<encode>: expects CBOR data and tries to parse it,
280     returning the resulting simple scalar or reference. Croaks on error.
281    
282     =item ($perl_scalar, $octets) = $cbor->decode_prefix ($cbor_data)
283    
284     This works like the C<decode> method, but instead of raising an exception
285     when there is trailing garbage after the CBOR string, it will silently
286     stop parsing there and return the number of characters consumed so far.
287    
288     This is useful if your CBOR texts are not delimited by an outer protocol
289     and you need to know where the first CBOR string ends and the next one
290     starts.
291    
292     CBOR::XS->new->decode_prefix ("......")
293     => ("...", 3)
294    
295     =back
296    
297    
298     =head1 MAPPING
299    
300     This section describes how CBOR::XS maps Perl values to CBOR values and
301     vice versa. These mappings are designed to "do the right thing" in most
302     circumstances automatically, preserving round-tripping characteristics
303     (what you put in comes out as something equivalent).
304    
305     For the more enlightened: note that in the following descriptions,
306     lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
307     refers to the abstract Perl language itself.
308    
309    
310     =head2 CBOR -> PERL
311    
312     =over 4
313    
314 root 1.4 =item integers
315    
316     CBOR integers become (numeric) perl scalars. On perls without 64 bit
317     support, 64 bit integers will be truncated or otherwise corrupted.
318    
319     =item byte strings
320    
321 root 1.27 Byte strings will become octet strings in Perl (the Byte values 0..255
322 root 1.4 will simply become characters of the same value in Perl).
323    
324     =item UTF-8 strings
325    
326     UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
327     decoded into proper Unicode code points. At the moment, the validity of
328     the UTF-8 octets will not be validated - corrupt input will result in
329     corrupted Perl strings.
330    
331     =item arrays, maps
332    
333     CBOR arrays and CBOR maps will be converted into references to a Perl
334     array or hash, respectively. The keys of the map will be stringified
335     during this process.
336    
337 root 1.6 =item null
338    
339     CBOR null becomes C<undef> in Perl.
340    
341     =item true, false, undefined
342 root 1.1
343 root 1.6 These CBOR values become C<Types::Serialiser::true>,
344     C<Types::Serialiser::false> and C<Types::Serialiser::error>,
345 root 1.1 respectively. They are overloaded to act almost exactly like the numbers
346 root 1.6 C<1> and C<0> (for true and false) or to throw an exception on access (for
347     error). See the L<Types::Serialiser> manpage for details.
348    
349 root 1.23 =item tagged values
350 root 1.1
351 root 1.23 Tagged items consist of a numeric tag and another CBOR value.
352 root 1.4
353 root 1.23 See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
354 root 1.28 for details on which tags are handled how.
355 root 1.4
356     =item anything else
357    
358     Anything else (e.g. unsupported simple values) will raise a decoding
359     error.
360 root 1.1
361     =back
362    
363    
364     =head2 PERL -> CBOR
365    
366     The mapping from Perl to CBOR is slightly more difficult, as Perl is a
367 root 1.28 typeless language. That means this module can only guess which CBOR type
368     is meant by a perl value.
369 root 1.1
370     =over 4
371    
372     =item hash references
373    
374 root 1.4 Perl hash references become CBOR maps. As there is no inherent ordering in
375     hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
376 root 1.28 order. This order can be different each time a hash is encoded.
377 root 1.4
378     Currently, tied hashes will use the indefinite-length format, while normal
379     hashes will use the fixed-length format.
380 root 1.1
381     =item array references
382    
383 root 1.4 Perl array references become fixed-length CBOR arrays.
384 root 1.1
385     =item other references
386    
387 root 1.28 Other unblessed references will be represented using
388     the indirection tag extension (tag value C<22098>,
389     L<http://cbor.schmorp.de/indirection>). CBOR decoders are guaranteed
390     to be able to decode these values somehow, by either "doing the right
391     thing", decoding into a generic tagged object, simply ignoring the tag, or
392     something else.
393 root 1.4
394     =item CBOR::XS::Tagged objects
395    
396     Objects of this type must be arrays consisting of a single C<[tag, value]>
397 root 1.13 pair. The (numerical) tag will be encoded as a CBOR tag, the value will
398 root 1.28 be encoded as appropriate for the value. You must use C<CBOR::XS::tag> to
399 root 1.13 create such objects.
400 root 1.1
401 root 1.6 =item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
402 root 1.1
403 root 1.6 These special values become CBOR true, CBOR false and CBOR undefined
404     values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
405     if you want.
406 root 1.1
407 root 1.7 =item other blessed objects
408 root 1.1
409 root 1.7 Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
410 root 1.23 L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
411     module, and L<OBJECT SERIALISATION> for generic object serialisation.
412 root 1.1
413     =item simple scalars
414    
415     Simple Perl scalars (any scalar that is not a reference) are the most
416     difficult objects to encode: CBOR::XS will encode undefined scalars as
417 root 1.4 CBOR null values, scalars that have last been used in a string context
418 root 1.1 before encoding as CBOR strings, and anything else as number value:
419    
420     # dump as number
421     encode_cbor [2] # yields [2]
422     encode_cbor [-3.0e17] # yields [-3e+17]
423     my $value = 5; encode_cbor [$value] # yields [5]
424    
425 root 1.27 # used as string, so dump as string (either byte or text)
426 root 1.1 print $value;
427     encode_cbor [$value] # yields ["5"]
428    
429     # undef becomes null
430     encode_cbor [undef] # yields [null]
431    
432     You can force the type to be a CBOR string by stringifying it:
433    
434     my $x = 3.1; # some variable containing a number
435     "$x"; # stringified
436     $x .= ""; # another, more awkward way to stringify
437     print $x; # perl does it for you, too, quite often
438    
439 root 1.27 You can force whether a string is encoded as byte or text string by using
440     C<utf8::upgrade> and C<utf8::downgrade>:
441    
442     utf8::upgrade $x; # encode $x as text string
443     utf8::downgrade $x; # encode $x as byte string
444    
445     Perl doesn't define what operations up- and downgrade strings, so if the
446     difference between byte and text is important, you should up- or downgrade
447     your string as late as possible before encoding.
448    
449 root 1.1 You can force the type to be a CBOR number by numifying it:
450    
451     my $x = "3"; # some variable containing a string
452     $x += 0; # numify it, ensuring it will be dumped as a number
453     $x *= 1; # same thing, the choice is yours.
454    
455     You can not currently force the type in other, less obscure, ways. Tell me
456     if you need this capability (but don't forget to explain why it's needed
457     :).
458    
459 root 1.4 Perl values that seem to be integers generally use the shortest possible
460     representation. Floating-point values will use either the IEEE single
461     format if possible without loss of precision, otherwise the IEEE double
462     format will be used. Perls that use formats other than IEEE double to
463     represent numerical values are supported, but might suffer loss of
464     precision.
465 root 1.1
466     =back
467    
468 root 1.7 =head2 OBJECT SERIALISATION
469    
470     This module knows two ways to serialise a Perl object: The CBOR-specific
471     way, and the generic way.
472    
473     Whenever the encoder encounters a Perl object that it cannot serialise
474     directly (most of them), it will first look up the C<TO_CBOR> method on
475     it.
476    
477     If it has a C<TO_CBOR> method, it will call it with the object as only
478     argument, and expects exactly one return value, which it will then
479     substitute and encode it in the place of the object.
480    
481     Otherwise, it will look up the C<FREEZE> method. If it exists, it will
482     call it with the object as first argument, and the constant string C<CBOR>
483     as the second argument, to distinguish it from other serialisers.
484    
485     The C<FREEZE> method can return any number of values (i.e. zero or
486     more). These will be encoded as CBOR perl object, together with the
487     classname.
488    
489     If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
490     with an error.
491    
492     Objects encoded via C<TO_CBOR> cannot be automatically decoded, but
493     objects encoded via C<FREEZE> can be decoded using the following protocol:
494    
495     When an encoded CBOR perl object is encountered by the decoder, it will
496     look up the C<THAW> method, by using the stored classname, and will fail
497     if the method cannot be found.
498    
499     After the lookup it will call the C<THAW> method with the stored classname
500     as first argument, the constant string C<CBOR> as second argument, and all
501     values returned by C<FREEZE> as remaining arguments.
502    
503     =head4 EXAMPLES
504    
505     Here is an example C<TO_CBOR> method:
506    
507     sub My::Object::TO_CBOR {
508     my ($obj) = @_;
509    
510     ["this is a serialised My::Object object", $obj->{id}]
511     }
512    
513     When a C<My::Object> is encoded to CBOR, it will instead encode a simple
514     array with two members: a string, and the "object id". Decoding this CBOR
515     string will yield a normal perl array reference in place of the object.
516    
517     A more useful and practical example would be a serialisation method for
518     the URI module. CBOR has a custom tag value for URIs, namely 32:
519    
520     sub URI::TO_CBOR {
521     my ($self) = @_;
522     my $uri = "$self"; # stringify uri
523     utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
524 root 1.28 CBOR::XS::tag 32, $uri
525 root 1.7 }
526    
527     This will encode URIs as a UTF-8 string with tag 32, which indicates an
528     URI.
529    
530     Decoding such an URI will not (currently) give you an URI object, but
531     instead a CBOR::XS::Tagged object with tag number 32 and the string -
532     exactly what was returned by C<TO_CBOR>.
533    
534     To serialise an object so it can automatically be deserialised, you need
535     to use C<FREEZE> and C<THAW>. To take the URI module as example, this
536     would be a possible implementation:
537    
538     sub URI::FREEZE {
539     my ($self, $serialiser) = @_;
540     "$self" # encode url string
541     }
542    
543     sub URI::THAW {
544     my ($class, $serialiser, $uri) = @_;
545    
546     $class->new ($uri)
547     }
548    
549     Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
550     example, a C<FREEZE> method that returns "type", "id" and "variant" values
551     would cause an invocation of C<THAW> with 5 arguments:
552    
553     sub My::Object::FREEZE {
554     my ($self, $serialiser) = @_;
555    
556     ($self->{type}, $self->{id}, $self->{variant})
557     }
558    
559     sub My::Object::THAW {
560     my ($class, $serialiser, $type, $id, $variant) = @_;
561    
562     $class->new (type => $type, id => $id, variant => $variant)
563     }
564    
565 root 1.1
566 root 1.7 =head1 MAGIC HEADER
567 root 1.3
568     There is no way to distinguish CBOR from other formats
569     programmatically. To make it easier to distinguish CBOR from other
570     formats, the CBOR specification has a special "magic string" that can be
571 root 1.18 prepended to any CBOR string without changing its meaning.
572 root 1.3
573     This string is available as C<$CBOR::XS::MAGIC>. This module does not
574 root 1.18 prepend this string to the CBOR data it generates, but it will ignore it
575 root 1.3 if present, so users can prepend this string as a "file type" indicator as
576     required.
577    
578    
579 root 1.12 =head1 THE CBOR::XS::Tagged CLASS
580    
581     CBOR has the concept of tagged values - any CBOR value can be tagged with
582     a numeric 64 bit number, which are centrally administered.
583    
584     C<CBOR::XS> handles a few tags internally when en- or decoding. You can
585     also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
586     decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
587     unknown tag.
588    
589     These objects are simply blessed array references - the first member of
590     the array being the numerical tag, the second being the value.
591    
592     You can interact with C<CBOR::XS::Tagged> objects in the following ways:
593    
594     =over 4
595    
596     =item $tagged = CBOR::XS::tag $tag, $value
597    
598     This function(!) creates a new C<CBOR::XS::Tagged> object using the given
599     C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
600     value that can be encoded in CBOR, including serialisable Perl objects and
601     C<CBOR::XS::Tagged> objects).
602    
603     =item $tagged->[0]
604    
605     =item $tagged->[0] = $new_tag
606    
607     =item $tag = $tagged->tag
608    
609     =item $new_tag = $tagged->tag ($new_tag)
610    
611     Access/mutate the tag.
612    
613     =item $tagged->[1]
614    
615     =item $tagged->[1] = $new_value
616    
617     =item $value = $tagged->value
618    
619     =item $new_value = $tagged->value ($new_value)
620    
621     Access/mutate the tagged value.
622    
623     =back
624    
625     =cut
626    
627     sub tag($$) { # plain function (not a method): wraps its two arguments (tag, value) in a CBOR::XS::Tagged object
628     bless [@_], CBOR::XS::Tagged::; # the object is a blessed array reference holding a copy of the arguments: [tag, value]
629     }
630    
631     sub CBOR::XS::Tagged::tag { # combined getter/setter for the tag, stored in element 0 of the object
632     $_[0][0] = $_[1] if $#_; # $#_ is non-zero only when an argument was passed in addition to the object
633     $_[0][0] # returns the (possibly just replaced) tag
634     }
635    
636     sub CBOR::XS::Tagged::value { # combined getter/setter for the tagged value, stored in element 1 of the object
637     $_[0][1] = $_[1] if $#_; # $#_ is non-zero only when an argument was passed in addition to the object
638     $_[0][1] # returns the (possibly just replaced) value
639     }
640    
641 root 1.13 =head2 EXAMPLES
642    
643     Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
644    
645     You can look up CBOR tag values and meanings in the IANA registry at
646     L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
647    
648     Prepend a magic header (C<$CBOR::XS::MAGIC>):
649    
650     my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
651     # same as:
652     my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
653    
654     Serialise some URIs and a regex in an array:
655    
656     my $cbor = encode_cbor [
657     (CBOR::XS::tag 32, "http://www.nethype.de/"),
658     (CBOR::XS::tag 32, "http://software.schmorp.de/"),
659     (CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
660     ];
661    
662     Wrap CBOR data in CBOR:
663    
664     my $cbor_cbor = encode_cbor
665     CBOR::XS::tag 24,
666     encode_cbor [1, 2, 3];
667    
668 root 1.19 =head1 TAG HANDLING AND EXTENSIONS
669    
670 root 1.22 This section describes how this module handles specific tagged values
671     and extensions. If a tag is not mentioned here and no additional filters
672     are provided for it, then the default handling applies (creating a
673     CBOR::XS::Tagged object on decoding, and only encoding the tag when
674     explicitly requested).
675 root 1.19
676 root 1.23 Tags not handled specifically are currently converted into a
677     L<CBOR::XS::Tagged> object, which is simply a blessed array reference
678     consisting of the numeric tag value followed by the (decoded) CBOR value.
679    
680 root 1.19 Future versions of this module reserve the right to special case
681 root 1.22 additional tags (such as base64url).
682    
683     =head2 ENFORCED TAGS
684    
685     These tags are always handled when decoding, and their handling cannot be
686     overridden by the user.
687 root 1.19
688     =over 4
689    
690 root 1.26 =item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
691 root 1.19
692 root 1.23 These tags are automatically created (and decoded) for serialisable
693     objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
694     serialisation protocol). See L<OBJECT SERIALISATION> for details.
695 root 1.19
696 root 1.26 =item 28, 29 (sharable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
697 root 1.19
698     These tags are automatically decoded when encountered, resulting in
699     shared values in the decoded object. They are only encoded, however, when
700     C<allow_sharing> is enabled.
701    
702 root 1.26 =item 256, 25 (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
703 root 1.21
704     These tags are automatically decoded when encountered. They are only
705 root 1.25 encoded, however, when C<pack_strings> is enabled.
706 root 1.21
707 root 1.19 =item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
708    
709     This tag is automatically generated when a reference is encountered (with
710     the exception of hash and array references). It is converted to a reference
711     when decoding.
712    
713     =item 55799 (self-describe CBOR, RFC 7049)
714    
715     This value is not generated on encoding (unless explicitly requested by
716     the user), and is simply ignored when decoding.
717    
718     =back
719    
720 root 1.24 =head2 NON-ENFORCED TAGS
721 root 1.22
722     These tags have default filters provided when decoding. Their handling can
723     be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
724 root 1.24 providing a custom C<filter> callback when decoding.
725 root 1.22
726     When they result in decoding into a specific Perl class, the module
727     usually provides a corresponding C<TO_CBOR> method as well.
728    
729     When any of these need to load additional modules that are not part of the
730     perl core distribution (e.g. L<URI>), it is (currently) up to the user to
731     provide these modules. The decoding usually fails with an exception if the
732     required module cannot be loaded.
733    
734     =over 4
735    
736     =item 2, 3 (positive/negative bignum)
737    
738     These tags are decoded into L<Math::BigInt> objects. The corresponding
739     C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
740     integers, and others into positive/negative CBOR bignums.
741    
742     =item 4, 5 (decimal fraction/bigfloat)
743    
744     Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
745     objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
746     encodes into a decimal fraction.
747    
748     CBOR cannot represent bigfloats with I<very> large exponents - conversion
749     of such big float objects is undefined.
750    
751     Also, NaN and infinities are not encoded properly.
752    
753     =item 21, 22, 23 (expected later JSON conversion)
754    
755     CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
756     tags.
757    
758     =item 32 (URI)
759    
760     These objects decode into L<URI> objects. The corresponding
761     C<URI::TO_CBOR> method again results in a CBOR URI value.
762    
763     =back
764    
765     =cut
766    
# Default decoding filters for the non-enforced tags, keyed by CBOR tag
# number. A filter receives the tag number in $_[0] and the already decoded
# tagged value as its last argument, and returns the Perl value that
# replaces the tagged item.
our %FILTER = (
    # 0 # rfc4287 datetime, utf-8
    # 1 # unix timestamp, any

    2 => sub { # pos bigint: the byte string is n, the value is n
        require Math::BigInt;
        # the extra leading "0" keeps an empty byte string (n = 0) a
        # valid hex literal instead of the unparsable "0x"
        Math::BigInt->new ("0x0" . unpack "H*", pop)
    },

    3 => sub { # neg bigint: the byte string is n, the value is -1 - n
        require Math::BigInt;
        # RFC 7049, section 2.4.2: tag 3 encodes -1 - n, not -n
        Math::BigInt->new ("0x0" . unpack "H*", pop)->bneg->bdec
    },

    4 => sub { # decimal fraction, array [exponent, mantissa]
        require Math::BigFloat;
        # mantissa * 10 ** exponent, spelled as "<mantissa>E<exponent>"
        Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
    },

    5 => sub { # bigfloat, array [exponent, mantissa]
        require Math::BigFloat;
        # mantissa * 2 ** exponent
        scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
    },

    # the "expected conversion" hints are dropped, the value is passed through
    21 => sub { pop }, # expected conversion to base64url encoding
    22 => sub { pop }, # expected conversion to base64 encoding
    23 => sub { pop }, # expected conversion to base16 encoding

    # 24 # embedded cbor, byte string

    32 => sub { # URI
        require URI;
        URI->new (pop)
    },

    # 33 # base64url rfc4648, utf-8
    # 34 # base64 rfc4648, utf-8
    # 35 # regex pcre/ecma262, utf-8
    # 36 # mime message rfc2045, utf-8
);
807    
808 root 1.19
809 root 1.7 =head1 CBOR and JSON
810 root 1.1
811 root 1.4 CBOR is supposed to implement a superset of the JSON data model, and is,
812     with some coercion, able to represent all JSON texts (something that other
813     "binary JSON" formats such as BSON generally do not support).
814    
815     CBOR implements some extra hints and support for JSON interoperability,
816     and the spec offers further guidance for conversion between CBOR and
817     JSON. None of this is currently implemented in CBOR::XS, and the guidelines
818     in the spec do not result in correct round-tripping of data. If JSON
819     interoperability is improved in the future, then the goal will be to
820     ensure that decoded JSON data will round-trip encoding and decoding to
821     CBOR intact.
822 root 1.1
823    
824     =head1 SECURITY CONSIDERATIONS
825    
826     When you are using CBOR in a protocol, talking to untrusted potentially
827     hostile creatures requires relatively few measures.
828    
829     First of all, your CBOR decoder should be secure, that is, should not have
830     any buffer overflows. Obviously, this module should ensure that and I am
831     trying hard on making that true, but you never know.
832    
833     Second, you need to avoid resource-starving attacks. That means you should
834     limit the size of CBOR data you accept, or make sure that when your
835     resources run out, that's just fine (e.g. by using a separate process that
836     can crash safely). The size of a CBOR string in octets is usually a good
837     indication of the size of the resources required to decode it into a Perl
838     structure. While CBOR::XS can check the size of the CBOR text, it might be
839     too late when you already have it in memory, so you might want to check
840     the size before you accept the string.
841    
842     Third, CBOR::XS recurses using the C stack when decoding objects and
843     arrays. The C stack is a limited resource: for instance, on my amd64
844     machine with 8MB of stack size I can decode around 180k nested arrays but
845     only 14k nested CBOR objects (due to perl itself recursing deeply on croak
846     to free the temporary). If that is exceeded, the program crashes. To be
847     conservative, the default nesting limit is set to 512. If your process
848     has a smaller stack, you should adjust this setting accordingly with the
849     C<max_depth> method.
850    
851     Something else could bomb you, too, that I forgot to think of. In that
852     case, you get to keep the pieces. I am always open for hints, though...
853    
854     Also keep in mind that CBOR::XS might leak contents of your Perl data
855     structures in its error messages, so when you serialise sensitive
856     information you might want to make sure that exceptions thrown by CBOR::XS
857     will not end up in front of untrusted eyes.
858    
859     =head1 CBOR IMPLEMENTATION NOTES
860    
861     This section contains some random implementation notes. They do not
862     describe guaranteed behaviour, but merely behaviour as it is implemented
863     right now.
864    
865     64 bit integers are only properly decoded when Perl was built with 64 bit
866     support.
867    
868     Strings and arrays are encoded with a definite length. Hashes as well,
869     unless they are tied (or otherwise magical).
870    
871     Only the double data type is supported for NV data types - when Perl uses
872     long double to represent floating point values, they might not be encoded
873     properly. Half precision types are accepted, but not encoded.
874    
875     Strict mode and canonical mode are not implemented.
876    
877    
878     =head1 THREADS
879    
880     This module is I<not> guaranteed to be thread safe and there are no
881     plans to change this until Perl gets thread support (as opposed to the
882     horribly slow so-called "threads" which are simply slow and bloated
883     process simulations - use fork, it's I<much> faster, cheaper, better).
884    
885     (It might actually work, but you have been warned).
886    
887    
888     =head1 BUGS
889    
890     While the goal of this module is to be correct, that unfortunately does
891     not mean it's bug-free, only that I think its design is bug-free. If you
892     keep reporting bugs they will be fixed swiftly, though.
893    
894     Please refrain from using rt.cpan.org or any other bug reporting
895     service. I put the contact address into my modules for a reason.
896    
897     =cut
898    
# Default decoding filters for the non-enforced tags, keyed by CBOR tag
# number. A filter receives the tag number in $_[0] and the already decoded
# tagged value as its last argument, and returns the Perl value that
# replaces the tagged item.
our %FILTER = (
    # 0 # rfc4287 datetime, utf-8
    # 1 # unix timestamp, any

    2 => sub { # pos bigint: the byte string is n, the value is n
        require Math::BigInt;
        # the extra leading "0" keeps an empty byte string (n = 0) a
        # valid hex literal instead of the unparsable "0x"
        Math::BigInt->new ("0x0" . unpack "H*", pop)
    },

    3 => sub { # neg bigint: the byte string is n, the value is -1 - n
        require Math::BigInt;
        # RFC 7049, section 2.4.2: tag 3 encodes -1 - n, not -n
        Math::BigInt->new ("0x0" . unpack "H*", pop)->bneg->bdec
    },

    4 => sub { # decimal fraction, array [exponent, mantissa]
        require Math::BigFloat;
        # mantissa * 10 ** exponent, spelled as "<mantissa>E<exponent>"
        Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
    },

    5 => sub { # bigfloat, array [exponent, mantissa]
        require Math::BigFloat;
        # mantissa * 2 ** exponent
        scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
    },

    # the "expected conversion" hints are dropped, the value is passed through
    21 => sub { pop }, # expected conversion to base64url encoding
    22 => sub { pop }, # expected conversion to base64 encoding
    23 => sub { pop }, # expected conversion to base16 encoding

    # 24 # embedded cbor, byte string

    32 => sub { # URI
        require URI;
        URI->new (pop)
    },

    # 33 # base64url rfc4648, utf-8
    # 34 # base64 rfc4648, utf-8
    # 35 # regex pcre/ecma262, utf-8
    # 36 # mime message rfc2045, utf-8
);
939    
# Look up the filter registered in %FILTER for the tag in $_[0] and run it.
# Returns nothing (empty list / undef) when no filter is registered for
# that tag.
sub CBOR::XS::default_filter {
    my $handler = $FILTER{$_[0]}
        or return;

    # "&" call without parentheses: the handler sees our own @_ unchanged
    &$handler
}
943    
# Serialise the invocant as a CBOR tag 32 (URI) value: take its string form,
# upgrade that string to perl's internal UTF-8 representation and wrap it
# with CBOR::XS::tag.
sub URI::TO_CBOR {
    my ($self) = @_;

    my $text = $self->as_string;
    utf8::upgrade ($text);

    CBOR::XS::tag (32, $text)
}
949    
# Serialise a Math::BigInt.
#
# Values that fit into a signed 32 bit integer are returned as plain perl
# numbers (and therefore become normal CBOR integers). Everything else is
# wrapped with CBOR::XS::tag as a bignum: tag 2 for non-negative values,
# tag 3 for negative ones, with the magnitude as a big-endian byte string.
sub Math::BigInt::TO_CBOR {
    my ($value) = @_;

    return $value->numify
        if $value >= -2147483648 && $value <= 2147483647;

    my $negative = $value < 0;

    # RFC 7049, section 2.4.2: tag 2 carries n = value, tag 3 carries
    # n = -1 - value. Working on the non-negative n also keeps the "-"
    # sign that as_hex puts in front of negative numbers out of the hex
    # string.
    my $n = $negative ? $value->copy->bneg->bdec : $value;

    my $hex = substr $n->as_hex, 2;     # strip the leading "0x"
    $hex = "0$hex" if 1 & length $hex;  # pack "H*" wants whole octets

    return CBOR::XS::tag (($negative ? 3 : 2), pack "H*", $hex);
}
959    
# Serialise a Math::BigFloat as a CBOR decimal fraction (tag 4): a two
# element array holding the exponent (as a plain number) followed by the
# mantissa, as returned by ->parts.
sub Math::BigFloat::TO_CBOR {
    my ($mantissa, $exponent) = $_[0]->parts;

    CBOR::XS::tag (4, [$exponent->numify, $mantissa])
}
964    
# Load the compiled (XS) part of the module, handing $VERSION to the loader.
XSLoader::load ("CBOR::XS", $VERSION);
966    
967     =head1 SEE ALSO
968    
969     The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
970     serialisation.
971    
972 root 1.6 The L<Types::Serialiser> module provides the data model for true, false
973     and error values.
974    
975 root 1.1 =head1 AUTHOR
976    
977     Marc Lehmann <schmorp@schmorp.de>
978     http://home.schmorp.de/
979    
980     =cut
981    
982 root 1.6 1
983