[ViewVC] Diff of: cvs/CBOR-XS/XS.pm

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.49 by root, Thu Apr 21 16:24:03 2016 UTC vs.
Revision 1.81 by root, Fri Dec 11 06:10:26 2020 UTC

…		…
38	with the added ability of supporting serialisation of Perl objects. (JSON	38	with the added ability of supporting serialisation of Perl objects. (JSON
39	often compresses better than CBOR though, so if you plan to compress the	39	often compresses better than CBOR though, so if you plan to compress the
40	data later and speed is less important you might want to compare both	40	data later and speed is less important you might want to compare both
41	formats first).	41	formats first).
42		42
		43	The primary goal of this module is to be I<correct> and the secondary goal
		44	is to be I<fast>. To reach the latter goal it was written in C.
		45
43	To give you a general idea about speed, with texts in the megabyte range,	46	To give you a general idea about speed, with texts in the megabyte range,
44	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or	47	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
45	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the	48	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
46	data, the worse L<Storable> performs in comparison.	49	data, the worse L<Storable> performs in comparison.
47		50
…		…
52	In addition to the core CBOR data format, this module implements a	55	In addition to the core CBOR data format, this module implements a
53	number of extensions, to support cyclic and shared data structures	56	number of extensions, to support cyclic and shared data structures
54	(see C<allow_sharing> and C<allow_cycles>), string deduplication (see	57	(see C<allow_sharing> and C<allow_cycles>), string deduplication (see
55	C<pack_strings>) and scalar references (always enabled).	58	C<pack_strings>) and scalar references (always enabled).
56		59
57	The primary goal of this module is to be I<correct> and the secondary goal
58	is to be I<fast>. To reach the latter goal it was written in C.
59
60	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and	60	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
61	vice versa.	61	vice versa.
62		62
63	=cut	63	=cut
64		64
65	package CBOR::XS;	65	package CBOR::XS;
66		66
67	use common::sense;	67	use common::sense;
68		68
69	our $VERSION = 1.41;	69	our $VERSION = 1.83;
70	our @ISA = qw(Exporter);	70	our @ISA = qw(Exporter);
71		71
72	our @EXPORT = qw(encode_cbor decode_cbor);	72	our @EXPORT = qw(encode_cbor decode_cbor);
73		73
74	use Exporter;	74	use Exporter;
…		…
112		112
113	The mutators for flags all return the CBOR object again and thus calls can	113	The mutators for flags all return the CBOR object again and thus calls can
114	be chained:	114	be chained:
115		115
116	my $cbor = CBOR::XS->new->encode ({a => [1,2]});	116	my $cbor = CBOR::XS->new->encode ({a => [1,2]});
		117
		118	=item $cbor = new_safe CBOR::XS
		119
		120	Create a new, safe/secure CBOR::XS object. This is similar to C<new>,
		121	but configures the coder object to be safe to use with untrusted
		122	data. Currently, this is equivalent to:
		123
		124	my $cbor = CBOR::XS
		125	->new
		126	->forbid_objects
		127	->filter (\&CBOR::XS::safe_filter)
		128	->max_size (1e8);
		129
		130	But is more future proof (it is better to crash because of a change than
		131	to be exploited in other ways).
		132
		133	=cut
		134
		135	sub new_safe {
		136	CBOR::XS
		137	->new
		138	->forbid_objects
		139	->filter (\&CBOR::XS::safe_filter)
		140	->max_size (1e8)
		141	}
117		142
118	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])	143	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
119		144
120	=item $max_depth = $cbor->get_max_depth	145	=item $max_depth = $cbor->get_max_depth
121		146
…		…
137		162
138	Note that nesting is implemented by recursion in C. The default value has	163	Note that nesting is implemented by recursion in C. The default value has
139	been chosen to be as large as typical operating systems allow without	164	been chosen to be as large as typical operating systems allow without
140	crashing.	165	crashing.
141		166
142	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	167	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
143		168
144	=item $cbor = $cbor->max_size ([$maximum_string_size])	169	=item $cbor = $cbor->max_size ([$maximum_string_size])
145		170
146	=item $max_size = $cbor->get_max_size	171	=item $max_size = $cbor->get_max_size
147		172
…		…
152	effect on C<encode> (yet).	177	effect on C<encode> (yet).
153		178
154	If no argument is given, the limit check will be deactivated (same as when	179	If no argument is given, the limit check will be deactivated (same as when
155	C<0> is specified).	180	C<0> is specified).
156		181
157	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	182	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
158		183
159	=item $cbor = $cbor->allow_unknown ([$enable])	184	=item $cbor = $cbor->allow_unknown ([$enable])
160		185
161	=item $enabled = $cbor->get_allow_unknown	186	=item $enabled = $cbor->get_allow_unknown
162		187
…		…
180	reference to the earlier value.	205	reference to the earlier value.
181		206
182	This means that such values will only be encoded once, and will not result	207	This means that such values will only be encoded once, and will not result
183	in a deep cloning of the value on decode, in decoders supporting the value	208	in a deep cloning of the value on decode, in decoders supporting the value
184	sharing extension. This also makes it possible to encode cyclic data	209	sharing extension. This also makes it possible to encode cyclic data
185	structures (which need C<allow_cycles> to ne enabled to be decoded by this	210	structures (which need C<allow_cycles> to be enabled to be decoded by this
186	module).	211	module).
187		212
188	It is recommended to leave it off unless you know your	213	It is recommended to leave it off unless you know your
189	communication partner supports the value sharing extensions to CBOR	214	communication partner supports the value sharing extensions to CBOR
190	(L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the	215	(L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
191	resulting data structure might be unusable.	216	resulting data structure might be unusable.
192		217
193	Detecting shared values incurs a runtime overhead when values are encoded	218	Detecting shared values incurs a runtime overhead when values are encoded
194	that have a reference counter larger than one, and might unnecessarily	219	that have a reference counter larger than one, and might unnecessarily
195	increase the encoded size, as potentially shared values are encode as	220	increase the encoded size, as potentially shared values are encoded as
196	shareable whether or not they are actually shared.	221	shareable whether or not they are actually shared.
197		222
198	At the moment, only targets of references can be shared (e.g. scalars,	223	At the moment, only targets of references can be shared (e.g. scalars,
199	arrays or hashes pointed to by a reference). Weirder constructs, such as	224	arrays or hashes pointed to by a reference). Weirder constructs, such as
200	an array with multiple "copies" of the I<same> string, which are hard but	225	an array with multiple "copies" of the I<same> string, which are hard but
…		…
226	throwing an error.	251	throwing an error.
227		252
228	This option does not affect C<encode> in any way - shared values and	253	This option does not affect C<encode> in any way - shared values and
229	references will always be encoded properly if present.	254	references will always be encoded properly if present.
230		255
		256	=item $cbor = $cbor->forbid_objects ([$enable])
		257
		258	=item $enabled = $cbor->get_forbid_objects
		259
		260	Disables the use of the object serialiser protocol.
		261
		262	If C<$enable> is true (or missing), then C<encode> will throw an
		263	exception when it encounters perl objects that would be encoded using the
		264	perl-object tag (26). When C<decode> encounters such tags, it will fall
		265	back to the general filter/tagged logic as if this were an unknown tag (by
		266	default resulting in a C<CBOR::XS::Tagged> object).
		267
		268	If C<$enable> is false (the default), then C<encode> will use the
		269	L<Types::Serialiser> object serialisation protocol to serialise objects
		270	into perl-object tags, and C<decode> will do the same to decode such tags.
		271
		272	See L<SECURITY CONSIDERATIONS>, below, for more info on why forbidding this
		273	protocol can be useful.
		274
231	=item $cbor = $cbor->pack_strings ([$enable])	275	=item $cbor = $cbor->pack_strings ([$enable])
232		276
233	=item $enabled = $cbor->get_pack_strings	277	=item $enabled = $cbor->get_pack_strings
234		278
235	If C<$enable> is true (or missing), then C<encode> will try not to encode	279	If C<$enable> is true (or missing), then C<encode> will try not to encode
…		…
247	the standard CBOR way.	291	the standard CBOR way.
248		292
249	This option does not affect C<decode> in any way - string references will	293	This option does not affect C<decode> in any way - string references will
250	always be decoded properly if present.	294	always be decoded properly if present.
251		295
		296	=item $cbor = $cbor->text_keys ([$enable])
		297
		298	=item $enabled = $cbor->get_text_keys
		299
		300	If C<$enable> is true (or missing), then C<encode> will encode all
		301	perl hash keys as CBOR text strings/UTF-8 string, upgrading them as needed.
		302
		303	If C<$enable> is false (the default), then C<encode> will encode hash keys
		304	normally - upgraded perl strings (strings internally encoded as UTF-8) as
		305	CBOR text strings, and downgraded perl strings as CBOR byte strings.
		306
		307	This option does not affect C<decode> in any way.
		308
		309	This option is useful for interoperability with CBOR decoders that don't
		310	treat byte strings as a form of text. It is especially useful as Perl
		311	gives very little control over hash keys.
		312
		313	Enabling this option can be slow, as all downgraded hash keys that are
		314	encoded need to be scanned and converted to UTF-8.
		315
		316	=item $cbor = $cbor->text_strings ([$enable])
		317
		318	=item $enabled = $cbor->get_text_strings
		319
		320	This option works similar to C<text_keys>, above, but works on all strings
		321	(including hash keys), so C<text_keys> has no further effect after
		322	enabling C<text_strings>.
		323
		324	If C<$enable> is true (or missing), then C<encode> will encode all perl
		325	strings as CBOR text strings/UTF-8 strings, upgrading them as needed.
		326
		327	If C<$enable> is false (the default), then C<encode> will encode strings
		328	normally (but see C<text_keys>) - upgraded perl strings (strings
		329	internally encoded as UTF-8) as CBOR text strings, and downgraded perl
		330	strings as CBOR byte strings.
		331
		332	This option does not affect C<decode> in any way.
		333
		334	This option has similar advantages and disadvantages as C<text_keys>. In
		335	addition, this option effectively removes the ability to automatically
		336	encode byte strings, which might break some C<FREEZE> and C<TO_CBOR>
		337	methods that rely on this.
		338
		339	A workaround is to use explicit type casts, which are unaffected by this option.
		340
252	=item $cbor = $cbor->validate_utf8 ([$enable])	341	=item $cbor = $cbor->validate_utf8 ([$enable])
253		342
254	=item $enabled = $cbor->get_validate_utf8	343	=item $enabled = $cbor->get_validate_utf8
255		344
256	If C<$enable> is true (or missing), then C<decode> will validate that	345	If C<$enable> is true (or missing), then C<decode> will validate that
…		…
261	The concept of "valid UTF-8" used is perl's concept, which is a superset	350	The concept of "valid UTF-8" used is perl's concept, which is a superset
262	of the official UTF-8.	351	of the official UTF-8.
263		352
264	If C<$enable> is false (the default), then C<decode> will blindly accept	353	If C<$enable> is false (the default), then C<decode> will blindly accept
265	UTF-8 data, marking them as valid UTF-8 in the resulting data structure	354	UTF-8 data, marking them as valid UTF-8 in the resulting data structure
266	regardless of whether thats true or not.	355	regardless of whether that's true or not.
267		356
268	Perl isn't too happy about corrupted UTF-8 in strings, but should	357	Perl isn't too happy about corrupted UTF-8 in strings, but should
269	generally not crash or do similarly evil things. Extensions might be not	358	generally not crash or do similarly evil things. Extensions might be not
270	so forgiving, so it's recommended to turn on this setting if you receive	359	so forgiving, so it's recommended to turn on this setting if you receive
271	untrusted CBOR.	360	untrusted CBOR.
…		…
293	replace the tagged value in the decoded data structure, or no values,	382	replace the tagged value in the decoded data structure, or no values,
294	which will result in default handling, which currently means the decoder	383	which will result in default handling, which currently means the decoder
295	creates a C<CBOR::XS::Tagged> object to hold the tag and the value.	384	creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
296		385
297	When the filter is cleared (the default state), the default filter	386	When the filter is cleared (the default state), the default filter
298	function, C<CBOR::XS::default_filter>, is used. This function simply looks	387	function, C<CBOR::XS::default_filter>, is used. This function simply
299	up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists it must be	388	looks up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists
300	a code reference that is called with tag and value, and is responsible for	389	it must be a code reference that is called with tag and value, and is
301	decoding the value. If no entry exists, it returns no values.	390	responsible for decoding the value. If no entry exists, it returns no
		391	values. C<CBOR::XS> provides a number of default filter functions already,
		392	and the C<%CBOR::XS::FILTER> hash can be freely extended with more.
		393
		394	C<CBOR::XS> additionally provides an alternative filter function that is
		395	supposed to be safe to use with untrusted data (which the default filter
		396	might not), called C<CBOR::XS::safe_filter>, which works the same as
		397	the C<default_filter> but uses the C<%CBOR::XS::SAFE_FILTER> variable
		398	instead. It is prepopulated with the tag decoding functions that are
		399	deemed safe (basically the same as C<%CBOR::XS::FILTER> without all
		400	the bignum tags), and can be extended by user code as well, although,
		401	obviously, one should be very careful about adding decoding functions
		402	here, since the expectation is that they are safe to use on untrusted
		403	data, after all.
302		404
303	Example: decode all tags not handled internally into C<CBOR::XS::Tagged>	405	Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
304	objects, with no other special handling (useful when working with	406	objects, with no other special handling (useful when working with
305	potentially "unsafe" CBOR data).	407	potentially "unsafe" CBOR data).
306		408
…		…
313	my ($tag, $value) = @_;	415	my ($tag, $value) = @_;
314		416
315	"tag 1347375694 value $value"	417	"tag 1347375694 value $value"
316	};	418	};
317		419
		420	Example: provide your own filter function that looks up tags in your own
		421	hash:
		422
		423	my %my_filter = (
		424	998347484 => sub {
		425	my ($tag, $value) = @_;
		426
		427	"tag 998347484 value $value"
		428	},
		429	);
		430
		431	my $coder = CBOR::XS->new->filter (sub {
		432	&{ $my_filter{$_[0]} or return }
		433	});
		434
		435
		436	Example: use the safe filter function (see L<SECURITY CONSIDERATIONS> for
		437	more considerations on security).
		438
		439	CBOR::XS->new->filter (\&CBOR::XS::safe_filter)->decode ($cbor_data);
		440
318	=item $cbor_data = $cbor->encode ($perl_scalar)	441	=item $cbor_data = $cbor->encode ($perl_scalar)
319		442
320	Converts the given Perl data structure (a scalar value) to its CBOR	443	Converts the given Perl data structure (a scalar value) to its CBOR
321	representation.	444	representation.
322		445
…		…
331	when there is trailing garbage after the CBOR string, it will silently	454	when there is trailing garbage after the CBOR string, it will silently
332	stop parsing there and return the number of characters consumed so far.	455	stop parsing there and return the number of characters consumed so far.
333		456
334	This is useful if your CBOR texts are not delimited by an outer protocol	457	This is useful if your CBOR texts are not delimited by an outer protocol
335	and you need to know where the first CBOR string ends and the next one	458	and you need to know where the first CBOR string ends and the next one
336	starts.	459	starts - CBOR strings are self-delimited, so it is possible to concatenate
		460	CBOR strings without any delimiters or size fields and recover their data.
337		461
338	CBOR::XS->new->decode_prefix ("......")	462	CBOR::XS->new->decode_prefix ("......")
339	=> ("...", 3)	463	=> ("...", 3)
340		464
341	=back	465	=back
…		…
396		520
397	Resets the incremental decoder. This throws away any saved state, so that	521	Resets the incremental decoder. This throws away any saved state, so that
398	subsequent calls to C<incr_parse> or C<incr_parse_multiple> start to parse	522	subsequent calls to C<incr_parse> or C<incr_parse_multiple> start to parse
399	a new CBOR value from the beginning of the C<$buffer> again.	523	a new CBOR value from the beginning of the C<$buffer> again.
400		524
401	This method can be caled at any time, but it I<must> be called if you want	525	This method can be called at any time, but it I<must> be called if you want
402	to change your C<$buffer> or there was a decoding error and you want to	526	to change your C<$buffer> or there was a decoding error and you want to
403	reuse the C<$cbor> object for future incremental parsings.	527	reuse the C<$cbor> object for future incremental parsings.
404		528
405	=back	529	=back
406		530
…		…
544	my $x = 3.1; # some variable containing a number	668	my $x = 3.1; # some variable containing a number
545	"$x"; # stringified	669	"$x"; # stringified
546	$x .= ""; # another, more awkward way to stringify	670	$x .= ""; # another, more awkward way to stringify
547	print $x; # perl does it for you, too, quite often	671	print $x; # perl does it for you, too, quite often
548		672
549	You can force whether a string ie encoded as byte or text string by using	673	You can force whether a string is encoded as byte or text string by using
550	C<utf8::upgrade> and C<utf8::downgrade>):	674	C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled).
551		675
552	utf8::upgrade $x; # encode $x as text string	676	utf8::upgrade $x; # encode $x as text string
553	utf8::downgrade $x; # encode $x as byte string	677	utf8::downgrade $x; # encode $x as byte string
554		678
		679	More options are available, see L<TYPE CASTS>, below, and the C<text_keys>
		680	and C<text_strings> options.
		681
555	Perl doesn't define what operations up- and downgrade strings, so if the	682	Perl doesn't define what operations up- and downgrade strings, so if the
556	difference between byte and text is important, you should up- or downgrade	683	difference between byte and text is important, you should up- or downgrade
557	your string as late as possible before encoding.	684	your string as late as possible before encoding. You can also force the
		685	use of CBOR text strings by using C<text_keys> or C<text_strings>.
558		686
559	You can force the type to be a CBOR number by numifying it:	687	You can force the type to be a CBOR number by numifying it:
560		688
561	my $x = "3"; # some variable containing a string	689	my $x = "3"; # some variable containing a string
562	$x += 0; # numify it, ensuring it will be dumped as a number	690	$x += 0; # numify it, ensuring it will be dumped as a number
…		…
573	represent numerical values are supported, but might suffer loss of	701	represent numerical values are supported, but might suffer loss of
574	precision.	702	precision.
575		703
576	=back	704	=back
577		705
		706	=head2 TYPE CASTS
		707
		708	B<EXPERIMENTAL>: As an experimental extension, C<CBOR::XS> allows you to
		709	force specific CBOR types to be used when encoding. That allows you to
		710	encode types not normally accessible (e.g. half floats) as well as force
		711	string types even when C<text_strings> is in effect.
		712
		713	Type forcing is done by calling a special "cast" function which keeps a
		714	copy of the value and returns a new value that can be handed over to any
		715	CBOR encoder function.
		716
		717	The following casts are currently available (all of which are unary
		718	operators, that is, have a prototype of C<$>):
		719
		720	=over
		721
		722	=item CBOR::XS::as_int $value
		723
		724	Forces the value to be encoded as some form of (basic, not bignum) integer
		725	type.
		726
		727	=item CBOR::XS::as_text $value
		728
		729	Forces the value to be encoded as (UTF-8) text values.
		730
		731	=item CBOR::XS::as_bytes $value
		732
		733	Forces the value to be encoded as a (binary) string value.
		734
		735	Example: encode a perl string as binary even though C<text_strings> is in
		736	effect.
		737
		738	CBOR::XS->new->text_strings->encode ([4, "text", CBOR::XS::as_bytes "bytevalue"]);
		739
		740	=item CBOR::XS::as_bool $value
		741
		742	Converts a Perl boolean (which can be any kind of scalar) into a CBOR
		743	boolean. Strictly the same, but shorter to write, than:
		744
		745	$value ? Types::Serialiser::true : Types::Serialiser::false
		746
		747	=item CBOR::XS::as_float16 $value
		748
		749	Forces half-float (IEEE 754 binary16) encoding of the given value.
		750
		751	=item CBOR::XS::as_float32 $value
		752
		753	Forces single-float (IEEE 754 binary32) encoding of the given value.
		754
		755	=item CBOR::XS::as_float64 $value
		756
		757	Forces double-float (IEEE 754 binary64) encoding of the given value.
		758
		759	=item CBOR::XS::as_cbor $cbor_text
		760
		761	Not a type cast per-se, this type cast forces the argument to be encoded
		762	as-is. This can be used to embed pre-encoded CBOR data.
		763
		764	Note that no checking on the validity of the C<$cbor_text> is done - it's
		765	the caller's responsibility to correctly encode values.
		766
		767	=item CBOR::XS::as_map [key => value...]
		768
		769	Treat the array reference as key value pairs and output a CBOR map. This
		770	allows you to generate CBOR maps with arbitrary key types (or, if you
		771	don't care about semantics, duplicate keys or pairs in a custom order),
		772	which is otherwise hard to do with Perl.
		773
		774	The single argument must be an array reference with an even number of
		775	elements.
		776
		777	Note that only the reference to the array is copied, the array itself is
		778	not. Modifications done to the array before calling an encoding function
		779	will be reflected in the encoded output.
		780
		781	Example: encode a CBOR map with a string and an integer as keys.
		782
		783	encode_cbor CBOR::XS::as_map [string => "value", 5 => "value"]
		784
		785	=back
		786
		787	=cut
		788
		789	sub CBOR::XS::as_cbor ($) { bless [$_[0], 0, undef], CBOR::XS::Tagged:: }
		790	sub CBOR::XS::as_int ($) { bless [$_[0], 1, undef], CBOR::XS::Tagged:: }
		791	sub CBOR::XS::as_bytes ($) { bless [$_[0], 2, undef], CBOR::XS::Tagged:: }
		792	sub CBOR::XS::as_text ($) { bless [$_[0], 3, undef], CBOR::XS::Tagged:: }
		793	sub CBOR::XS::as_float16 ($) { bless [$_[0], 4, undef], CBOR::XS::Tagged:: }
		794	sub CBOR::XS::as_float32 ($) { bless [$_[0], 5, undef], CBOR::XS::Tagged:: }
		795	sub CBOR::XS::as_float64 ($) { bless [$_[0], 6, undef], CBOR::XS::Tagged:: }
		796
		797	sub CBOR::XS::as_bool ($) { $_[0] ? $Types::Serialiser::true : $Types::Serialiser::false }
		798
		799	sub CBOR::XS::as_map ($) {
		800	ARRAY:: eq ref $_[0]
		801	and $#{ $_[0] } & 1
		802	or do { require Carp; Carp::croak ("CBOR::XS::as_map only acepts array references with an even number of elements, caught") };
		803
		804	bless [$_[0], 7, undef], CBOR::XS::Tagged::
		805	}
		806
578	=head2 OBJECT SERIALISATION	807	=head2 OBJECT SERIALISATION
579		808
580	This module implements both a CBOR-specific and the generic	809	This module implements both a CBOR-specific and the generic
581	L<Types::Serialiser> object serialisation protocol. The following	810	L<Types::Serialiser> object serialisation protocol. The following
582	subsections explain both methods.	811	subsections explain both methods.
…		…
663	"$self" # encode url string	892	"$self" # encode url string
664	}	893	}
665		894
666	sub URI::THAW {	895	sub URI::THAW {
667	my ($class, $serialiser, $uri) = @_;	896	my ($class, $serialiser, $uri) = @_;
668
669	$class->new ($uri)	897	$class->new ($uri)
670	}	898	}
671		899
672	Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For	900	Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
673	example, a C<FREEZE> method that returns "type", "id" and "variant" values	901	example, a C<FREEZE> method that returns "type", "id" and "variant" values
…		…
804	additional tags (such as base64url).	1032	additional tags (such as base64url).
805		1033
806	=head2 ENFORCED TAGS	1034	=head2 ENFORCED TAGS
807		1035
808	These tags are always handled when decoding, and their handling cannot be	1036	These tags are always handled when decoding, and their handling cannot be
809	overriden by the user.	1037	overridden by the user.
810		1038
811	=over 4	1039	=over 4
812		1040
813	=item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)	1041	=item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
814		1042
…		…
842	encoded, however, when C<pack_strings> is enabled.	1070	encoded, however, when C<pack_strings> is enabled.
843		1071
844	=item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)	1072	=item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
845		1073
846	This tag is automatically generated when a reference is encountered (with	1074	This tag is automatically generated when a reference is encountered (with
847	the exception of hash and array refernces). It is converted to a reference	1075	the exception of hash and array references). It is converted to a reference
848	when decoding.	1076	when decoding.
849		1077
850	=item 55799 (self-describe CBOR, RFC 7049)	1078	=item 55799 (self-describe CBOR, RFC 7049)
851		1079
852	This value is not generated on encoding (unless explicitly requested by	1080	This value is not generated on encoding (unless explicitly requested by
…		…
855	=back	1083	=back
856		1084
857	=head2 NON-ENFORCED TAGS	1085	=head2 NON-ENFORCED TAGS
858		1086
859	These tags have default filters provided when decoding. Their handling can	1087	These tags have default filters provided when decoding. Their handling can
860	be overriden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by	1088	be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
861	providing a custom C<filter> callback when decoding.	1089	providing a custom C<filter> callback when decoding.
862		1090
863	When they result in decoding into a specific Perl class, the module	1091	When they result in decoding into a specific Perl class, the module
864	usually provides a corresponding C<TO_CBOR> method as well.	1092	usually provides a corresponding C<TO_CBOR> method as well.
865		1093
…		…
883		1111
884	These tags are decoded into L<Math::BigInt> objects. The corresponding	1112	These tags are decoded into L<Math::BigInt> objects. The corresponding
885	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR	1113	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
886	integers, and others into positive/negative CBOR bignums.	1114	integers, and others into positive/negative CBOR bignums.
887		1115
888	=item 4, 5 (decimal fraction/bigfloat)	1116	=item 4, 5, 264, 265 (decimal fraction/bigfloat)
889		1117
890	Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>	1118	Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
891	objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>	1119	objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
892	encodes into a decimal fraction.	1120	encodes into a decimal fraction (either tag 4 or 264).
893		1121
894	CBOR cannot represent bigfloats with I<very> large exponents - conversion	1122	NaN and infinities are not encoded properly, as they cannot be represented
895	of such big float objects is undefined.	1123	in CBOR.
896		1124
897	Also, NaN and infinities are not encoded properly.	1125	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
		1126
		1127	=item 30 (rational numbers)
		1128
		1129	These tags are decoded into L<Math::BigRat> objects. The corresponding
		1130	C<Math::BigRat::TO_CBOR> method encodes rational numbers with denominator
		1131	C<1> via their numerator only, i.e., they become normal integers or
		1132	C<bignums>.
		1133
		1134	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
898		1135
899	=item 21, 22, 23 (expected later JSON conversion)	1136	=item 21, 22, 23 (expected later JSON conversion)
900		1137
901	CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these	1138	CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
902	tags.	1139	tags.
…		…
907	C<URI::TO_CBOR> method again results in a CBOR URI value.	1144	C<URI::TO_CBOR> method again results in a CBOR URI value.
908		1145
909	=back	1146	=back
910		1147
911	=cut	1148	=cut
912
913	our %FILTER = (
914	# 0 # rfc4287 datetime, utf-8
915	# 1 # unix timestamp, any
916
917	2 => sub { # pos bigint
918	require Math::BigInt;
919	Math::BigInt->new ("0x" . unpack "H*", pop)
920	},
921
922	3 => sub { # neg bigint
923	require Math::BigInt;
924	-Math::BigInt->new ("0x" . unpack "H*", pop)
925	},
926
927	4 => sub { # decimal fraction, array
928	require Math::BigFloat;
929	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
930	},
931
932	5 => sub { # bigfloat, array
933	require Math::BigFloat;
934	scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
935	},
936
937	21 => sub { pop }, # expected conversion to base64url encoding
938	22 => sub { pop }, # expected conversion to base64 encoding
939	23 => sub { pop }, # expected conversion to base16 encoding
940
941	# 24 # embedded cbor, byte string
942
943	32 => sub {
944	require URI;
945	URI->new (pop)
946	},
947
948	# 33 # base64url rfc4648, utf-8
949	# 34 # base64 rfc46484, utf-8
950	# 35 # regex pcre/ecma262, utf-8
951	# 36 # mime message rfc2045, utf-8
952	);
953
954		1149
955	=head1 CBOR and JSON	1150	=head1 CBOR and JSON
956		1151
957	CBOR is supposed to implement a superset of the JSON data model, and is,	1152	CBOR is supposed to implement a superset of the JSON data model, and is,
958	with some coercion, able to represent all JSON texts (something that other	1153	with some coercion, able to represent all JSON texts (something that other
…		…
967	CBOR intact.	1162	CBOR intact.
968		1163
969		1164
970	=head1 SECURITY CONSIDERATIONS	1165	=head1 SECURITY CONSIDERATIONS
971		1166
972	When you are using CBOR in a protocol, talking to untrusted potentially	1167	Tl;dr... if you want to decode or encode CBOR from untrusted sources, you
973	hostile creatures requires relatively few measures.	1168	should start with a coder object created via C<new_safe> (which implements
		1169	the mitigations explained below):
974		1170
		1171	my $coder = CBOR::XS->new_safe;
		1172
		1173	my $data = $coder->decode ($cbor_text);
		1174	my $cbor = $coder->encode ($data);
		1175
		1176	Longer version: When you are using CBOR in a protocol, talking to
		1177	untrusted potentially hostile creatures requires some thought:
		1178
		1179	=over 4
		1180
		1181	=item Security of the CBOR decoder itself
		1182
975	First of all, your CBOR decoder should be secure, that is, should not have	1183	First and foremost, your CBOR decoder should be secure, that is, should
		1184	not have any buffer overflows or similar bugs that could potentially be
976	any buffer overflows. Obviously, this module should ensure that and I am	1185	exploited. Obviously, this module should ensure that and I am trying hard
977	trying hard on making that true, but you never know.	1186	on making that true, but you never know.
978		1187
		1188	=item CBOR::XS can invoke almost arbitrary callbacks during decoding
		1189
		1190	CBOR::XS supports object serialisation - decoding CBOR can cause calls
		1191	to I<any> C<THAW> method in I<any> package that exists in your process
		1192	(that is, CBOR::XS will not try to load modules, but any existing C<THAW>
		1193	method or function can be called, so they all have to be secure).
		1194
		1195	Less obviously, it will also invoke C<TO_CBOR> and C<FREEZE> methods -
		1196	even if all your C<THAW> methods are secure, encoding data structures from
		1197	untrusted sources can invoke those and trigger bugs in those.
		1198
		1199	So, if you are not sure about the security of all the modules you
		1200	have loaded (you shouldn't), you should disable this part using
		1201	C<forbid_objects> or using C<new_safe>.
		1202
		1203	=item CBOR can be extended with tags that call library code
		1204
		1205	CBOR can be extended with tags, and C<CBOR::XS> has a registry of
		1206	conversion functions for many existing tags that can be extended via
		1207	third-party modules (see the C<filter> method).
		1208
		1209	If you don't trust these, you should configure the "safe" filter function,
		1210	C<CBOR::XS::safe_filter> (C<new_safe> does this), which by default only
		1211	includes conversion functions that are considered "safe" by the author
		1212	(but again, they can be extended by third party modules).
		1213
		1214	Depending on your level of paranoia, you can use the "safe" filter:
		1215
		1216	$cbor->filter (\&CBOR::XS::safe_filter);
		1217
		1218	... your own filter...
		1219
		1220	$cbor->filter (sub { ... do your stuffs here ... });
		1221
		1222	... or even no filter at all, disabling all tag decoding:
		1223
		1224	$cbor->filter (sub { });
		1225
		1226	This is never a problem for encoding, as the tag mechanism only exists in
		1227	CBOR texts.
		1228
		1229	=item Resource-starving attacks: object memory usage
		1230
979	Second, you need to avoid resource-starving attacks. That means you should	1231	You need to avoid resource-starving attacks. That means you should limit
980	limit the size of CBOR data you accept, or make sure that when your	1232	the size of CBOR data you accept, or make sure that when your resources
981	resources run out, that's just fine (e.g. by using a separate process that	1233	run out, that's just fine (e.g. by using a separate process that can
982	can crash safely). The size of a CBOR string in octets is usually a good	1234	crash safely). The size of a CBOR string in octets is usually a good
983	indication of the size of the resources required to decode it into a Perl	1235	indication of the size of the resources required to decode it into a Perl
984	structure. While CBOR::XS can check the size of the CBOR text, it might be	1236	structure. While CBOR::XS can check the size of the CBOR text (using
985	too late when you already have it in memory, so you might want to check	1237	C<max_size> - done by C<new_safe>), it might be too late when you already
986	the size before you accept the string.	1238	have it in memory, so you might want to check the size before you accept
		1239	the string.
987		1240
		1241	As for encoding, it is possible to construct data structures that are
		1242	relatively small but result in large CBOR texts (for example by having an
		1243	array full of references to the same big data structure, which will all be
		1244	deep-cloned during encoding by default). This is rarely an actual issue
		1245	(and the worst case is still just running out of memory), but you can
		1246	reduce this risk by using C<allow_sharing>.
		1247
		1248	=item Resource-starving attacks: stack overflows
		1249
988	Third, CBOR::XS recurses using the C stack when decoding objects and	1250	CBOR::XS recurses using the C stack when decoding objects and arrays. The
989	arrays. The C stack is a limited resource: for instance, on my amd64	1251	C stack is a limited resource: for instance, on my amd64 machine with 8MB
990	machine with 8MB of stack size I can decode around 180k nested arrays but	1252	of stack size I can decode around 180k nested arrays but only 14k nested
991	only 14k nested CBOR objects (due to perl itself recursing deeply on croak	1253	CBOR objects (due to perl itself recursing deeply on croak to free the
992	to free the temporary). If that is exceeded, the program crashes. To be	1254	temporary). If that is exceeded, the program crashes. To be conservative,
993	conservative, the default nesting limit is set to 512. If your process	1255	the default nesting limit is set to 512. If your process has a smaller
994	has a smaller stack, you should adjust this setting accordingly with the	1256	stack, you should adjust this setting accordingly with the C<max_depth>
995	C<max_depth> method.	1257	method.
		1258
		1259	=item Resource-starving attacks: CPU en-/decoding complexity
		1260
		1261	CBOR::XS will use the L<Math::BigInt>, L<Math::BigFloat> and
		1262	L<Math::BigRat> libraries to represent and encode/decode bignums. These can be
		1263	very slow (as in, centuries of CPU time) and can even crash your program
		1264	(and are generally not very trustworthy). See the next section on bignum
		1265	security for details.
		1266
		1267	=item Data breaches: leaking information in error messages
		1268
		1269	CBOR::XS might leak contents of your Perl data structures in its error
		1270	messages, so when you serialise sensitive information you might want to
		1271	make sure that exceptions thrown by CBOR::XS will not end up in front of
		1272	untrusted eyes.
		1273
		1274	=item Something else...
996		1275
997	Something else could bomb you, too, that I forgot to think of. In that	1276	Something else could bomb you, too, that I forgot to think of. In that
998	case, you get to keep the pieces. I am always open for hints, though...	1277	case, you get to keep the pieces. I am always open for hints, though...
999		1278
1000	Also keep in mind that CBOR::XS might leak contents of your Perl data	1279	=back
1001	structures in its error messages, so when you serialise sensitive	1280
1002	information you might want to make sure that exceptions thrown by CBOR::XS	1281
1003	will not end up in front of untrusted eyes.	1282	=head1 BIGNUM SECURITY CONSIDERATIONS
		1283
		1284	CBOR::XS provides a C<TO_CBOR> method for both L<Math::BigInt> and
		1285	L<Math::BigFloat> that tries to encode the number in the simplest possible
		1286	way, that is, either a CBOR integer, a CBOR bigint/decimal fraction (tag
		1287	4) or an arbitrary-exponent decimal fraction (tag 264). Rational numbers
		1288	(L<Math::BigRat>, tag 30) can also contain bignums as members.
		1289
		1290	CBOR::XS will also understand base-2 bigfloat or arbitrary-exponent
		1291	bigfloats (tags 5 and 265), but it will never generate these on its own.
		1292
		1293	Using the built-in L<Math::BigInt::Calc> support, encoding and decoding
		1294	decimal fractions is generally fast. Decoding bigints can be slow for very
		1295	big numbers (tens of thousands of digits, something that could potentially
		1296	be caught by limiting the size of CBOR texts), and decoding bigfloats or
		1297	arbitrary-exponent bigfloats can be I<extremely> slow (minutes, decades)
		1298	for large exponents (roughly 40 bit and longer).
		1299
		1300	Additionally, L<Math::BigInt> can take advantage of other bignum
		1301	libraries, such as L<Math::GMP>, which cannot handle big floats with large
		1302	exponents, and might simply abort or crash your program, due to their code
		1303	quality.
		1304
		1305	This can be a concern if you want to parse untrusted CBOR. If it is, you
		1306	might want to disable decoding of tag 2 (bigint) and 3 (negative bigint)
		1307	types. You should also disable types 5 and 265, as these can be slow even
		1308	without bigints.
		1309
		1310	Disabling bigints will also partially or fully disable types that rely on
		1311	them, e.g. rational numbers that use bignums.
		1312
1004		1313
1005	=head1 CBOR IMPLEMENTATION NOTES	1314	=head1 CBOR IMPLEMENTATION NOTES
1006		1315
1007	This section contains some random implementation notes. They do not	1316	This section contains some random implementation notes. They do not
1008	describe guaranteed behaviour, but merely behaviour as-is implemented	1317	describe guaranteed behaviour, but merely behaviour as-is implemented
…		…
1024	=head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT	1333	=head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT
1025		1334
1026	On perls that were built without 64 bit integer support (these are rare	1335	On perls that were built without 64 bit integer support (these are rare
1027	nowadays, even on 32 bit architectures, as all major Perl distributions	1336	nowadays, even on 32 bit architectures, as all major Perl distributions
1028	are built with 64 bit integer support), support for any kind of 64 bit	1337	are built with 64 bit integer support), support for any kind of 64 bit
1029	integer in CBOR is very limited - most likely, these 64 bit values will	1338	value in CBOR is very limited - most likely, these 64 bit values will
1030	be truncated, corrupted, or otherwise not decoded correctly. This also	1339	be truncated, corrupted, or otherwise not decoded correctly. This also
1031	includes string, array and map sizes that are stored as 64 bit integers.	1340	includes string, float, array and map sizes that are stored as 64 bit
		1341	integers.
1032		1342
1033		1343
1034	=head1 THREADS	1344	=head1 THREADS
1035		1345
1036	This module is I<not> guaranteed to be thread safe and there are no	1346	This module is I<not> guaranteed to be thread safe and there are no
…		…
1049		1359
1050	Please refrain from using rt.cpan.org or any other bug reporting	1360	Please refrain from using rt.cpan.org or any other bug reporting
1051	service. I put the contact address into my modules for a reason.	1361	service. I put the contact address into my modules for a reason.
1052		1362
1053	=cut	1363	=cut
		1364
		1365	# clumsy and slow hv_store-in-hash helper function
		1366	sub _hv_store {
		1367	$_[0]{$_[1]} = $_[2];
		1368	}
1054		1369
1055	our %FILTER = (	1370	our %FILTER = (
1056	0 => sub { # rfc4287 datetime, utf-8	1371	0 => sub { # rfc4287 datetime, utf-8
1057	require Time::Piece;	1372	require Time::Piece;
1058	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"	1373	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"
…		…
1094	4 => sub { # decimal fraction, array	1409	4 => sub { # decimal fraction, array
1095	require Math::BigFloat;	1410	require Math::BigFloat;
1096	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])	1411	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
1097	},	1412	},
1098		1413
		1414	264 => sub { # decimal fraction with arbitrary exponent
		1415	require Math::BigFloat;
		1416	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
		1417	},
		1418
1099	5 => sub { # bigfloat, array	1419	5 => sub { # bigfloat, array
1100	require Math::BigFloat;	1420	require Math::BigFloat;
1101	scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)	1421	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1422	},
		1423
		1424	265 => sub { # bigfloat with arbitrary exponent
		1425	require Math::BigFloat;
		1426	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1427	},
		1428
		1429	30 => sub { # rational number
		1430	require Math::BigRat;
		1431	Math::BigRat->new ("$_[1][0]/$_[1][1]") # separate parameters only work in recent versions
1102	},	1432	},
1103		1433
1104	21 => sub { pop }, # expected conversion to base64url encoding	1434	21 => sub { pop }, # expected conversion to base64url encoding
1105	22 => sub { pop }, # expected conversion to base64 encoding	1435	22 => sub { pop }, # expected conversion to base64 encoding
1106	23 => sub { pop }, # expected conversion to base16 encoding	1436	23 => sub { pop }, # expected conversion to base16 encoding
…		…
1116	# 34 # base64 rfc4648, utf-8	1446	# 34 # base64 rfc4648, utf-8
1117	# 35 # regex pcre/ecma262, utf-8	1447	# 35 # regex pcre/ecma262, utf-8
1118	# 36 # mime message rfc2045, utf-8	1448	# 36 # mime message rfc2045, utf-8
1119	);	1449	);
1120		1450
1121	sub CBOR::XS::default_filter {	1451	sub default_filter {
1122	&{ $FILTER{$_[0]} or return }	1452	&{ $FILTER{$_[0]} or return }
		1453	}
		1454
		1455	our %SAFE_FILTER = map { $_ => $FILTER{$_} } 0, 1, 21, 22, 23, 32;
		1456
		1457	sub safe_filter {
		1458	&{ $SAFE_FILTER{$_[0]} or return }
1123	}	1459	}
1124		1460
1125	sub URI::TO_CBOR {	1461	sub URI::TO_CBOR {
1126	my $uri = $_[0]->as_string;	1462	my $uri = $_[0]->as_string;
1127	utf8::upgrade $uri;	1463	utf8::upgrade $uri;
1128	tag 32, $uri	1464	tag 32, $uri
1129	}	1465	}
1130		1466
1131	sub Math::BigInt::TO_CBOR {	1467	sub Math::BigInt::TO_CBOR {
1132	if ($_[0] >= -2147483648 && $_[0] <= 2147483647) {	1468	if (-2147483648 <= $_[0] && $_[0] <= 2147483647) {
1133	$_[0]->numify	1469	$_[0]->numify
1134	} else {	1470	} else {
1135	my $hex = substr $_[0]->as_hex, 2;	1471	my $hex = substr $_[0]->as_hex, 2;
1136	$hex = "0$hex" if 1 & length $hex; # sigh	1472	$hex = "0$hex" if 1 & length $hex; # sigh
1137	tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex	1473	tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex
1138	}	1474	}
1139	}	1475	}
1140		1476
1141	sub Math::BigFloat::TO_CBOR {	1477	sub Math::BigFloat::TO_CBOR {
1142	my ($m, $e) = $_[0]->parts;	1478	my ($m, $e) = $_[0]->parts;
		1479
		1480	-9223372036854775808 <= $e && $e <= 18446744073709551615
1143	tag 4, [$e->numify, $m]	1481	? tag 4, [$e->numify, $m]
		1482	: tag 264, [$e, $m]
		1483	}
		1484
		1485	sub Math::BigRat::TO_CBOR {
		1486	my ($n, $d) = $_[0]->parts;
		1487
		1488	# older versions of BigRat need *1, as they do not always return numbers
		1489
		1490	$d*1 == 1
		1491	? $n*1
		1492	: tag 30, [$n*1, $d*1]
1144	}	1493	}
1145		1494
1146	sub Time::Piece::TO_CBOR {	1495	sub Time::Piece::TO_CBOR {
1147	tag 1, 0 + $_[0]->epoch	1496	tag 1, 0 + $_[0]->epoch
1148	}	1497	}

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing CBOR-XS/XS.pm (file contents): Revision 1.49 by root, Thu Apr 21 16:24:03 2016 UTC vs. Revision 1.81 by root, Fri Dec 11 06:10:26 2020 UTC

Diff Legend

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.49 by root, Thu Apr 21 16:24:03 2016 UTC vs.
Revision 1.81 by root, Fri Dec 11 06:10:26 2020 UTC