[ViewVC] Diff of: cvs/CBOR-XS/XS.pm

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.63 by root, Fri Nov 25 12:16:12 2016 UTC vs.
Revision 1.90 by root, Sat Nov 18 18:19:57 2023 UTC

…		…
38	with the added ability of supporting serialisation of Perl objects. (JSON	38	with the added ability of supporting serialisation of Perl objects. (JSON
39	often compresses better than CBOR though, so if you plan to compress the	39	often compresses better than CBOR though, so if you plan to compress the
40	data later and speed is less important you might want to compare both	40	data later and speed is less important you might want to compare both
41	formats first).	41	formats first).
42		42
		43	The primary goal of this module is to be I<correct> and the secondary goal
		44	is to be I<fast>. To reach the latter goal it was written in C.
		45
43	To give you a general idea about speed, with texts in the megabyte range,	46	To give you a general idea about speed, with texts in the megabyte range,
44	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or	47	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
45	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the	48	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
46	data, the worse L<Storable> performs in comparison.	49	data, the worse L<Storable> performs in comparison.
47		50
…		…
52	In addition to the core CBOR data format, this module implements a	55	In addition to the core CBOR data format, this module implements a
53	number of extensions, to support cyclic and shared data structures	56	number of extensions, to support cyclic and shared data structures
54	(see C<allow_sharing> and C<allow_cycles>), string deduplication (see	57	(see C<allow_sharing> and C<allow_cycles>), string deduplication (see
55	C<pack_strings>) and scalar references (always enabled).	58	C<pack_strings>) and scalar references (always enabled).
56		59
57	The primary goal of this module is to be I<correct> and the secondary goal
58	is to be I<fast>. To reach the latter goal it was written in C.
59
60	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and	60	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
61	vice versa.	61	vice versa.
62		62
63	=cut	63	=cut
64		64
65	package CBOR::XS;	65	package CBOR::XS;
66		66
67	use common::sense;	67	use common::sense;
68		68
69	our $VERSION = 1.51;	69	our $VERSION = 1.87;
70	our @ISA = qw(Exporter);	70	our @ISA = qw(Exporter);
71		71
72	our @EXPORT = qw(encode_cbor decode_cbor);	72	our @EXPORT = qw(encode_cbor decode_cbor);
73		73
74	use Exporter;	74	use Exporter;
…		…
112		112
113	The mutators for flags all return the CBOR object again and thus calls can	113	The mutators for flags all return the CBOR object again and thus calls can
114	be chained:	114	be chained:
115		115
116	my $cbor = CBOR::XS->new->encode ({a => [1,2]});	116	my $cbor = CBOR::XS->new->encode ({a => [1,2]});
		117
		118	=item $cbor = new_safe CBOR::XS
		119
		120	Create a new, safe/secure CBOR::XS object. This is similar to C<new>,
		121	but configures the coder object to be safe to use with untrusted
		122	data. Currently, this is equivalent to:
		123
		124	my $cbor = CBOR::XS
		125	->new
		126	->validate_utf8
		127	->forbid_objects
		128	->filter (\&CBOR::XS::safe_filter)
		129	->max_size (1e8);
		130
		131	But is more future proof (it is better to crash because of a change than
		132	to be exploited in other ways).
		133
		134	=cut
		135
		136	sub new_safe {
		137	CBOR::XS
		138	->new
		139	->validate_utf8
		140	->forbid_objects
		141	->filter (\&CBOR::XS::safe_filter)
		142	->max_size (1e8)
		143	}
117		144
118	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])	145	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
119		146
120	=item $max_depth = $cbor->get_max_depth	147	=item $max_depth = $cbor->get_max_depth
121		148
…		…
137		164
138	Note that nesting is implemented by recursion in C. The default value has	165	Note that nesting is implemented by recursion in C. The default value has
139	been chosen to be as large as typical operating systems allow without	166	been chosen to be as large as typical operating systems allow without
140	crashing.	167	crashing.
141		168
142	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	169	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
143		170
144	=item $cbor = $cbor->max_size ([$maximum_string_size])	171	=item $cbor = $cbor->max_size ([$maximum_string_size])
145		172
146	=item $max_size = $cbor->get_max_size	173	=item $max_size = $cbor->get_max_size
147		174
…		…
152	effect on C<encode> (yet).	179	effect on C<encode> (yet).
153		180
154	If no argument is given, the limit check will be deactivated (same as when	181	If no argument is given, the limit check will be deactivated (same as when
155	C<0> is specified).	182	C<0> is specified).
156		183
157	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	184	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
158		185
159	=item $cbor = $cbor->allow_unknown ([$enable])	186	=item $cbor = $cbor->allow_unknown ([$enable])
160		187
161	=item $enabled = $cbor->get_allow_unknown	188	=item $enabled = $cbor->get_allow_unknown
162		189
…		…
189	communication partner supports the value sharing extensions to CBOR	216	communication partner supports the value sharing extensions to CBOR
190	(L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the	217	(L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
191	resulting data structure might be unusable.	218	resulting data structure might be unusable.
192		219
193	Detecting shared values incurs a runtime overhead when values are encoded	220	Detecting shared values incurs a runtime overhead when values are encoded
194	that have a reference counter large than one, and might unnecessarily	221	that have a reference counter larger than one, and might unnecessarily
195	increase the encoded size, as potentially shared values are encode as	222	increase the encoded size, as potentially shared values are encoded as
196	shareable whether or not they are actually shared.	223	shareable whether or not they are actually shared.
197		224
198	At the moment, only targets of references can be shared (e.g. scalars,	225	At the moment, only targets of references can be shared (e.g. scalars,
199	arrays or hashes pointed to by a reference). Weirder constructs, such as	226	arrays or hashes pointed to by a reference). Weirder constructs, such as
200	an array with multiple "copies" of the I<same> string, which are hard but	227	an array with multiple "copies" of the I<same> string, which are hard but
…		…
218	isn't prepared for this will not leak memory.	245	isn't prepared for this will not leak memory.
219		246
220	If C<$enable> is false (the default), then C<decode> will throw an error	247	If C<$enable> is false (the default), then C<decode> will throw an error
221	when it encounters a self-referential/cyclic data structure.	248	when it encounters a self-referential/cyclic data structure.
222		249
223	FUTURE DIRECTION: the motivation behind this option is to avoid I<real>
224	cycles - future versions of this module might chose to decode cyclic data
225	structures using weak references when this option is off, instead of
226	throwing an error.
227
228	This option does not affect C<encode> in any way - shared values and	250	This option does not affect C<encode> in any way - shared values and
229	references will always be encoded properly if present.	251	references will always be encoded properly if present.
		252
		253	=item $cbor = $cbor->allow_weak_cycles ([$enable])
		254
		255	=item $enabled = $cbor->get_allow_weak_cycles
		256
		257	This works like C<allow_cycles> in that it allows the resulting data
		258	structures to contain cycles, but unlike C<allow_cycles>, those cyclic
		259	references will be weak. That means that code that recursively walks
		260	the data structure must be prepared for cycles, but at least no special
		261	precautions must be implemented to free these data structures.
		262
		263	Only those references leading to actual cycles will be weakened - other
		264	references, e.g. when the same hash or array is referenced multiple times
		265	in an array, will be normal references.
		266
		267	This option does not affect C<encode> in any way - shared values and
		268	references will always be encoded properly if present.
		269
		270	=item $cbor = $cbor->forbid_objects ([$enable])
		271
		272	=item $enabled = $cbor->get_forbid_objects
		273
		274	Disables the use of the object serialiser protocol.
		275
		276	If C<$enable> is true (or missing), then C<encode> will throw an
		277	exception when it encounters perl objects that would be encoded using the
		278	perl-object tag (26). When C<decode> encounters such tags, it will fall
		279	back to the general filter/tagged logic as if this were an unknown tag (by
		280	default resulting in a C<CBOR::XS::Tagged> object).
		281
		282	If C<$enable> is false (the default), then C<encode> will use the
		283	L<Types::Serialiser> object serialisation protocol to serialise objects
		284	into perl-object tags, and C<decode> will do the same to decode such tags.
		285
		286	See L<SECURITY CONSIDERATIONS>, below, for more info on why forbidding this
		287	protocol can be useful.
230		288
231	=item $cbor = $cbor->pack_strings ([$enable])	289	=item $cbor = $cbor->pack_strings ([$enable])
232		290
233	=item $enabled = $cbor->get_pack_strings	291	=item $enabled = $cbor->get_pack_strings
234		292
…		…
286	strings as CBOR byte strings.	344	strings as CBOR byte strings.
287		345
288	This option does not affect C<decode> in any way.	346	This option does not affect C<decode> in any way.
289		347
290	This option has similar advantages and disadvantages as C<text_keys>. In	348	This option has similar advantages and disadvantages as C<text_keys>. In
291	addition, this option effectively removes the ability to encode byte	349	addition, this option effectively removes the ability to automatically
292	strings, which might break some C<FREEZE> and C<TO_CBOR> methods that rely	350	encode byte strings, which might break some C<FREEZE> and C<TO_CBOR>
293	on this, such as bignum encoding, so this option is mainly useful for very	351	methods that rely on this.
294	simple data.	352
		353	A workaround is to use explicit type casts, which are unaffected by this option.
295		354
296	=item $cbor = $cbor->validate_utf8 ([$enable])	355	=item $cbor = $cbor->validate_utf8 ([$enable])
297		356
298	=item $enabled = $cbor->get_validate_utf8	357	=item $enabled = $cbor->get_validate_utf8
299		358
…		…
337	replace the tagged value in the decoded data structure, or no values,	396	replace the tagged value in the decoded data structure, or no values,
338	which will result in default handling, which currently means the decoder	397	which will result in default handling, which currently means the decoder
339	creates a C<CBOR::XS::Tagged> object to hold the tag and the value.	398	creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
340		399
341	When the filter is cleared (the default state), the default filter	400	When the filter is cleared (the default state), the default filter
342	function, C<CBOR::XS::default_filter>, is used. This function simply looks	401	function, C<CBOR::XS::default_filter>, is used. This function simply
343	up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists it must be	402	looks up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists
344	a code reference that is called with tag and value, and is responsible for	403	it must be a code reference that is called with tag and value, and is
345	decoding the value. If no entry exists, it returns no values.	404	responsible for decoding the value. If no entry exists, it returns no
		405	values. C<CBOR::XS> provides a number of default filter functions already,
		406	and the C<%CBOR::XS::FILTER> hash can be freely extended with more.
		407
		408	C<CBOR::XS> additionally provides an alternative filter function that is
		409	supposed to be safe to use with untrusted data (which the default filter
		410	might not), called C<CBOR::XS::safe_filter>, which works the same as
		411	the C<default_filter> but uses the C<%CBOR::XS::SAFE_FILTER> variable
		412	instead. It is prepopulated with the tag decoding functions that are
		413	deemed safe (basically the same as C<%CBOR::XS::FILTER> without all
		414	the bignum tags), and can be extended by user code as well, although,
		415	obviously, one should be very careful about adding decoding functions
		416	here, since the expectation is that they are safe to use on untrusted
		417	data, after all.
346		418
347	Example: decode all tags not handled internally into C<CBOR::XS::Tagged>	419	Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
348	objects, with no other special handling (useful when working with	420	objects, with no other special handling (useful when working with
349	potentially "unsafe" CBOR data).	421	potentially "unsafe" CBOR data).
350		422
…		…
357	my ($tag, $value) = @_;	429	my ($tag, $value) = @_;
358		430
359	"tag 1347375694 value $value"	431	"tag 1347375694 value $value"
360	};	432	};
361		433
		434	Example: provide your own filter function that looks up tags in your own
		435	hash:
		436
		437	my %my_filter = (
		438	998347484 => sub {
		439	my ($tag, $value) = @_;
		440
		441	"tag 998347484 value $value"
		442	},
		443	);
		444
		445	my $coder = CBOR::XS->new->filter (sub {
		446	&{ $my_filter{$_[0]} or return }
		447	});
		448
		449
		450	Example: use the safe filter function (see L<SECURITY CONSIDERATIONS> for
		451	more considerations regarding security).
		452
		453	CBOR::XS->new->filter (\&CBOR::XS::safe_filter)->decode ($cbor_data);
		454
362	=item $cbor_data = $cbor->encode ($perl_scalar)	455	=item $cbor_data = $cbor->encode ($perl_scalar)
363		456
364	Converts the given Perl data structure (a scalar value) to its CBOR	457	Converts the given Perl data structure (a scalar value) to its CBOR
365	representation.	458	representation.
366		459
…		…
375	when there is trailing garbage after the CBOR string, it will silently	468	when there is trailing garbage after the CBOR string, it will silently
376	stop parsing there and return the number of characters consumed so far.	469	stop parsing there and return the number of characters consumed so far.
377		470
378	This is useful if your CBOR texts are not delimited by an outer protocol	471	This is useful if your CBOR texts are not delimited by an outer protocol
379	and you need to know where the first CBOR string ends and the next one	472	and you need to know where the first CBOR string ends and the next one
380	starts.	473	starts - CBOR strings are self-delimited, so it is possible to concatenate
		474	CBOR strings without any delimiters or size fields and recover their data.
381		475
382	CBOR::XS->new->decode_prefix ("......")	476	CBOR::XS->new->decode_prefix ("......")
383	=> ("...", 3)	477	=> ("...", 3)
384		478
385	=back	479	=back
…		…
391	Perl data structure in memory at one time, it does allow you to parse a	485	Perl data structure in memory at one time, it does allow you to parse a
392	CBOR stream incrementally, similar to using "decode_prefix" to see	486	CBOR stream incrementally, similar to using "decode_prefix" to see
393	if a full CBOR object is available, but is much more efficient.	487	if a full CBOR object is available, but is much more efficient.
394		488
395	It basically works by parsing as much of a CBOR string as possible - if	489	It basically works by parsing as much of a CBOR string as possible - if
396	the CBOR data is not complete yet, the pasrer will remember where it was,	490	the CBOR data is not complete yet, the parser will remember where it was,
397	to be able to restart when more data has been accumulated. Once enough	491	to be able to restart when more data has been accumulated. Once enough
398	data is available to either decode a complete CBOR value or raise an	492	data is available to either decode a complete CBOR value or raise an
399	error, a real decode will be attempted.	493	error, a real decode will be attempted.
400		494
401	A typical use case would be a network protocol that consists of sending	495	A typical use case would be a network protocol that consists of sending
…		…
553	create such objects.	647	create such objects.
554		648
555	=item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error	649	=item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
556		650
557	These special values become CBOR true, CBOR false and CBOR undefined	651	These special values become CBOR true, CBOR false and CBOR undefined
558	values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly	652	values, respectively.
559	if you want.
560		653
561	=item other blessed objects	654	=item other blessed objects
562		655
563	Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See	656	Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
564	L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this	657	L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
…		…
589	"$x"; # stringified	682	"$x"; # stringified
590	$x .= ""; # another, more awkward way to stringify	683	$x .= ""; # another, more awkward way to stringify
591	print $x; # perl does it for you, too, quite often	684	print $x; # perl does it for you, too, quite often
592		685
593	You can force whether a string is encoded as byte or text string by using	686	You can force whether a string is encoded as byte or text string by using
594	C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled):	687	C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled).
595		688
596	utf8::upgrade $x; # encode $x as text string	689	utf8::upgrade $x; # encode $x as text string
597	utf8::downgrade $x; # encode $x as byte string	690	utf8::downgrade $x; # encode $x as byte string
		691
		692	More options are available, see L<TYPE CASTS>, below, and the C<text_keys>
		693	and C<text_strings> options.
598		694
599	Perl doesn't define what operations up- and downgrade strings, so if the	695	Perl doesn't define what operations up- and downgrade strings, so if the
600	difference between byte and text is important, you should up- or downgrade	696	difference between byte and text is important, you should up- or downgrade
601	your string as late as possible before encoding. You can also force the	697	your string as late as possible before encoding. You can also force the
602	use of CBOR text strings by using C<text_keys> or C<text_strings>.	698	use of CBOR text strings by using C<text_keys> or C<text_strings>.
…		…
617	format will be used. Perls that use formats other than IEEE double to	713	format will be used. Perls that use formats other than IEEE double to
618	represent numerical values are supported, but might suffer loss of	714	represent numerical values are supported, but might suffer loss of
619	precision.	715	precision.
620		716
621	=back	717	=back
		718
		719	=head2 TYPE CASTS
		720
		721	B<EXPERIMENTAL>: As an experimental extension, C<CBOR::XS> allows you to
		722	force specific CBOR types to be used when encoding. That allows you to
		723	encode types not normally accessible (e.g. half floats) as well as force
		724	string types even when C<text_strings> is in effect.
		725
		726	Type forcing is done by calling a special "cast" function which keeps a
		727	copy of the value and returns a new value that can be handed over to any
		728	CBOR encoder function.
		729
		730	The following casts are currently available (all of which are unary
		731	operators, that is, have a prototype of C<$>):
		732
		733	=over
		734
		735	=item CBOR::XS::as_int $value
		736
		737	Forces the value to be encoded as some form of (basic, not bignum) integer
		738	type.
		739
		740	=item CBOR::XS::as_text $value
		741
		742	Forces the value to be encoded as (UTF-8) text values.
		743
		744	=item CBOR::XS::as_bytes $value
		745
		746	Forces the value to be encoded as a (binary) string value.
		747
		748	Example: encode a perl string as binary even though C<text_strings> is in
		749	effect.
		750
		751	CBOR::XS->new->text_strings->encode ([4, "text", CBOR::XS::as_bytes "bytevalue"]);
		752
		753	=item CBOR::XS::as_bool $value
		754
		755	Converts a Perl boolean (which can be any kind of scalar) into a CBOR
		756	boolean. Strictly the same, but shorter to write, than:
		757
		758	$value ? Types::Serialiser::true : Types::Serialiser::false
		759
		760	=item CBOR::XS::as_float16 $value
		761
		762	Forces half-float (IEEE 754 binary16) encoding of the given value.
		763
		764	=item CBOR::XS::as_float32 $value
		765
		766	Forces single-float (IEEE 754 binary32) encoding of the given value.
		767
		768	=item CBOR::XS::as_float64 $value
		769
		770	Forces double-float (IEEE 754 binary64) encoding of the given value.
		771
		772	=item CBOR::XS::as_cbor $cbor_text
		773
		774	Not a type cast per-se, this type cast forces the argument to be encoded
		775	as-is. This can be used to embed pre-encoded CBOR data.
		776
		777	Note that no checking on the validity of the C<$cbor_text> is done - it's
		778	the caller's responsibility to correctly encode values.
		779
		780	=item CBOR::XS::as_map [key => value...]
		781
		782	Treat the array reference as key value pairs and output a CBOR map. This
		783	allows you to generate CBOR maps with arbitrary key types (or, if you
		784	don't care about semantics, duplicate keys or pairs in a custom order),
		785	which is otherwise hard to do with Perl.
		786
		787	The single argument must be an array reference with an even number of
		788	elements.
		789
		790	Note that only the reference to the array is copied, the array itself is
		791	not. Modifications done to the array before calling an encoding function
		792	will be reflected in the encoded output.
		793
		794	Example: encode a CBOR map with a string and an integer as keys.
		795
		796	encode_cbor CBOR::XS::as_map [string => "value", 5 => "value"]
		797
		798	=back
		799
		800	=cut
		801
		802	sub CBOR::XS::as_cbor ($) { bless [$_[0], 0, undef], CBOR::XS::Tagged:: }
		803	sub CBOR::XS::as_int ($) { bless [$_[0], 1, undef], CBOR::XS::Tagged:: }
		804	sub CBOR::XS::as_bytes ($) { bless [$_[0], 2, undef], CBOR::XS::Tagged:: }
		805	sub CBOR::XS::as_text ($) { bless [$_[0], 3, undef], CBOR::XS::Tagged:: }
		806	sub CBOR::XS::as_float16 ($) { bless [$_[0], 4, undef], CBOR::XS::Tagged:: }
		807	sub CBOR::XS::as_float32 ($) { bless [$_[0], 5, undef], CBOR::XS::Tagged:: }
		808	sub CBOR::XS::as_float64 ($) { bless [$_[0], 6, undef], CBOR::XS::Tagged:: }
		809
		810	sub CBOR::XS::as_bool ($) { $_[0] ? $Types::Serialiser::true : $Types::Serialiser::false }
		811
		812	sub CBOR::XS::as_map ($) {
		813	ARRAY:: eq ref $_[0]
		814	and $#{ $_[0] } & 1
		815	or do { require Carp; Carp::croak ("CBOR::XS::as_map only acepts array references with an even number of elements, caught") };
		816
		817	bless [$_[0], 7, undef], CBOR::XS::Tagged::
		818	}
622		819
623	=head2 OBJECT SERIALISATION	820	=head2 OBJECT SERIALISATION
624		821
625	This module implements both a CBOR-specific and the generic	822	This module implements both a CBOR-specific and the generic
626	L<Types::Serialiser> object serialisation protocol. The following	823	L<Types::Serialiser> object serialisation protocol. The following
…		…
978	CBOR intact.	1175	CBOR intact.
979		1176
980		1177
981	=head1 SECURITY CONSIDERATIONS	1178	=head1 SECURITY CONSIDERATIONS
982		1179
983	When you are using CBOR in a protocol, talking to untrusted potentially	1180	Tl;dr... if you want to decode or encode CBOR from untrusted sources, you
984	hostile creatures requires relatively few measures.	1181	should start with a coder object created via C<new_safe> (which implements
		1182	the mitigations explained below):
985		1183
		1184	my $coder = CBOR::XS->new_safe;
		1185
		1186	my $data = $coder->decode ($cbor_text);
		1187	my $cbor = $coder->encode ($data);
		1188
		1189	Longer version: When you are using CBOR in a protocol, talking to
		1190	untrusted potentially hostile creatures requires some thought:
		1191
		1192	=over 4
		1193
		1194	=item Security of the CBOR decoder itself
		1195
986	First of all, your CBOR decoder should be secure, that is, should not have	1196	First and foremost, your CBOR decoder should be secure, that is, should
		1197	not have any buffer overflows or similar bugs that could potentially be
987	any buffer overflows. Obviously, this module should ensure that and I am	1198	exploited. Obviously, this module should ensure that and I am trying hard
988	trying hard on making that true, but you never know.	1199	on making that true, but you never know.
989		1200
		1201	=item CBOR::XS can invoke almost arbitrary callbacks during decoding
		1202
990	Second, CBOR::XS supports object serialisation - decoding CBOR can cause	1203	CBOR::XS supports object serialisation - decoding CBOR can cause calls
991	calls to I<any> C<THAW> method in I<any> package that exists in your	1204	to I<any> C<THAW> method in I<any> package that exists in your process
992	process (that is, CBOR::XS will not try to load modules, but any existing	1205	(that is, CBOR::XS will not try to load modules, but any existing C<THAW>
993	C<THAW> method or function can be called, so they all have to be secure).	1206	method or function can be called, so they all have to be secure).
994		1207
		1208	Less obviously, it will also invoke C<TO_CBOR> and C<FREEZE> methods -
		1209	even if all your C<THAW> methods are secure, encoding data structures from
		1210	untrusted sources can invoke those and trigger bugs in those.
		1211
		1212	So, if you are not sure about the security of all the modules you
		1213	have loaded (you shouldn't), you should disable this part using
		1214	C<forbid_objects> or using C<new_safe>.
		1215
		1216	=item CBOR can be extended with tags that call library code
		1217
		1218	CBOR can be extended with tags, and C<CBOR::XS> has a registry of
		1219	conversion functions for many existing tags that can be extended via
		1220	third-party modules (see the C<filter> method).
		1221
		1222	If you don't trust these, you should configure the "safe" filter function,
		1223	C<CBOR::XS::safe_filter> (C<new_safe> does this), which by default only
		1224	includes conversion functions that are considered "safe" by the author
		1225	(but again, they can be extended by third party modules).
		1226
		1227	Depending on your level of paranoia, you can use the "safe" filter:
		1228
		1229	$cbor->filter (\&CBOR::XS::safe_filter);
		1230
		1231	... your own filter...
		1232
		1233	$cbor->filter (sub { ... do your stuffs here ... });
		1234
		1235	... or even no filter at all, disabling all tag decoding:
		1236
		1237	$cbor->filter (sub { });
		1238
		1239	This is never a problem for encoding, as the tag mechanism only exists in
		1240	CBOR texts.
		1241
		1242	=item Resource-starving attacks: object memory usage
		1243
995	Third, you need to avoid resource-starving attacks. That means you should	1244	You need to avoid resource-starving attacks. That means you should limit
996	limit the size of CBOR data you accept, or make sure that when your	1245	the size of CBOR data you accept, or make sure that when your resources
997	resources run out, that's just fine (e.g. by using a separate process that	1246	run out, that's just fine (e.g. by using a separate process that can
998	can crash safely). The size of a CBOR string in octets is usually a good	1247	crash safely). The size of a CBOR string in octets is usually a good
999	indication of the size of the resources required to decode it into a Perl	1248	indication of the size of the resources required to decode it into a Perl
1000	structure. While CBOR::XS can check the size of the CBOR text, it might be	1249	structure. While CBOR::XS can check the size of the CBOR text (using
1001	too late when you already have it in memory, so you might want to check	1250	C<max_size> - done by C<new_safe>), it might be too late when you already
1002	the size before you accept the string.	1251	have it in memory, so you might want to check the size before you accept
		1252	the string.
1003		1253
		1254	As for encoding, it is possible to construct data structures that are
		1255	relatively small but result in large CBOR texts (for example by having an
		1256	array full of references to the same big data structure, which will all be
		1257	deep-cloned during encoding by default). This is rarely an actual issue
		1258	(and the worst case is still just running out of memory), but you can
		1259	reduce this risk by using C<allow_sharing>.
		1260
		1261	=item Resource-starving attacks: stack overflows
		1262
1004	Fourth, CBOR::XS recurses using the C stack when decoding objects and	1263	CBOR::XS recurses using the C stack when decoding objects and arrays. The
1005	arrays. The C stack is a limited resource: for instance, on my amd64	1264	C stack is a limited resource: for instance, on my amd64 machine with 8MB
1006	machine with 8MB of stack size I can decode around 180k nested arrays but	1265	of stack size I can decode around 180k nested arrays but only 14k nested
1007	only 14k nested CBOR objects (due to perl itself recursing deeply on croak	1266	CBOR objects (due to perl itself recursing deeply on croak to free the
1008	to free the temporary). If that is exceeded, the program crashes. To be	1267	temporary). If that is exceeded, the program crashes. To be conservative,
1009	conservative, the default nesting limit is set to 512. If your process	1268	the default nesting limit is set to 512. If your process has a smaller
1010	has a smaller stack, you should adjust this setting accordingly with the	1269	stack, you should adjust this setting accordingly with the C<max_depth>
1011	C<max_depth> method.	1270	method.
		1271
		1272	=item Resource-starving attacks: CPU en-/decoding complexity
		1273
		1274	CBOR::XS will use the L<Math::BigInt>, L<Math::BigFloat> and
		1275	L<Math::BigRat> libraries to represent and encode/decode bignums. These can be
		1276	very slow (as in, centuries of CPU time) and can even crash your program
		1277	(and are generally not very trustworthy). See the next section on bignum
		1278	security for details.
		1279
		1280	=item Data breaches: leaking information in error messages
		1281
		1282	CBOR::XS might leak contents of your Perl data structures in its error
		1283	messages, so when you serialise sensitive information you might want to
		1284	make sure that exceptions thrown by CBOR::XS will not end up in front of
		1285	untrusted eyes.
		1286
		1287	=item Something else...
1012		1288
1013	Something else could bomb you, too, that I forgot to think of. In that	1289	Something else could bomb you, too, that I forgot to think of. In that
1014	case, you get to keep the pieces. I am always open for hints, though...	1290	case, you get to keep the pieces. I am always open for hints, though...
1015		1291
1016	Also keep in mind that CBOR::XS might leak contents of your Perl data	1292	=back
1017	structures in its error messages, so when you serialise sensitive
1018	information you might want to make sure that exceptions thrown by CBOR::XS
1019	will not end up in front of untrusted eyes.
1020		1293
1021		1294
1022	=head1 BIGNUM SECURITY CONSIDERATIONS	1295	=head1 BIGNUM SECURITY CONSIDERATIONS
1023		1296
1024	CBOR::XS provides a C<TO_CBOR> method for both L<Math::BigInt> and	1297	CBOR::XS provides a C<TO_CBOR> method for both L<Math::BigInt> and
…		…
1073	=head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT	1346	=head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT
1074		1347
1075	On perls that were built without 64 bit integer support (these are rare	1348	On perls that were built without 64 bit integer support (these are rare
1076	nowadays, even on 32 bit architectures, as all major Perl distributions	1349	nowadays, even on 32 bit architectures, as all major Perl distributions
1077	are built with 64 bit integer support), support for any kind of 64 bit	1350	are built with 64 bit integer support), support for any kind of 64 bit
1078	integer in CBOR is very limited - most likely, these 64 bit values will	1351	value in CBOR is very limited - most likely, these 64 bit values will
1079	be truncated, corrupted, or otherwise not decoded correctly. This also	1352	be truncated, corrupted, or otherwise not decoded correctly. This also
1080	includes string, array and map sizes that are stored as 64 bit integers.	1353	includes string, float, array and map sizes that are stored as 64 bit
		1354	integers.
1081		1355
1082		1356
1083	=head1 THREADS	1357	=head1 THREADS
1084		1358
1085	This module is I<not> guaranteed to be thread safe and there are no	1359	This module is I<not> guaranteed to be thread safe and there are no
…		…
1098		1372
1099	Please refrain from using rt.cpan.org or any other bug reporting	1373	Please refrain from using rt.cpan.org or any other bug reporting
1100	service. I put the contact address into my modules for a reason.	1374	service. I put the contact address into my modules for a reason.
1101		1375
1102	=cut	1376	=cut
		1377
		1378	# clumsy and slow hv_store-in-hash helper function
		1379	sub _hv_store {
		1380	$_[0]{$_[1]} = $_[2];
		1381	}
1103		1382
1104	our %FILTER = (	1383	our %FILTER = (
1105	0 => sub { # rfc4287 datetime, utf-8	1384	0 => sub { # rfc4287 datetime, utf-8
1106	require Time::Piece;	1385	require Time::Piece;
1107	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"	1386	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"
…		…
1180	# 34 # base64 rfc46484, utf-8	1459	# 34 # base64 rfc46484, utf-8
1181	# 35 # regex pcre/ecma262, utf-8	1460	# 35 # regex pcre/ecma262, utf-8
1182	# 36 # mime message rfc2045, utf-8	1461	# 36 # mime message rfc2045, utf-8
1183	);	1462	);
1184		1463
1185	sub CBOR::XS::default_filter {	1464	sub default_filter {
1186	&{ $FILTER{$_[0]} or return }	1465	&{ $FILTER{$_[0]} or return }
		1466	}
		1467
		1468	our %SAFE_FILTER = map { $_ => $FILTER{$_} } 0, 1, 21, 22, 23, 32;
		1469
		1470	sub safe_filter {
		1471	&{ $SAFE_FILTER{$_[0]} or return }
1187	}	1472	}
1188		1473
1189	sub URI::TO_CBOR {	1474	sub URI::TO_CBOR {
1190	my $uri = $_[0]->as_string;	1475	my $uri = $_[0]->as_string;
1191	utf8::upgrade $uri;	1476	utf8::upgrade $uri;

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing CBOR-XS/XS.pm (file contents): Revision 1.63 by root, Fri Nov 25 12:16:12 2016 UTC vs. Revision 1.90 by root, Sat Nov 18 18:19:57 2023 UTC

Diff Legend

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.63 by root, Fri Nov 25 12:16:12 2016 UTC vs.
Revision 1.90 by root, Sat Nov 18 18:19:57 2023 UTC