[ViewVC] Diff of: cvs/CBOR-XS/XS.pm

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.1 by root, Fri Oct 25 23:09:45 2013 UTC vs.
Revision 1.70 by root, Sat Nov 9 07:30:36 2019 UTC

…		…
12	$perl_value = decode_cbor $binary_cbor_data;	12	$perl_value = decode_cbor $binary_cbor_data;
13		13
14	# OO-interface	14	# OO-interface
15		15
16	$coder = CBOR::XS->new;	16	$coder = CBOR::XS->new;
17	#TODO	17	$binary_cbor_data = $coder->encode ($perl_value);
		18	$perl_value = $coder->decode ($binary_cbor_data);
		19
		20	# prefix decoding
		21
		22	my $many_cbor_strings = ...;
		23	while (length $many_cbor_strings) {
		24	my ($data, $length) = $coder->decode_prefix ($many_cbor_strings);
		25	# data was decoded
		26	substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
		27	}
18		28
19	=head1 DESCRIPTION	29	=head1 DESCRIPTION
20		30
21	WARNING! THIS IS A PRE-ALPHA RELEASE! IT WILL CRASH, CORRUPT YOUR DATA AND	31	This module converts Perl data structures to the Concise Binary Object
22	EAT YOUR CHILDREN!	32	Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
		33	format that aims to use an (almost) superset of the JSON data model, i.e.
		34	when you can represent something useful in JSON, you should be able to
		35	represent it in CBOR.
23		36
24	This module converts Perl data structures to CBOR and vice versa. Its	37	In short, CBOR is a faster and quite compact binary alternative to JSON,
		38	with the added ability of supporting serialisation of Perl objects. (JSON
		39	often compresses better than CBOR though, so if you plan to compress the
		40	data later and speed is less important you might want to compare both
		41	formats first).
		42
25	primary goal is to be I<correct> and its secondary goal is to be	43	The primary goal of this module is to be I<correct> and the secondary goal
26	I<fast>. To reach the latter goal it was written in C.	44	is to be I<fast>. To reach the latter goal it was written in C.
		45
		46	To give you a general idea about speed, with texts in the megabyte range,
		47	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
		48	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
		49	data, the worse L<Storable> performs in comparison.
		50
		51	Regarding compactness, C<CBOR::XS>-encoded data structures are usually
		52	about 20% smaller than the same data encoded as (compact) JSON or
		53	L<Storable>.
		54
		55	In addition to the core CBOR data format, this module implements a
		56	number of extensions, to support cyclic and shared data structures
		57	(see C<allow_sharing> and C<allow_cycles>), string deduplication (see
		58	C<pack_strings>) and scalar references (always enabled).
27		59
28	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and	60	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
29	vice versa.	61	vice versa.
30		62
31	=cut	63	=cut
32		64
33	package CBOR::XS;	65	package CBOR::XS;
34		66
35	use common::sense;	67	use common::sense;
36		68
37	our $VERSION = 0.01;	69	our $VERSION = 1.71;
38	our @ISA = qw(Exporter);	70	our @ISA = qw(Exporter);
39		71
40	our @EXPORT = qw(encode_cbor decode_cbor);	72	our @EXPORT = qw(encode_cbor decode_cbor);
41		73
42	use Exporter;	74	use Exporter;
43	use XSLoader;	75	use XSLoader;
44		76
		77	use Types::Serialiser;
		78
		79	our $MAGIC = "\xd9\xd9\xf7";
		80
45	=head1 FUNCTIONAL INTERFACE	81	=head1 FUNCTIONAL INTERFACE
46		82
47	The following convenience methods are provided by this module. They are	83	The following convenience methods are provided by this module. They are
48	exported by default:	84	exported by default:
49		85
…		…
75	strings. All boolean flags described below are by default I<disabled>.	111	strings. All boolean flags described below are by default I<disabled>.
76		112
77	The mutators for flags all return the CBOR object again and thus calls can	113	The mutators for flags all return the CBOR object again and thus calls can
78	be chained:	114	be chained:
79		115
80	#TODO
81	my $cbor = CBOR::XS->new->encode ({a => [1,2]});	116	my $cbor = CBOR::XS->new->encode ({a => [1,2]});
		117
		118	=item $cbor = new_safe CBOR::XS
		119
		120	Create a new, safe/secure CBOR::XS object. This is similar to C<new>,
		121	but configures the coder object to be safe to use with untrusted
		122	data. Currently, this is equivalent to:
		123
		124	my $cbor = CBOR::XS
		125	->new
		126	->forbid_objects
		127	->filter (\&CBOR::XS::safe_filter)
		128	->max_size (1e8);
		129
		130	But is more future proof (it is better to crash because of a change than
		131	to be exploited in other ways).
		132
		133	=cut
		134
		135	sub new_safe {
		136	CBOR::XS
		137	->new
		138	->forbid_objects
		139	->filter (\&CBOR::XS::safe_filter)
		140	->max_size (1e8)
		141	}
82		142
83	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])	143	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
84		144
85	=item $max_depth = $cbor->get_max_depth	145	=item $max_depth = $cbor->get_max_depth
86		146
…		…
102		162
103	Note that nesting is implemented by recursion in C. The default value has	163	Note that nesting is implemented by recursion in C. The default value has
104	been chosen to be as large as typical operating systems allow without	164	been chosen to be as large as typical operating systems allow without
105	crashing.	165	crashing.
106		166
107	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	167	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
108		168
109	=item $cbor = $cbor->max_size ([$maximum_string_size])	169	=item $cbor = $cbor->max_size ([$maximum_string_size])
110		170
111	=item $max_size = $cbor->get_max_size	171	=item $max_size = $cbor->get_max_size
112		172
…		…
117	effect on C<encode> (yet).	177	effect on C<encode> (yet).
118		178
119	If no argument is given, the limit check will be deactivated (same as when	179	If no argument is given, the limit check will be deactivated (same as when
120	C<0> is specified).	180	C<0> is specified).
121		181
122	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	182	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
		183
		184	=item $cbor = $cbor->allow_unknown ([$enable])
		185
		186	=item $enabled = $cbor->get_allow_unknown
		187
		188	If C<$enable> is true (or missing), then C<encode> will I<not> throw an
		189	exception when it encounters values it cannot represent in CBOR (for
		190	example, filehandles) but instead will encode a CBOR C<error> value.
		191
		192	If C<$enable> is false (the default), then C<encode> will throw an
		193	exception when it encounters anything it cannot encode as CBOR.
		194
		195	This option does not affect C<decode> in any way, and it is recommended to
		196	leave it off unless you know your communications partner.
		197
		198	=item $cbor = $cbor->allow_sharing ([$enable])
		199
		200	=item $enabled = $cbor->get_allow_sharing
		201
		202	If C<$enable> is true (or missing), then C<encode> will not double-encode
		203	values that have been referenced before (e.g. when the same object, such
		204	as an array, is referenced multiple times), but instead will emit a
		205	reference to the earlier value.
		206
		207	This means that such values will only be encoded once, and will not result
		208	in a deep cloning of the value on decode, in decoders supporting the value
		209	sharing extension. This also makes it possible to encode cyclic data
		210	structures (which need C<allow_cycles> to be enabled to be decoded by this
		211	module).
		212
		213	It is recommended to leave it off unless you know your
		214	communication partner supports the value sharing extensions to CBOR
		215	(L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
		216	resulting data structure might be unusable.
		217
		218	Detecting shared values incurs a runtime overhead when values are encoded
		219	that have a reference counter larger than one, and might unnecessarily
		220	increase the encoded size, as potentially shared values are encoded as
		221	shareable whether or not they are actually shared.
		222
		223	At the moment, only targets of references can be shared (e.g. scalars,
		224	arrays or hashes pointed to by a reference). Weirder constructs, such as
		225	an array with multiple "copies" of the I<same> string, which are hard but
		226	not impossible to create in Perl, are not supported (this is the same as
		227	with L<Storable>).
		228
		229	If C<$enable> is false (the default), then C<encode> will encode shared
		230	data structures repeatedly, unsharing them in the process. Cyclic data
		231	structures cannot be encoded in this mode.
		232
		233	This option does not affect C<decode> in any way - shared values and
		234	references will always be decoded properly if present.
		235
		236	=item $cbor = $cbor->allow_cycles ([$enable])
		237
		238	=item $enabled = $cbor->get_allow_cycles
		239
		240	If C<$enable> is true (or missing), then C<decode> will happily decode
		241	self-referential (cyclic) data structures. By default these will not be
		242	decoded, as they need manual cleanup to avoid memory leaks, so code that
		243	isn't prepared for this will not leak memory.
		244
		245	If C<$enable> is false (the default), then C<decode> will throw an error
		246	when it encounters a self-referential/cyclic data structure.
		247
		248	FUTURE DIRECTION: the motivation behind this option is to avoid I<real>
		249	cycles - future versions of this module might choose to decode cyclic data
		250	structures using weak references when this option is off, instead of
		251	throwing an error.
		252
		253	This option does not affect C<encode> in any way - shared values and
		254	references will always be encoded properly if present.
		255
		256	=item $cbor = $cbor->forbid_objects ([$enable])
		257
		258	=item $enabled = $cbor->get_forbid_objects
		259
		260	Disables the use of the object serialiser protocol.
		261
		262	If C<$enable> is true (or missing), then C<encode> will throw an
		263	exception when it encounters perl objects that would be encoded using the
		264	perl-object tag (26). When C<decode> encounters such tags, it will fall
		265	back to the general filter/tagged logic as if this were an unknown tag (by
		266	default resulting in a C<CBOR::XS::Tagged> object).
		267
		268	If C<$enable> is false (the default), then C<encode> will use the
		269	L<Types::Serialiser> object serialisation protocol to serialise objects
		270	into perl-object tags, and C<decode> will do the same to decode such tags.
		271
		272	See L<SECURITY CONSIDERATIONS>, below, for more info on why forbidding this
		273	protocol can be useful.
		274
		275	=item $cbor = $cbor->pack_strings ([$enable])
		276
		277	=item $enabled = $cbor->get_pack_strings
		278
		279	If C<$enable> is true (or missing), then C<encode> will try not to encode
		280	the same string twice, but will instead encode a reference to the string
		281	instead. Depending on your data format, this can save a lot of space, but
		282	also results in a very large runtime overhead (expect encoding times to be
		283	2-4 times as high as without).
		284
		285	It is recommended to leave it off unless you know your
		286	communications partner supports the stringref extension to CBOR
		287	(L<http://cbor.schmorp.de/stringref>), as without decoder support, the
		288	resulting data structure might not be usable.
		289
		290	If C<$enable> is false (the default), then C<encode> will encode strings
		291	the standard CBOR way.
		292
		293	This option does not affect C<decode> in any way - string references will
		294	always be decoded properly if present.
		295
		296	=item $cbor = $cbor->text_keys ([$enable])
		297
		298	=item $enabled = $cbor->get_text_keys
		299
		300	If C<$enable> is true (or missing), then C<encode> will encode all
		301	perl hash keys as CBOR text strings/UTF-8 string, upgrading them as needed.
		302
		303	If C<$enable> is false (the default), then C<encode> will encode hash keys
		304	normally - upgraded perl strings (strings internally encoded as UTF-8) as
		305	CBOR text strings, and downgraded perl strings as CBOR byte strings.
		306
		307	This option does not affect C<decode> in any way.
		308
		309	This option is useful for interoperability with CBOR decoders that don't
		310	treat byte strings as a form of text. It is especially useful as Perl
		311	gives very little control over hash keys.
		312
		313	Enabling this option can be slow, as all downgraded hash keys that are
		314	encoded need to be scanned and converted to UTF-8.
		315
		316	=item $cbor = $cbor->text_strings ([$enable])
		317
		318	=item $enabled = $cbor->get_text_strings
		319
		320	This option works similar to C<text_keys>, above, but works on all strings
		321	(including hash keys), so C<text_keys> has no further effect after
		322	enabling C<text_strings>.
		323
		324	If C<$enable> is true (or missing), then C<encode> will encode all perl
		325	strings as CBOR text strings/UTF-8 strings, upgrading them as needed.
		326
		327	If C<$enable> is false (the default), then C<encode> will encode strings
		328	normally (but see C<text_keys>) - upgraded perl strings (strings
		329	internally encoded as UTF-8) as CBOR text strings, and downgraded perl
		330	strings as CBOR byte strings.
		331
		332	This option does not affect C<decode> in any way.
		333
		334	This option has similar advantages and disadvantages as C<text_keys>. In
		335	addition, this option effectively removes the ability to encode byte
		336	strings, which might break some C<FREEZE> and C<TO_CBOR> methods that rely
		337	on this, such as bignum encoding, so this option is mainly useful for very
		338	simple data.
		339
		340	=item $cbor = $cbor->validate_utf8 ([$enable])
		341
		342	=item $enabled = $cbor->get_validate_utf8
		343
		344	If C<$enable> is true (or missing), then C<decode> will validate that
		345	elements (text strings) containing UTF-8 data in fact contain valid UTF-8
		346	data (instead of blindly accepting it). This validation obviously takes
		347	extra time during decoding.
		348
		349	The concept of "valid UTF-8" used is perl's concept, which is a superset
		350	of the official UTF-8.
		351
		352	If C<$enable> is false (the default), then C<decode> will blindly accept
		353	UTF-8 data, marking them as valid UTF-8 in the resulting data structure
		354	regardless of whether that's true or not.
		355
		356	Perl isn't too happy about corrupted UTF-8 in strings, but should
		357	generally not crash or do similarly evil things. Extensions might be not
		358	so forgiving, so it's recommended to turn on this setting if you receive
		359	untrusted CBOR.
		360
		361	This option does not affect C<encode> in any way - strings that are
		362	supposedly valid UTF-8 will simply be dumped into the resulting CBOR
		363	string without checking whether that is, in fact, true or not.
		364
		365	=item $cbor = $cbor->filter ([$cb->($tag, $value)])
		366
		367	=item $cb_or_undef = $cbor->get_filter
		368
		369	Sets or replaces the tagged value decoding filter (when C<$cb> is
		370	specified) or clears the filter (if no argument or C<undef> is provided).
		371
		372	The filter callback is called only during decoding, when a non-enforced
		373	tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
		374	list of enforced tags). For specific tags, it's often better to provide a
		375	default converter using the C<%CBOR::XS::FILTER> hash (see below).
		376
		377	The first argument is the numerical tag, the second is the (decoded) value
		378	that has been tagged.
		379
		380	The filter function should return either exactly one value, which will
		381	replace the tagged value in the decoded data structure, or no values,
		382	which will result in default handling, which currently means the decoder
		383	creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
		384
		385	When the filter is cleared (the default state), the default filter
		386	function, C<CBOR::XS::default_filter>, is used. This function simply
		387	looks up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists
		388	it must be a code reference that is called with tag and value, and is
		389	responsible for decoding the value. If no entry exists, it returns no
		390	values. C<CBOR::XS> provides a number of default filter functions already,
		391	and the C<%CBOR::XS::FILTER> hash can be freely extended with more.
		392
		393	C<CBOR::XS> additionally provides an alternative filter function that is
		394	supposed to be safe to use with untrusted data (which the default filter
		395	might not), called C<CBOR::XS::safe_filter>, which works the same as
		396	the C<default_filter> but uses the C<%CBOR::XS::SAFE_FILTER> variable
		397	instead. It is prepopulated with the tag decoding functions that are
		398	deemed safe (basically the same as C<%CBOR::XS::FILTER> without all
		399	the bignum tags), and can be extended by user code as well, although,
		400	obviously, one should be very careful about adding decoding functions
		401	here, since the expectation is that they are safe to use on untrusted
		402	data, after all.
		403
		404	Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
		405	objects, with no other special handling (useful when working with
		406	potentially "unsafe" CBOR data).
		407
		408	CBOR::XS->new->filter (sub { })->decode ($cbor_data);
		409
		410	Example: provide a global filter for tag 1347375694, converting the value
		411	into some string form.
		412
		413	$CBOR::XS::FILTER{1347375694} = sub {
		414	my ($tag, $value) = @_;
		415
		416	"tag 1347375694 value $value"
		417	};
		418
		419	Example: provide your own filter function that looks up tags in your own
		420	hash:
		421
		422	my %my_filter = (
		423	998347484 => sub {
		424	my ($tag, $value) = @_;
		425
		426	"tag 998347484 value $value"
		427	},
		428	);
		429
		430	my $coder = CBOR::XS->new->filter (sub {
		431	&{ $my_filter{$_[0]} or return }
		432	});
		433
		434
		435	Example: use the safe filter function (see L<SECURITY CONSIDERATIONS> for
		436	more considerations on security).
		437
		438	CBOR::XS->new->filter (\&CBOR::XS::safe_filter)->decode ($cbor_data);
123		439
124	=item $cbor_data = $cbor->encode ($perl_scalar)	440	=item $cbor_data = $cbor->encode ($perl_scalar)
125		441
126	Converts the given Perl data structure (a scalar value) to its CBOR	442	Converts the given Perl data structure (a scalar value) to its CBOR
127	representation.	443	representation.
…		…
137	when there is trailing garbage after the CBOR string, it will silently	453	when there is trailing garbage after the CBOR string, it will silently
138	stop parsing there and return the number of characters consumed so far.	454	stop parsing there and return the number of characters consumed so far.
139		455
140	This is useful if your CBOR texts are not delimited by an outer protocol	456	This is useful if your CBOR texts are not delimited by an outer protocol
141	and you need to know where the first CBOR string ends and the next one	457	and you need to know where the first CBOR string ends and the next one
142	starts.	458	starts - CBOR strings are self-delimited, so it is possible to concatenate
		459	CBOR strings without any delimiters or size fields and recover their data.
143		460
144	CBOR::XS->new->decode_prefix ("......")	461	CBOR::XS->new->decode_prefix ("......")
145	=> ("...", 3)	462	=> ("...", 3)
		463
		464	=back
		465
		466	=head2 INCREMENTAL PARSING
		467
		468	In some cases, there is the need for incremental parsing of CBOR
		469	texts. While this module always has to keep both CBOR text and resulting
		470	Perl data structure in memory at one time, it does allow you to parse a
		471	CBOR stream incrementally, using a mechanism similar to using "decode_prefix" to see
		472	if a full CBOR object is available, but much more efficient.
		473
		474	It basically works by parsing as much of a CBOR string as possible - if
		475	the CBOR data is not complete yet, the parser will remember where it was,
		476	to be able to restart when more data has been accumulated. Once enough
		477	data is available to either decode a complete CBOR value or raise an
		478	error, a real decode will be attempted.
		479
		480	A typical use case would be a network protocol that consists of sending
		481	and receiving CBOR-encoded messages. The solution that works with CBOR and
		482	about anything else is by prepending a length to every CBOR value, so the
		483	receiver knows how many octets to read. More compact (and slightly slower)
		484	would be to just send CBOR values back-to-back, as C<CBOR::XS> knows where
		485	a CBOR value ends, and doesn't need an explicit length.
		486
		487	The following methods help with this:
		488
		489	=over 4
		490
		491	=item @decoded = $cbor->incr_parse ($buffer)
		492
		493	This method attempts to decode exactly one CBOR value from the beginning
		494	of the given C<$buffer>. The value is removed from the C<$buffer> on
		495	success. When C<$buffer> doesn't contain a complete value yet, it returns
		496	nothing. Finally, when the C<$buffer> doesn't start with something
		497	that could ever be a valid CBOR value, it raises an exception, just as
		498	C<decode> would. In the latter case the decoder state is undefined and
		499	must be reset before being able to parse further.
		500
		501	This method modifies the C<$buffer> in place. When no CBOR value can be
		502	decoded, the decoder stores the current string offset. On the next call,
		503	it continues decoding at the place where it stopped before. For this to make
		504	sense, the C<$buffer> must begin with the same octets as on previous
		505	unsuccessful calls.
		506
		507	You can call this method in scalar context, in which case it either
		508	returns a decoded value or C<undef>. This makes it impossible to
		509	distinguish between CBOR null values (which decode to C<undef>) and an
		510	unsuccessful decode, which is often acceptable.
		511
		512	=item @decoded = $cbor->incr_parse_multiple ($buffer)
		513
		514	Same as C<incr_parse>, but attempts to decode as many CBOR values as
		515	possible in one go, instead of at most one. Calls to C<incr_parse> and
		516	C<incr_parse_multiple> can be interleaved.
		517
		518	=item $cbor->incr_reset
		519
		520	Resets the incremental decoder. This throws away any saved state, so that
		521	subsequent calls to C<incr_parse> or C<incr_parse_multiple> start to parse
		522	a new CBOR value from the beginning of the C<$buffer> again.
		523
		524	This method can be called at any time, but it I<must> be called if you want
		525	to change your C<$buffer> or there was a decoding error and you want to
		526	reuse the C<$cbor> object for future incremental parsings.
146		527
147	=back	528	=back
148		529
149		530
150	=head1 MAPPING	531	=head1 MAPPING
…		…
161		542
162	=head2 CBOR -> PERL	543	=head2 CBOR -> PERL
163		544
164	=over 4	545	=over 4
165		546
166	=item True, False	547	=item integers
167		548
168	These CBOR values become C<CBOR::XS::true> and C<CBOR::XS::false>,	549	CBOR integers become (numeric) perl scalars. On perls without 64 bit
		550	support, 64 bit integers will be truncated or otherwise corrupted.
		551
		552	=item byte strings
		553
		554	Byte strings will become octet strings in Perl (the Byte values 0..255
		555	will simply become characters of the same value in Perl).
		556
		557	=item UTF-8 strings
		558
		559	UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
		560	decoded into proper Unicode code points. At the moment, the validity of
		561	the UTF-8 octets will not be validated - corrupt input will result in
		562	corrupted Perl strings.
		563
		564	=item arrays, maps
		565
		566	CBOR arrays and CBOR maps will be converted into references to a Perl
		567	array or hash, respectively. The keys of the map will be stringified
		568	during this process.
		569
		570	=item null
		571
		572	CBOR null becomes C<undef> in Perl.
		573
		574	=item true, false, undefined
		575
		576	These CBOR values become C<Types::Serialiser::true>,
		577	C<Types::Serialiser::false> and C<Types::Serialiser::error>,
169	respectively. They are overloaded to act almost exactly like the numbers	578	respectively. They are overloaded to act almost exactly like the numbers
170	C<1> and C<0>. You can check whether a scalar is a CBOR boolean by using	579	C<1> and C<0> (for true and false) or to throw an exception on access (for
171	the C<CBOR::XS::is_bool> function.	580	error). See the L<Types::Serialiser> manpage for details.
172		581
173	=item null	582	=item tagged values
174		583
175	A CBOR Null value becomes C<undef> in Perl.	584	Tagged items consist of a numeric tag and another CBOR value.
		585
		586	See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
		587	for details on which tags are handled how.
		588
		589	=item anything else
		590
		591	Anything else (e.g. unsupported simple values) will raise a decoding
		592	error.
176		593
177	=back	594	=back
178		595
179		596
180	=head2 PERL -> CBOR	597	=head2 PERL -> CBOR
181		598
182	The mapping from Perl to CBOR is slightly more difficult, as Perl is a	599	The mapping from Perl to CBOR is slightly more difficult, as Perl is a
183	truly typeless language, so we can only guess which CBOR type is meant by	600	typeless language. That means this module can only guess which CBOR type
184	a Perl value.	601	is meant by a perl value.
185		602
186	=over 4	603	=over 4
187		604
188	=item hash references	605	=item hash references
189		606
190	Perl hash references become CBOR maps. As there is no inherent ordering	607	Perl hash references become CBOR maps. As there is no inherent ordering in
191	in hash keys (or CBOR maps), they will usually be encoded in a	608	hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
192	pseudo-random order.	609	order. This order can be different each time a hash is encoded.
		610
		611	Currently, tied hashes will use the indefinite-length format, while normal
		612	hashes will use the fixed-length format.
193		613
194	=item array references	614	=item array references
195		615
196	Perl array references become CBOR arrays.	616	Perl array references become fixed-length CBOR arrays.
197		617
198	=item other references	618	=item other references
199		619
200	Other unblessed references are generally not allowed and will cause an	620	Other unblessed references will be represented using
201	exception to be thrown, except for references to the integers C<0> and	621	the indirection tag extension (tag value C<22098>,
202	C<1>, which get turned into C<False> and C<True> in CBOR.	622	L<http://cbor.schmorp.de/indirection>). CBOR decoders are guaranteed
		623	to be able to decode these values somehow, by either "doing the right
		624	thing", decoding into a generic tagged object, simply ignoring the tag, or
		625	something else.
203		626
204	=item CBOR::XS::true, CBOR::XS::false	627	=item CBOR::XS::Tagged objects
205		628
		629	Objects of this type must be arrays consisting of a single C<[tag, value]>
		630	pair. The (numerical) tag will be encoded as a CBOR tag, the value will
		631	be encoded as appropriate for the value. You must use C<CBOR::XS::tag> to
		632	create such objects.
		633
		634	=item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
		635
206	These special values become CBOR True and CBOR False values,	636	These special values become CBOR true, CBOR false and CBOR undefined
207	respectively. You can also use C<\1> and C<\0> directly if you want.	637	values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
		638	if you want.
208		639
209	=item blessed objects	640	=item other blessed objects
210		641
211	Blessed objects are not directly representable in CBOR. TODO	642	Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
212	See the	643	L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
213	C<allow_blessed> and C<convert_blessed> methods on various options on	644	module, and L<OBJECT SERIALISATION> for generic object serialisation.
214	how to deal with this: basically, you can choose between throwing an
215	exception, encoding the reference as if it weren't blessed, or provide
216	your own serialiser method.
217		645
218	=item simple scalars	646	=item simple scalars
219		647
220	TODO
221	Simple Perl scalars (any scalar that is not a reference) are the most	648	Simple Perl scalars (any scalar that is not a reference) are the most
222	difficult objects to encode: CBOR::XS will encode undefined scalars as	649	difficult objects to encode: CBOR::XS will encode undefined scalars as
223	CBOR C<Null> values, scalars that have last been used in a string context	650	CBOR null values, scalars that have last been used in a string context
224	before encoding as CBOR strings, and anything else as number value:	651	before encoding as CBOR strings, and anything else as number value:
225		652
226	# dump as number	653	# dump as number
227	encode_cbor [2] # yields [2]	654	encode_cbor [2] # yields [2]
228	encode_cbor [-3.0e17] # yields [-3e+17]	655	encode_cbor [-3.0e17] # yields [-3e+17]
229	my $value = 5; encode_cbor [$value] # yields [5]	656	my $value = 5; encode_cbor [$value] # yields [5]
230		657
231	# used as string, so dump as string	658	# used as string, so dump as string (either byte or text)
232	print $value;	659	print $value;
233	encode_cbor [$value] # yields ["5"]	660	encode_cbor [$value] # yields ["5"]
234		661
235	# undef becomes null	662	# undef becomes null
236	encode_cbor [undef] # yields [null]	663	encode_cbor [undef] # yields [null]
…		…
240	my $x = 3.1; # some variable containing a number	667	my $x = 3.1; # some variable containing a number
241	"$x"; # stringified	668	"$x"; # stringified
242	$x .= ""; # another, more awkward way to stringify	669	$x .= ""; # another, more awkward way to stringify
243	print $x; # perl does it for you, too, quite often	670	print $x; # perl does it for you, too, quite often
244		671
		672	You can force whether a string is encoded as byte or text string by using
		673	C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled):
		674
		675	utf8::upgrade $x; # encode $x as text string
		676	utf8::downgrade $x; # encode $x as byte string
		677
		678	Perl doesn't define what operations up- and downgrade strings, so if the
		679	difference between byte and text is important, you should up- or downgrade
		680	your string as late as possible before encoding. You can also force the
		681	use of CBOR text strings by using C<text_keys> or C<text_strings>.
		682
245	You can force the type to be a CBOR number by numifying it:	683	You can force the type to be a CBOR number by numifying it:
246		684
247	my $x = "3"; # some variable containing a string	685	my $x = "3"; # some variable containing a string
248	$x += 0; # numify it, ensuring it will be dumped as a number	686	$x += 0; # numify it, ensuring it will be dumped as a number
249	$x *= 1; # same thing, the choice is yours.	687	$x *= 1; # same thing, the choice is yours.
250		688
251	You can not currently force the type in other, less obscure, ways. Tell me	689	You can not currently force the type in other, less obscure, ways. Tell me
252	if you need this capability (but don't forget to explain why it's needed	690	if you need this capability (but don't forget to explain why it's needed
253	:).	691	:).
254		692
255	Note that numerical precision has the same meaning as under Perl (so	693	Perl values that seem to be integers generally use the shortest possible
256	binary to decimal conversion follows the same rules as in Perl, which	694	representation. Floating-point values will use either the IEEE single
257	can differ to other languages). Also, your perl interpreter might expose	695	format if possible without loss of precision, otherwise the IEEE double
258	extensions to the floating point numbers of your platform, such as	696	format will be used. Perls that use formats other than IEEE double to
259	infinities or NaN's - these cannot be represented in CBOR, and it is an	697	represent numerical values are supported, but might suffer loss of
260	error to pass those in.	698	precision.
261		699
262	=back	700	=back
263		701
		702	=head2 OBJECT SERIALISATION
264		703
		704	This module implements both a CBOR-specific and the generic
		705	L<Types::Serialiser> object serialisation protocol. The following
		706	subsections explain both methods.
		707
		708	=head3 ENCODING
		709
		710	This module knows two ways to serialise a Perl object: The CBOR-specific
		711	way, and the generic way.
		712
		713	Whenever the encoder encounters a Perl object that it cannot serialise
		714	directly (most of them), it will first look up the C<TO_CBOR> method on
		715	it.
		716
		717	If it has a C<TO_CBOR> method, it will call it with the object as only
		718	argument, and expects exactly one return value, which it will then
		719	substitute and encode in the place of the object.
		720
		721	Otherwise, it will look up the C<FREEZE> method. If it exists, it will
		722	call it with the object as first argument, and the constant string C<CBOR>
		723	as the second argument, to distinguish it from other serialisers.
		724
		725	The C<FREEZE> method can return any number of values (i.e. zero or
		726	more). These will be encoded as a CBOR perl object, together with the
		727	classname.
		728
		729	These methods I<MUST NOT> change the data structure that is being
		730	serialised. Failure to comply to this can result in memory corruption -
		731	and worse.
		732
		733	If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
		734	with an error.
		735
		736	=head3 DECODING
		737
		738	Objects encoded via C<TO_CBOR> cannot (normally) be automatically decoded,
		739	but objects encoded via C<FREEZE> can be decoded using the following
		740	protocol:
		741
		742	When an encoded CBOR perl object is encountered by the decoder, it will
		743	look up the C<THAW> method, by using the stored classname, and will fail
		744	if the method cannot be found.
		745
		746	After the lookup it will call the C<THAW> method with the stored classname
		747	as first argument, the constant string C<CBOR> as second argument, and all
		748	values returned by C<FREEZE> as remaining arguments.
		749
		750	=head3 EXAMPLES
		751
		752	Here is an example C<TO_CBOR> method:
		753
		754	sub My::Object::TO_CBOR {
		755	my ($obj) = @_;
		756
		757	["this is a serialised My::Object object", $obj->{id}]
		758	}
		759
		760	When a C<My::Object> is encoded to CBOR, it will instead encode a simple
		761	array with two members: a string, and the "object id". Decoding this CBOR
		762	string will yield a normal perl array reference in place of the object.
		763
		764	A more useful and practical example would be a serialisation method for
		765	the URI module. CBOR has a custom tag value for URIs, namely 32:
		766
		767	sub URI::TO_CBOR {
		768	my ($self) = @_;
		769	my $uri = "$self"; # stringify uri
		770	utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
		771	CBOR::XS::tag 32, $uri
		772	}
		773
		774	This will encode URIs as a UTF-8 string with tag 32, which indicates an
		775	URI.
		776
		777	Decoding such an URI will not (currently) give you an URI object, but
		778	instead a CBOR::XS::Tagged object with tag number 32 and the string -
		779	exactly what was returned by C<TO_CBOR>.
		780
		781	To serialise an object so it can automatically be deserialised, you need
		782	to use C<FREEZE> and C<THAW>. To take the URI module as example, this
		783	would be a possible implementation:
		784
		785	sub URI::FREEZE {
		786	my ($self, $serialiser) = @_;
		787	"$self" # encode url string
		788	}
		789
		790	sub URI::THAW {
		791	my ($class, $serialiser, $uri) = @_;
		792	$class->new ($uri)
		793	}
		794
		795	Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
		796	example, a C<FREEZE> method that returns "type", "id" and "variant" values
		797	would cause an invocation of C<THAW> with 5 arguments:
		798
		799	sub My::Object::FREEZE {
		800	my ($self, $serialiser) = @_;
		801
		802	($self->{type}, $self->{id}, $self->{variant})
		803	}
		804
		805	sub My::Object::THAW {
		806	my ($class, $serialiser, $type, $id, $variant) = @_;
		807
		808	$class->new (type => $type, id => $id, variant => $variant)
		809	}
		810
		811
		812	=head1 MAGIC HEADER
		813
		814	There is no way to distinguish CBOR from other formats
		815	programmatically. To make it easier to distinguish CBOR from other
		816	formats, the CBOR specification has a special "magic string" that can be
		817	prepended to any CBOR string without changing its meaning.
		818
		819	This string is available as C<$CBOR::XS::MAGIC>. This module does not
		820	prepend this string to the CBOR data it generates, but it will ignore it
		821	if present, so users can prepend this string as a "file type" indicator as
		822	required.
		823
		824
		825	=head1 THE CBOR::XS::Tagged CLASS
		826
		827	CBOR has the concept of tagged values - any CBOR value can be tagged with
		828	a numeric 64 bit tag number; tag numbers are centrally administered.
		829
		830	C<CBOR::XS> handles a few tags internally when en- or decoding. You can
		831	also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
		832	decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
		833	unknown tag.
		834
		835	These objects are simply blessed array references - the first member of
		836	the array being the numerical tag, the second being the value.
		837
		838	You can interact with C<CBOR::XS::Tagged> objects in the following ways:
		839
		840	=over 4
		841
		842	=item $tagged = CBOR::XS::tag $tag, $value
		843
		844	This function(!) creates a new C<CBOR::XS::Tagged> object using the given
		845	C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
		846	value that can be encoded in CBOR, including serialisable Perl objects and
		847	C<CBOR::XS::Tagged> objects).
		848
		849	=item $tagged->[0]
		850
		851	=item $tagged->[0] = $new_tag
		852
		853	=item $tag = $tagged->tag
		854
		855	=item $new_tag = $tagged->tag ($new_tag)
		856
		857	Access/mutate the tag.
		858
		859	=item $tagged->[1]
		860
		861	=item $tagged->[1] = $new_value
		862
		863	=item $value = $tagged->value
		864
		865	=item $new_value = $tagged->value ($new_value)
		866
		867	Access/mutate the tagged value.
		868
		869	=back
		870
		871	=cut
		872
		873	sub tag($$) {
		874	bless [@_], CBOR::XS::Tagged::;
		875	}
		876
		877	sub CBOR::XS::Tagged::tag {
		878	$_[0][0] = $_[1] if $#_;
		879	$_[0][0]
		880	}
		881
		882	sub CBOR::XS::Tagged::value {
		883	$_[0][1] = $_[1] if $#_;
		884	$_[0][1]
		885	}
		886
		887	=head2 EXAMPLES
		888
		889	Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
		890
		891	You can look up CBOR tag values and meanings in the IANA registry at
		892	L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
		893
		894	Prepend a magic header (C<$CBOR::XS::MAGIC>):
		895
		896	my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
		897	# same as:
		898	my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
		899
		900	Serialise some URIs and a regex in an array:
		901
		902	my $cbor = encode_cbor [
		903	(CBOR::XS::tag 32, "http://www.nethype.de/"),
		904	(CBOR::XS::tag 32, "http://software.schmorp.de/"),
		905	(CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
		906	];
		907
		908	Wrap CBOR data in CBOR:
		909
		910	my $cbor_cbor = encode_cbor
		911	CBOR::XS::tag 24,
		912	encode_cbor [1, 2, 3];
		913
		914	=head1 TAG HANDLING AND EXTENSIONS
		915
		916	This section describes how this module handles specific tagged values
		917	and extensions. If a tag is not mentioned here and no additional filters
		918	are provided for it, then the default handling applies (creating a
		919	CBOR::XS::Tagged object on decoding, and only encoding the tag when
		920	explicitly requested).
		921
		922	Tags not handled specifically are currently converted into a
		923	L<CBOR::XS::Tagged> object, which is simply a blessed array reference
		924	consisting of the numeric tag value followed by the (decoded) CBOR value.
		925
		926	Future versions of this module reserve the right to special case
		927	additional tags (such as base64url).
		928
		929	=head2 ENFORCED TAGS
		930
		931	These tags are always handled when decoding, and their handling cannot be
		932	overridden by the user.
		933
		934	=over 4
		935
		936	=item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
		937
		938	These tags are automatically created (and decoded) for serialisable
		939	objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
		940	serialisation protocol). See L<OBJECT SERIALISATION> for details.
		941
		942	=item 28, 29 (shareable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
		943
		944	These tags are automatically decoded when encountered (and they do not
		945	result in a cyclic data structure, see C<allow_cycles>), resulting in
		946	shared values in the decoded object. They are only encoded, however, when
		947	C<allow_sharing> is enabled.
		948
		949	Not all shared values can be successfully decoded: values that reference
		950	themselves will I<currently> decode as C<undef> (this is not the same
		951	as a reference pointing to itself, which will be represented as a value
		952	that contains an indirect reference to itself - these will be decoded
		953	properly).
		954
		955	Note that considerably more shared value data structures can be decoded
		956	than will be encoded - currently, only values pointed to by references
		957	will be shared, others will not. While non-reference shared values can be
		958	generated in Perl with some effort, they were considered too unimportant
		959	to be supported in the encoder. The decoder, however, will decode these
		960	values as shared values.
		961
		962	=item 256, 25 (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
		963
		964	These tags are automatically decoded when encountered. They are only
		965	encoded, however, when C<pack_strings> is enabled.
		966
		967	=item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
		968
		969	This tag is automatically generated when a reference is encountered (with
		970	the exception of hash and array references). It is converted to a reference
		971	when decoding.
		972
		973	=item 55799 (self-describe CBOR, RFC 7049)
		974
		975	This value is not generated on encoding (unless explicitly requested by
		976	the user), and is simply ignored when decoding.
		977
		978	=back
		979
		980	=head2 NON-ENFORCED TAGS
		981
		982	These tags have default filters provided when decoding. Their handling can
		983	be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
		984	providing a custom C<filter> callback when decoding.
		985
		986	When they result in decoding into a specific Perl class, the module
		987	usually provides a corresponding C<TO_CBOR> method as well.
		988
		989	When any of these need to load additional modules that are not part of the
		990	perl core distribution (e.g. L<URI>), it is (currently) up to the user to
		991	provide these modules. The decoding usually fails with an exception if the
		992	required module cannot be loaded.
		993
		994	=over 4
		995
		996	=item 0, 1 (date/time string, seconds since the epoch)
		997
		998	These tags are decoded into L<Time::Piece> objects. The corresponding
		999	C<Time::Piece::TO_CBOR> method always encodes into tag 1 values currently.
		1000
		1001	The L<Time::Piece> API is generally surprisingly bad, and fractional
		1002	seconds are only accidentally kept intact, so watch out. On the plus side,
		1003	the module comes with perl since 5.10, which has to count for something.
		1004
		1005	=item 2, 3 (positive/negative bignum)
		1006
		1007	These tags are decoded into L<Math::BigInt> objects. The corresponding
		1008	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
		1009	integers, and others into positive/negative CBOR bignums.
		1010
		1011	=item 4, 5, 264, 265 (decimal fraction/bigfloat)
		1012
		1013	Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
		1014	objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
		1015	encodes into a decimal fraction (either tag 4 or 264).
		1016
		1017	NaN and infinities are not encoded properly, as they cannot be represented
		1018	in CBOR.
		1019
		1020	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
		1021
		1022	=item 30 (rational numbers)
		1023
		1024	These tags are decoded into L<Math::BigRat> objects. The corresponding
		1025	C<Math::BigRat::TO_CBOR> method encodes rational numbers with denominator
		1026	C<1> via their numerator only, i.e., they become normal integers or
		1027	C<bignums>.
		1028
		1029	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
		1030
		1031	=item 21, 22, 23 (expected later JSON conversion)
		1032
		1033	CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
		1034	tags.
		1035
		1036	=item 32 (URI)
		1037
		1038	These objects decode into L<URI> objects. The corresponding
		1039	C<URI::TO_CBOR> method again results in a CBOR URI value.
		1040
		1041	=back
		1042
		1043	=cut
		1044
265	=head2 CBOR and JSON	1045	=head1 CBOR and JSON
266		1046
267	TODO	1047	CBOR is supposed to implement a superset of the JSON data model, and is,
		1048	with some coercion, able to represent all JSON texts (something that other
		1049	"binary JSON" formats such as BSON generally do not support).
		1050
		1051	CBOR implements some extra hints and support for JSON interoperability,
		1052	and the spec offers further guidance for conversion between CBOR and
		1053	JSON. None of this is currently implemented in CBOR::XS, and the guidelines
		1054	in the spec do not result in correct round-tripping of data. If JSON
		1055	interoperability is improved in the future, then the goal will be to
		1056	ensure that decoded JSON data will round-trip encoding and decoding to
		1057	CBOR intact.
268		1058
269		1059
270	=head1 SECURITY CONSIDERATIONS	1060	=head1 SECURITY CONSIDERATIONS
271		1061
272	When you are using CBOR in a protocol, talking to untrusted potentially	1062	Tl;dr... if you want to decode or encode CBOR from untrusted sources, you
273	hostile creatures requires relatively few measures.	1063	should start with a coder object created via C<new_safe> (which implements
		1064	the mitigations explained below):
274		1065
		1066	my $coder = CBOR::XS->new_safe;
		1067
		1068	my $data = $coder->decode ($cbor_text);
		1069	my $cbor = $coder->encode ($data);
		1070
		1071	Longer version: When you are using CBOR in a protocol, talking to
		1072	untrusted potentially hostile creatures requires some thought:
		1073
		1074	=over 4
		1075
		1076	=item Security of the CBOR decoder itself
		1077
275	First of all, your CBOR decoder should be secure, that is, should not have	1078	First and foremost, your CBOR decoder should be secure, that is, should
		1079	not have any buffer overflows or similar bugs that could potentially be
276	any buffer overflows. Obviously, this module should ensure that and I am	1080	exploited. Obviously, this module should ensure that and I am trying hard
277	trying hard on making that true, but you never know.	1081	on making that true, but you never know.
278		1082
		1083	=item CBOR::XS can invoke almost arbitrary callbacks during decoding
		1084
		1085	CBOR::XS supports object serialisation - decoding CBOR can cause calls
		1086	to I<any> C<THAW> method in I<any> package that exists in your process
		1087	(that is, CBOR::XS will not try to load modules, but any existing C<THAW>
		1088	method or function can be called, so they all have to be secure).
		1089
		1090	Less obviously, it will also invoke C<TO_CBOR> and C<FREEZE> methods -
		1091	even if all your C<THAW> methods are secure, encoding data structures from
		1092	untrusted sources can invoke those and trigger bugs in those.
		1093
		1094	So, if you are not sure about the security of all the modules you
		1095	have loaded (you shouldn't), you should disable this part using
		1096	C<forbid_objects> or using C<new_safe>.
		1097
		1098	=item CBOR can be extended with tags that call library code
		1099
		1100	CBOR can be extended with tags, and C<CBOR::XS> has a registry of
		1101	conversion functions for many existing tags that can be extended via
		1102	third-party modules (see the C<filter> method).
		1103
		1104	If you don't trust these, you should configure the "safe" filter function,
		1105	C<CBOR::XS::safe_filter> (C<new_safe> does this), which by default only
		1106	includes conversion functions that are considered "safe" by the author
		1107	(but again, they can be extended by third party modules).
		1108
		1109	Depending on your level of paranoia, you can use the "safe" filter:
		1110
		1111	$cbor->filter (\&CBOR::XS::safe_filter);
		1112
		1113	... your own filter...
		1114
		1115	$cbor->filter (sub { ... do your stuffs here ... });
		1116
		1117	... or even no filter at all, disabling all tag decoding:
		1118
		1119	$cbor->filter (sub { });
		1120
		1121	This is never a problem for encoding, as the tag mechanism only exists in
		1122	CBOR texts.
		1123
		1124	=item Resource-starving attacks: object memory usage
		1125
279	Second, you need to avoid resource-starving attacks. That means you should	1126	You need to avoid resource-starving attacks. That means you should limit
280	limit the size of CBOR data you accept, or make sure that when your	1127	the size of CBOR data you accept, or make sure that when your resources
281	resources run out, that's just fine (e.g. by using a separate process that	1128	run out, that's just fine (e.g. by using a separate process that can
282	can crash safely). The size of a CBOR string in octets is usually a good	1129	crash safely). The size of a CBOR string in octets is usually a good
283	indication of the size of the resources required to decode it into a Perl	1130	indication of the size of the resources required to decode it into a Perl
284	structure. While CBOR::XS can check the size of the CBOR text, it might be	1131	structure. While CBOR::XS can check the size of the CBOR text (using
285	too late when you already have it in memory, so you might want to check	1132	C<max_size> - done by C<new_safe>), it might be too late when you already
286	the size before you accept the string.	1133	have it in memory, so you might want to check the size before you accept
		1134	the string.
287		1135
		1136	As for encoding, it is possible to construct data structures that are
		1137	relatively small but result in large CBOR texts (for example by having an
		1138	array full of references to the same big data structure, which will all be
		1139	deep-cloned during encoding by default). This is rarely an actual issue
		1140	(and the worst case is still just running out of memory), but you can
		1141	reduce this risk by using C<allow_sharing>.
		1142
		1143	=item Resource-starving attacks: stack overflows
		1144
288	Third, CBOR::XS recurses using the C stack when decoding objects and	1145	CBOR::XS recurses using the C stack when decoding objects and arrays. The
289	arrays. The C stack is a limited resource: for instance, on my amd64	1146	C stack is a limited resource: for instance, on my amd64 machine with 8MB
290	machine with 8MB of stack size I can decode around 180k nested arrays but	1147	of stack size I can decode around 180k nested arrays but only 14k nested
291	only 14k nested CBOR objects (due to perl itself recursing deeply on croak	1148	CBOR objects (due to perl itself recursing deeply on croak to free the
292	to free the temporary). If that is exceeded, the program crashes. To be	1149	temporary). If that is exceeded, the program crashes. To be conservative,
293	conservative, the default nesting limit is set to 512. If your process	1150	the default nesting limit is set to 512. If your process has a smaller
294	has a smaller stack, you should adjust this setting accordingly with the	1151	stack, you should adjust this setting accordingly with the C<max_depth>
295	C<max_depth> method.	1152	method.
		1153
		1154	=item Resource-starving attacks: CPU en-/decoding complexity
		1155
		1156	CBOR::XS will use the L<Math::BigInt>, L<Math::BigFloat> and
		1157	L<Math::BigRat> libraries to represent and encode/decode bignums. These can be
		1158	very slow (as in, centuries of CPU time) and can even crash your program
		1159	(and are generally not very trustworthy). See the next section on bignum
		1160	security for details.
		1161
		1162	=item Data breaches: leaking information in error messages
		1163
		1164	CBOR::XS might leak contents of your Perl data structures in its error
		1165	messages, so when you serialise sensitive information you might want to
		1166	make sure that exceptions thrown by CBOR::XS will not end up in front of
		1167	untrusted eyes.
		1168
		1169	=item Something else...
296		1170
297	Something else could bomb you, too, that I forgot to think of. In that	1171	Something else could bomb you, too, that I forgot to think of. In that
298	case, you get to keep the pieces. I am always open for hints, though...	1172	case, you get to keep the pieces. I am always open for hints, though...
299		1173
300	Also keep in mind that CBOR::XS might leak contents of your Perl data	1174	=back
301	structures in its error messages, so when you serialise sensitive	1175
302	information you might want to make sure that exceptions thrown by CBOR::XS	1176
303	will not end up in front of untrusted eyes.	1177	=head1 BIGNUM SECURITY CONSIDERATIONS
		1178
		1179	CBOR::XS provides a C<TO_CBOR> method for both L<Math::BigInt> and
		1180	L<Math::BigFloat> that tries to encode the number in the simplest possible
		1181	way, that is, either a CBOR integer, a CBOR bigint/decimal fraction (tag
		1182	4) or an arbitrary-exponent decimal fraction (tag 264). Rational numbers
		1183	(L<Math::BigRat>, tag 30) can also contain bignums as members.
		1184
		1185	CBOR::XS will also understand base-2 bigfloat or arbitrary-exponent
		1186	bigfloats (tags 5 and 265), but it will never generate these on its own.
		1187
		1188	Using the built-in L<Math::BigInt::Calc> support, encoding and decoding
		1189	decimal fractions is generally fast. Decoding bigints can be slow for very
		1190	big numbers (tens of thousands of digits, something that could potentially
		1191	be caught by limiting the size of CBOR texts), and decoding bigfloats or
		1192	arbitrary-exponent bigfloats can be I<extremely> slow (minutes, decades)
		1193	for large exponents (roughly 40 bit and longer).
		1194
		1195	Additionally, L<Math::BigInt> can take advantage of other bignum
		1196	libraries, such as L<Math::GMP>, which cannot handle big floats with large
		1197	exponents, and might simply abort or crash your program, due to their code
		1198	quality.
		1199
		1200	This can be a concern if you want to parse untrusted CBOR. If it is, you
		1201	might want to disable decoding of tag 2 (bigint) and 3 (negative bigint)
		1202	types. You should also disable types 5 and 265, as these can be slow even
		1203	without bigints.
		1204
		1205	Disabling bigints will also partially or fully disable types that rely on
		1206	them, e.g. rational numbers that use bignums.
		1207
304		1208
305	=head1 CBOR IMPLEMENTATION NOTES	1209	=head1 CBOR IMPLEMENTATION NOTES
306		1210
307	This section contains some random implementation notes. They do not	1211	This section contains some random implementation notes. They do not
308	describe guaranteed behaviour, but merely behaviour as-is implemented	1212	describe guaranteed behaviour, but merely behaviour as-is implemented
…		…
317	Only the double data type is supported for NV data types - when Perl uses	1221	Only the double data type is supported for NV data types - when Perl uses
318	long double to represent floating point values, they might not be encoded	1222	long double to represent floating point values, they might not be encoded
319	properly. Half precision types are accepted, but not encoded.	1223	properly. Half precision types are accepted, but not encoded.
320		1224
321	Strict mode and canonical mode are not implemented.	1225	Strict mode and canonical mode are not implemented.
		1226
		1227
		1228	=head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT
		1229
		1230	On perls that were built without 64 bit integer support (these are rare
		1231	nowadays, even on 32 bit architectures, as all major Perl distributions
		1232	are built with 64 bit integer support), support for any kind of 64 bit
		1233	integer in CBOR is very limited - most likely, these 64 bit values will
		1234	be truncated, corrupted, or otherwise not decoded correctly. This also
		1235	includes string, array and map sizes that are stored as 64 bit integers.
322		1236
323		1237
324	=head1 THREADS	1238	=head1 THREADS
325		1239
326	This module is I<not> guaranteed to be thread safe and there are no	1240	This module is I<not> guaranteed to be thread safe and there are no
…		…
340	Please refrain from using rt.cpan.org or any other bug reporting	1254	Please refrain from using rt.cpan.org or any other bug reporting
341	service. I put the contact address into my modules for a reason.	1255	service. I put the contact address into my modules for a reason.
342		1256
343	=cut	1257	=cut
344		1258
345	our $true = do { bless \(my $dummy = 1), "CBOR::XS::Boolean" };	1259	# clumsy and slow hv_store-in-hash helper function
346	our $false = do { bless \(my $dummy = 0), "CBOR::XS::Boolean" };	1260	sub _hv_store {
347		1261	$_[0]{$_[1]} = $_[2];
348	sub true() { $true }
349	sub false() { $false }
350
351	sub is_bool($) {
352	UNIVERSAL::isa $_[0], "CBOR::XS::Boolean"
353	# or UNIVERSAL::isa $_[0], "CBOR::Literal"
354	}	1262	}
355		1263
		1264	our %FILTER = (
		1265	0 => sub { # rfc4287 datetime, utf-8
		1266	require Time::Piece;
		1267	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"
		1268	# from FreeBSD, which can't parse ISO 8601, RFC3339, RFC4287 or much of anything
		1269	# else either. What's incredible over standard strptime totally escapes me.
		1270	# doesn't do fractional times, either. sigh.
		1271	# In fact, it's all a lie, it uses whatever strptime it wants, and of course,
		1272	# they are all incompatible. The openbsd one simply ignores %z (but according to the
		1273	# docs, it would be much more incredibly flexible indeed. If it worked, that is.).
		1274	scalar eval {
		1275	my $s = $_[1];
		1276
		1277	$s =~ s/Z$/+00:00/;
		1278	$s =~ s/(\.[0-9]+)?([+-][0-9][0-9]):([0-9][0-9])$//
		1279	or die;
		1280
		1281	my $b = $1 - ($2 * 60 + $3) * 60; # fractional part + offset. hopefully
		1282	my $d = Time::Piece->strptime ($s, "%Y-%m-%dT%H:%M:%S");
		1283
		1284	Time::Piece::gmtime ($d->epoch + $b)
		1285	} || die "corrupted CBOR date/time string ($_[0])";
		1286	},
		1287
		1288	1 => sub { # seconds since the epoch, possibly fractional
		1289	require Time::Piece;
		1290	scalar Time::Piece::gmtime (pop)
		1291	},
		1292
		1293	2 => sub { # pos bigint
		1294	require Math::BigInt;
		1295	Math::BigInt->new ("0x" . unpack "H*", pop)
		1296	},
		1297
		1298	3 => sub { # neg bigint
		1299	require Math::BigInt;
		1300	-Math::BigInt->new ("0x" . unpack "H*", pop)
		1301	},
		1302
		1303	4 => sub { # decimal fraction, array
		1304	require Math::BigFloat;
		1305	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
		1306	},
		1307
		1308	264 => sub { # decimal fraction with arbitrary exponent
		1309	require Math::BigFloat;
		1310	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
		1311	},
		1312
		1313	5 => sub { # bigfloat, array
		1314	require Math::BigFloat;
		1315	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1316	},
		1317
		1318	265 => sub { # bigfloat with arbitrary exponent
		1319	require Math::BigFloat;
		1320	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1321	},
		1322
		1323	30 => sub { # rational number
		1324	require Math::BigRat;
		1325	Math::BigRat->new ("$_[1][0]/$_[1][1]") # separate parameters only work in recent versions
		1326	},
		1327
		1328	21 => sub { pop }, # expected conversion to base64url encoding
		1329	22 => sub { pop }, # expected conversion to base64 encoding
		1330	23 => sub { pop }, # expected conversion to base16 encoding
		1331
		1332	# 24 # embedded cbor, byte string
		1333
		1334	32 => sub {
		1335	require URI;
		1336	URI->new (pop)
		1337	},
		1338
		1339	# 33 # base64url rfc4648, utf-8
		1340	# 34 # base64 rfc4648, utf-8
		1341	# 35 # regex pcre/ecma262, utf-8
		1342	# 36 # mime message rfc2045, utf-8
		1343	);
		1344
		1345	sub default_filter {
		1346	&{ $FILTER{$_[0]} or return }
		1347	}
		1348
		1349	our %SAFE_FILTER = map { $_ => $FILTER{$_} } 0, 1, 21, 22, 23, 32;
		1350
		1351	sub safe_filter {
		1352	&{ $SAFE_FILTER{$_[0]} or return }
		1353	}
		1354
		1355	sub URI::TO_CBOR {
		1356	my $uri = $_[0]->as_string;
		1357	utf8::upgrade $uri;
		1358	tag 32, $uri
		1359	}
		1360
		1361	sub Math::BigInt::TO_CBOR {
		1362	if (-2147483648 <= $_[0] && $_[0] <= 2147483647) {
		1363	$_[0]->numify
		1364	} else {
		1365	my $hex = substr $_[0]->as_hex, 2;
		1366	$hex = "0$hex" if 1 & length $hex; # sigh
		1367	tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex
		1368	}
		1369	}
		1370
		1371	sub Math::BigFloat::TO_CBOR {
		1372	my ($m, $e) = $_[0]->parts;
		1373
		1374	-9223372036854775808 <= $e && $e <= 18446744073709551615
		1375	? tag 4, [$e->numify, $m]
		1376	: tag 264, [$e, $m]
		1377	}
		1378
		1379	sub Math::BigRat::TO_CBOR {
		1380	my ($n, $d) = $_[0]->parts;
		1381
		1382	# older versions of BigRat need *1, as they do not always return numbers
		1383
		1384	$d*1 == 1
		1385	? $n*1
		1386	: tag 30, [$n*1, $d*1]
		1387	}
		1388
		1389	sub Time::Piece::TO_CBOR {
		1390	tag 1, 0 + $_[0]->epoch
		1391	}
		1392
356	XSLoader::load "CBOR::XS", $VERSION;	1393	XSLoader::load "CBOR::XS", $VERSION;
357
358	package CBOR::XS::Boolean;
359
360	use overload
361	"0+" => sub { ${$_[0]} },
362	"++" => sub { $_[0] = ${$_[0]} + 1 },
363	"--" => sub { $_[0] = ${$_[0]} - 1 },
364	fallback => 1;
365
366	1;
367		1394
368	=head1 SEE ALSO	1395	=head1 SEE ALSO
369		1396
370	The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,	1397	The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
371	serialisation.	1398	serialisation.
372		1399
		1400	The L<Types::Serialiser> module provides the data model for true, false
		1401	and error values.
		1402
373	=head1 AUTHOR	1403	=head1 AUTHOR
374		1404
375	Marc Lehmann <schmorp@schmorp.de>	1405	Marc Lehmann <schmorp@schmorp.de>
376	http://home.schmorp.de/	1406	http://home.schmorp.de/
377		1407
378	=cut	1408	=cut
379		1409
		1410	1
		1411

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing CBOR-XS/XS.pm (file contents): Revision 1.1 by root, Fri Oct 25 23:09:45 2013 UTC vs. Revision 1.70 by root, Sat Nov 9 07:30:36 2019 UTC

Diff Legend

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.1 by root, Fri Oct 25 23:09:45 2013 UTC vs.
Revision 1.70 by root, Sat Nov 9 07:30:36 2019 UTC