[ViewVC] Diff of: cvs/CBOR-XS/XS.pm

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.23 by root, Fri Nov 22 16:00:30 2013 UTC vs.
Revision 1.76 by root, Tue Dec 1 01:49:47 2020 UTC

…		…
26	substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string	26	substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
27	}	27	}
28		28
29	=head1 DESCRIPTION	29	=head1 DESCRIPTION
30		30
31	WARNING! This module is very new, and not very well tested (that's up
32	to you to do). Furthermore, details of the implementation might change
33	freely before version 1.0. And lastly, most extensions depend on an IANA
34	assignment, and until that assignment is official, this implementation is
35	not interoperable with other implementations (even future versions of this
36	module) until the assignment is done.
37
38	You are still invited to try out CBOR, and this module.
39
40	This module converts Perl data structures to the Concise Binary Object	31	This module converts Perl data structures to the Concise Binary Object
41	Representation (CBOR) and vice versa. CBOR is a fast binary serialisation	32	Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
42	format that aims to use a superset of the JSON data model, i.e. when you	33	format that aims to use an (almost) superset of the JSON data model, i.e.
43	can represent something in JSON, you should be able to represent it in	34	when you can represent something useful in JSON, you should be able to
44	CBOR.	35	represent it in CBOR.
45		36
46	In short, CBOR is a faster and very compact binary alternative to JSON,	37	In short, CBOR is a faster and quite compact binary alternative to JSON,
47	with the added ability of supporting serialisation of Perl objects. (JSON	38	with the added ability of supporting serialisation of Perl objects. (JSON
48	often compresses better than CBOR though, so if you plan to compress the	39	often compresses better than CBOR though, so if you plan to compress the
49	data later you might want to compare both formats first).	40	data later and speed is less important you might want to compare both
		41	formats first).
		42
		43	The primary goal of this module is to be I<correct> and the secondary goal
		44	is to be I<fast>. To reach the latter goal it was written in C.
50		45
51	To give you a general idea about speed, with texts in the megabyte range,	46	To give you a general idea about speed, with texts in the megabyte range,
52	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or	47	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
53	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the	48	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
54	data, the worse L<Storable> performs in comparison.	49	data, the worse L<Storable> performs in comparison.
55		50
56	As for compactness, C<CBOR::XS> encoded data structures are usually about	51	Regarding compactness, C<CBOR::XS>-encoded data structures are usually
57	20% smaller than the same data encoded as (compact) JSON or L<Storable>.	52	about 20% smaller than the same data encoded as (compact) JSON or
		53	L<Storable>.
58		54
59	In addition to the core CBOR data format, this module implements a number	55	In addition to the core CBOR data format, this module implements a
60	of extensions, to support cyclic and self-referencing data structures	56	number of extensions, to support cyclic and shared data structures
61	(see C<allow_sharing>), string deduplication (see C<allow_stringref>) and	57	(see C<allow_sharing> and C<allow_cycles>), string deduplication (see
62	scalar references (always enabled).	58	C<pack_strings>) and scalar references (always enabled).
63
64	The primary goal of this module is to be I<correct> and the secondary goal
65	is to be I<fast>. To reach the latter goal it was written in C.
66		59
67	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and	60	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
68	vice versa.	61	vice versa.
69		62
70	=cut	63	=cut
71		64
72	package CBOR::XS;	65	package CBOR::XS;
73		66
74	use common::sense;	67	use common::sense;
75		68
76	our $VERSION = 0.08;	69	our $VERSION = 1.82;
77	our @ISA = qw(Exporter);	70	our @ISA = qw(Exporter);
78		71
79	our @EXPORT = qw(encode_cbor decode_cbor);	72	our @EXPORT = qw(encode_cbor decode_cbor);
80		73
81	use Exporter;	74	use Exporter;
…		…
119		112
120	The mutators for flags all return the CBOR object again and thus calls can	113	The mutators for flags all return the CBOR object again and thus calls can
121	be chained:	114	be chained:
122		115
123	my $cbor = CBOR::XS->new->encode ({a => [1,2]});	116	my $cbor = CBOR::XS->new->encode ({a => [1,2]});
		117
		118	=item $cbor = new_safe CBOR::XS
		119
		120	Create a new, safe/secure CBOR::XS object. This is similar to C<new>,
		121	but configures the coder object to be safe to use with untrusted
		122	data. Currently, this is equivalent to:
		123
		124	my $cbor = CBOR::XS
		125	->new
		126	->forbid_objects
		127	->filter (\&CBOR::XS::safe_filter)
		128	->max_size (1e8);
		129
		130	But is more future proof (it is better to crash because of a change than
		131	to be exploited in other ways).
		132
		133	=cut
		134
		135	sub new_safe {
		136	CBOR::XS
		137	->new
		138	->forbid_objects
		139	->filter (\&CBOR::XS::safe_filter)
		140	->max_size (1e8)
		141	}
124		142
125	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])	143	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
126		144
127	=item $max_depth = $cbor->get_max_depth	145	=item $max_depth = $cbor->get_max_depth
128		146
…		…
144		162
145	Note that nesting is implemented by recursion in C. The default value has	163	Note that nesting is implemented by recursion in C. The default value has
146	been chosen to be as large as typical operating systems allow without	164	been chosen to be as large as typical operating systems allow without
147	crashing.	165	crashing.
148		166
149	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	167	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
150		168
151	=item $cbor = $cbor->max_size ([$maximum_string_size])	169	=item $cbor = $cbor->max_size ([$maximum_string_size])
152		170
153	=item $max_size = $cbor->get_max_size	171	=item $max_size = $cbor->get_max_size
154		172
…		…
159	effect on C<encode> (yet).	177	effect on C<encode> (yet).
160		178
161	If no argument is given, the limit check will be deactivated (same as when	179	If no argument is given, the limit check will be deactivated (same as when
162	C<0> is specified).	180	C<0> is specified).
163		181
164	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	182	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
165		183
166	=item $cbor = $cbor->allow_unknown ([$enable])	184	=item $cbor = $cbor->allow_unknown ([$enable])
167		185
168	=item $enabled = $cbor->get_allow_unknown	186	=item $enabled = $cbor->get_allow_unknown
169		187
…		…
186	as an array, is referenced multiple times), but instead will emit a	204	as an array, is referenced multiple times), but instead will emit a
187	reference to the earlier value.	205	reference to the earlier value.
188		206
189	This means that such values will only be encoded once, and will not result	207	This means that such values will only be encoded once, and will not result
190	in a deep cloning of the value on decode, in decoders supporting the value	208	in a deep cloning of the value on decode, in decoders supporting the value
191	sharing extension.	209	sharing extension. This also makes it possible to encode cyclic data
		210	structures (which need C<allow_cycles> to be enabled to be decoded by this
		211	module).
192		212
193	It is recommended to leave it off unless you know your	213	It is recommended to leave it off unless you know your
194	communication partner supports the value sharing extensions to CBOR	214	communication partner supports the value sharing extensions to CBOR
195	(http://cbor.schmorp.de/value-sharing).	215	(L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
		216	resulting data structure might be unusable.
196		217
197	Detecting shared values incurs a runtime overhead when values are encoded	218	Detecting shared values incurs a runtime overhead when values are encoded
198	that have a reference counter larger than one, and might unnecessarily	219	that have a reference counter larger than one, and might unnecessarily
199	increase the encoded size, as potentially shared values are encode as	220	increase the encoded size, as potentially shared values are encoded as
200	sharable whether or not they are actually shared.	221	shareable whether or not they are actually shared.
201		222
202	At the moment, only targets of references can be shared (e.g. scalars,	223	At the moment, only targets of references can be shared (e.g. scalars,
203	arrays or hashes pointed to by a reference). Weirder constructs, such as	224	arrays or hashes pointed to by a reference). Weirder constructs, such as
204	an array with multiple "copies" of the I<same> string, which are hard but	225	an array with multiple "copies" of the I<same> string, which are hard but
205	not impossible to create in Perl, are not supported (this is the same as	226	not impossible to create in Perl, are not supported (this is the same as
206	for L<Storable>).	227	with L<Storable>).
207		228
208	If C<$enable> is false (the default), then C<encode> will encode	229	If C<$enable> is false (the default), then C<encode> will encode shared
209	exception when it encounters anything it cannot encode as CBOR.	230	data structures repeatedly, unsharing them in the process. Cyclic data
		231	structures cannot be encoded in this mode.
210		232
211	This option does not affect C<decode> in any way - shared values and	233	This option does not affect C<decode> in any way - shared values and
212	references will always be decoded properly if present.	234	references will always be decoded properly if present.
213		235
		236	=item $cbor = $cbor->allow_cycles ([$enable])
		237
		238	=item $enabled = $cbor->get_allow_cycles
		239
		240	If C<$enable> is true (or missing), then C<decode> will happily decode
		241	self-referential (cyclic) data structures. By default these will not be
		242	decoded, as they need manual cleanup to avoid memory leaks, so code that
		243	isn't prepared for this will not leak memory.
		244
		245	If C<$enable> is false (the default), then C<decode> will throw an error
		246	when it encounters a self-referential/cyclic data structure.
		247
		248	FUTURE DIRECTION: the motivation behind this option is to avoid I<real>
		249	cycles - future versions of this module might choose to decode cyclic data
		250	structures using weak references when this option is off, instead of
		251	throwing an error.
		252
		253	This option does not affect C<encode> in any way - shared values and
		254	references will always be encoded properly if present.
		255
		256	=item $cbor = $cbor->forbid_objects ([$enable])
		257
		258	=item $enabled = $cbor->get_forbid_objects
		259
		260	Disables the use of the object serialiser protocol.
		261
		262	If C<$enable> is true (or missing), then C<encode> will throw an
		263	exception when it encounters perl objects that would be encoded using the
		264	perl-object tag (26). When C<decode> encounters such tags, it will fall
		265	back to the general filter/tagged logic as if this were an unknown tag (by
		266	default resulting in a C<CBOR::XS::Tagged> object).
		267
		268	If C<$enable> is false (the default), then C<encode> will use the
		269	L<Types::Serialiser> object serialisation protocol to serialise objects
		270	into perl-object tags, and C<decode> will do the same to decode such tags.
		271
		272	See L<SECURITY CONSIDERATIONS>, below, for more info on why forbidding this
		273	protocol can be useful.
		274
214	=item $cbor = $cbor->allow_stringref ([$enable])	275	=item $cbor = $cbor->pack_strings ([$enable])
215		276
216	=item $enabled = $cbor->get_allow_stringref	277	=item $enabled = $cbor->get_pack_strings
217		278
218	If C<$enable> is true (or missing), then C<encode> will try not to encode	279	If C<$enable> is true (or missing), then C<encode> will try not to encode
219	the same string twice, but will instead encode a reference to the string	280	the same string twice, but will instead encode a reference to the string
220	instead. Depending on your data format. this can save a lot of space, but	281	instead. Depending on your data format, this can save a lot of space, but
221	also results in a very large runtime overhead (expect encoding times to be	282	also results in a very large runtime overhead (expect encoding times to be
222	2-4 times as high as without).	283	2-4 times as high as without).
223		284
224	It is recommended to leave it off unless you know your	285	It is recommended to leave it off unless you know your
225	communications partner supports the stringref extension to CBOR	286	communications partner supports the stringref extension to CBOR
226	(http://cbor.schmorp.de/stringref).	287	(L<http://cbor.schmorp.de/stringref>), as without decoder support, the
		288	resulting data structure might not be usable.
227		289
228	If C<$enable> is false (the default), then C<encode> will encode	290	If C<$enable> is false (the default), then C<encode> will encode strings
229	exception when it encounters anything it cannot encode as CBOR.	291	the standard CBOR way.
230		292
231	This option does not affect C<decode> in any way - string references will	293	This option does not affect C<decode> in any way - string references will
232	always be decoded properly if present.	294	always be decoded properly if present.
233		295
		296	=item $cbor = $cbor->text_keys ([$enable])
		297
		298	=item $enabled = $cbor->get_text_keys
		299
		300	If C<$enable> is true (or missing), then C<encode> will encode all
		301	perl hash keys as CBOR text strings/UTF-8 string, upgrading them as needed.
		302
		303	If C<$enable> is false (the default), then C<encode> will encode hash keys
		304	normally - upgraded perl strings (strings internally encoded as UTF-8) as
		305	CBOR text strings, and downgraded perl strings as CBOR byte strings.
		306
		307	This option does not affect C<decode> in any way.
		308
		309	This option is useful for interoperability with CBOR decoders that don't
		310	treat byte strings as a form of text. It is especially useful as Perl
		311	gives very little control over hash keys.
		312
		313	Enabling this option can be slow, as all downgraded hash keys that are
		314	encoded need to be scanned and converted to UTF-8.
		315
		316	=item $cbor = $cbor->text_strings ([$enable])
		317
		318	=item $enabled = $cbor->get_text_strings
		319
		320	This option works similar to C<text_keys>, above, but works on all strings
		321	(including hash keys), so C<text_keys> has no further effect after
		322	enabling C<text_strings>.
		323
		324	If C<$enable> is true (or missing), then C<encode> will encode all perl
		325	strings as CBOR text strings/UTF-8 strings, upgrading them as needed.
		326
		327	If C<$enable> is false (the default), then C<encode> will encode strings
		328	normally (but see C<text_keys>) - upgraded perl strings (strings
		329	internally encoded as UTF-8) as CBOR text strings, and downgraded perl
		330	strings as CBOR byte strings.
		331
		332	This option does not affect C<decode> in any way.
		333
		334	This option has similar advantages and disadvantages as C<text_keys>. In
		335	addition, this option effectively removes the ability to automatically
		336	encode byte strings, which might break some C<FREEZE> and C<TO_CBOR>
		337	methods that rely on this.
		338
		339	A workaround is to use explicit type casts, which are unaffected by this option.
		340
		341	=item $cbor = $cbor->validate_utf8 ([$enable])
		342
		343	=item $enabled = $cbor->get_validate_utf8
		344
		345	If C<$enable> is true (or missing), then C<decode> will validate that
		346	elements (text strings) containing UTF-8 data in fact contain valid UTF-8
		347	data (instead of blindly accepting it). This validation obviously takes
		348	extra time during decoding.
		349
		350	The concept of "valid UTF-8" used is perl's concept, which is a superset
		351	of the official UTF-8.
		352
		353	If C<$enable> is false (the default), then C<decode> will blindly accept
		354	UTF-8 data, marking them as valid UTF-8 in the resulting data structure
		355	regardless of whether that's true or not.
		356
		357	Perl isn't too happy about corrupted UTF-8 in strings, but should
		358	generally not crash or do similarly evil things. Extensions might not be
		359	so forgiving, so it's recommended to turn on this setting if you receive
		360	untrusted CBOR.
		361
		362	This option does not affect C<encode> in any way - strings that are
		363	supposedly valid UTF-8 will simply be dumped into the resulting CBOR
		364	string without checking whether that is, in fact, true or not.
		365
234	=item $cbor = $cbor->filter ([$cb->($tag, $value)])	366	=item $cbor = $cbor->filter ([$cb->($tag, $value)])
235		367
236	=item $cb_or_undef = $cbor->get_filter	368	=item $cb_or_undef = $cbor->get_filter
237		369
238	TODO	370	Sets or replaces the tagged value decoding filter (when C<$cb> is
		371	specified) or clears the filter (if no argument or C<undef> is provided).
		372
		373	The filter callback is called only during decoding, when a non-enforced
		374	tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
		375	list of enforced tags). For specific tags, it's often better to provide a
		376	default converter using the C<%CBOR::XS::FILTER> hash (see below).
		377
		378	The first argument is the numerical tag, the second is the (decoded) value
		379	that has been tagged.
		380
		381	The filter function should return either exactly one value, which will
		382	replace the tagged value in the decoded data structure, or no values,
		383	which will result in default handling, which currently means the decoder
		384	creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
		385
		386	When the filter is cleared (the default state), the default filter
		387	function, C<CBOR::XS::default_filter>, is used. This function simply
		388	looks up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists
		389	it must be a code reference that is called with tag and value, and is
		390	responsible for decoding the value. If no entry exists, it returns no
		391	values. C<CBOR::XS> provides a number of default filter functions already,
		392	and the C<%CBOR::XS::FILTER> hash can be freely extended with more.
		393
		394	C<CBOR::XS> additionally provides an alternative filter function that is
		395	supposed to be safe to use with untrusted data (which the default filter
		396	might not), called C<CBOR::XS::safe_filter>, which works the same as
		397	the C<default_filter> but uses the C<%CBOR::XS::SAFE_FILTER> variable
		398	instead. It is prepopulated with the tag decoding functions that are
		399	deemed safe (basically the same as C<%CBOR::XS::FILTER> without all
		400	the bignum tags), and can be extended by user code as well, although,
		401	obviously, one should be very careful about adding decoding functions
		402	here, since the expectation is that they are safe to use on untrusted
		403	data, after all.
		404
		405	Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
		406	objects, with no other special handling (useful when working with
		407	potentially "unsafe" CBOR data).
		408
		409	CBOR::XS->new->filter (sub { })->decode ($cbor_data);
		410
		411	Example: provide a global filter for tag 1347375694, converting the value
		412	into some string form.
		413
		414	$CBOR::XS::FILTER{1347375694} = sub {
		415	my ($tag, $value) = @_;
		416
		417	"tag 1347375694 value $value"
		418	};
		419
		420	Example: provide your own filter function that looks up tags in your own
		421	hash:
		422
		423	my %my_filter = (
		424	998347484 => sub {
		425	my ($tag, $value) = @_;
		426
		427	"tag 998347484 value $value"
		428	},
		429	);
		430
		431	my $coder = CBOR::XS->new->filter (sub {
		432	&{ $my_filter{$_[0]} or return }
		433	});
		434
		435
		436	Example: use the safe filter function (see L<SECURITY CONSIDERATIONS> for
		437	more considerations on security).
		438
		439	CBOR::XS->new->filter (\&CBOR::XS::safe_filter)->decode ($cbor_data);
239		440
240	=item $cbor_data = $cbor->encode ($perl_scalar)	441	=item $cbor_data = $cbor->encode ($perl_scalar)
241		442
242	Converts the given Perl data structure (a scalar value) to its CBOR	443	Converts the given Perl data structure (a scalar value) to its CBOR
243	representation.	444	representation.
…		…
253	when there is trailing garbage after the CBOR string, it will silently	454	when there is trailing garbage after the CBOR string, it will silently
254	stop parsing there and return the number of characters consumed so far.	455	stop parsing there and return the number of characters consumed so far.
255		456
256	This is useful if your CBOR texts are not delimited by an outer protocol	457	This is useful if your CBOR texts are not delimited by an outer protocol
257	and you need to know where the first CBOR string ends and the next one	458	and you need to know where the first CBOR string ends and the next one
258	starts.	459	starts - CBOR strings are self-delimited, so it is possible to concatenate
		460	CBOR strings without any delimiters or size fields and recover their data.
259		461
260	CBOR::XS->new->decode_prefix ("......")	462	CBOR::XS->new->decode_prefix ("......")
261	=> ("...", 3)	463	=> ("...", 3)
		464
		465	=back
		466
		467	=head2 INCREMENTAL PARSING
		468
		469	In some cases, there is the need for incremental parsing of CBOR
		470	texts. While this module always has to keep both CBOR text and resulting
		471	Perl data structure in memory at one time, it does allow you to parse a
		472	CBOR stream incrementally, using a method similar to using "decode_prefix" to see
		473	if a full CBOR object is available, but is much more efficient.
		474
		475	It basically works by parsing as much of a CBOR string as possible - if
		476	the CBOR data is not complete yet, the parser will remember where it was,
		477	to be able to restart when more data has been accumulated. Once enough
		478	data is available to either decode a complete CBOR value or raise an
		479	error, a real decode will be attempted.
		480
		481	A typical use case would be a network protocol that consists of sending
		482	and receiving CBOR-encoded messages. The solution that works with CBOR and
		483	about anything else is by prepending a length to every CBOR value, so the
		484	receiver knows how many octets to read. More compact (and slightly slower)
		485	would be to just send CBOR values back-to-back, as C<CBOR::XS> knows where
		486	a CBOR value ends, and doesn't need an explicit length.
		487
		488	The following methods help with this:
		489
		490	=over 4
		491
		492	=item @decoded = $cbor->incr_parse ($buffer)
		493
		494	This method attempts to decode exactly one CBOR value from the beginning
		495	of the given C<$buffer>. The value is removed from the C<$buffer> on
		496	success. When C<$buffer> doesn't contain a complete value yet, it returns
		497	nothing. Finally, when the C<$buffer> doesn't start with something
		498	that could ever be a valid CBOR value, it raises an exception, just as
		499	C<decode> would. In the latter case the decoder state is undefined and
		500	must be reset before being able to parse further.
		501
		502	This method modifies the C<$buffer> in place. When no CBOR value can be
		503	decoded, the decoder stores the current string offset. On the next call,
		504	it continues decoding at the place where it stopped before. For this to make
		505	sense, the C<$buffer> must begin with the same octets as on previous
		506	unsuccessful calls.
		507
		508	You can call this method in scalar context, in which case it either
		509	returns a decoded value or C<undef>. This makes it impossible to
		510	distinguish between CBOR null values (which decode to C<undef>) and an
		511	unsuccessful decode, which is often acceptable.
		512
		513	=item @decoded = $cbor->incr_parse_multiple ($buffer)
		514
		515	Same as C<incr_parse>, but attempts to decode as many CBOR values as
		516	possible in one go, instead of at most one. Calls to C<incr_parse> and
		517	C<incr_parse_multiple> can be interleaved.
		518
		519	=item $cbor->incr_reset
		520
		521	Resets the incremental decoder. This throws away any saved state, so that
		522	subsequent calls to C<incr_parse> or C<incr_parse_multiple> start to parse
		523	a new CBOR value from the beginning of the C<$buffer> again.
		524
		525	This method can be called at any time, but it I<must> be called if you want
		526	to change your C<$buffer> or there was a decoding error and you want to
		527	reuse the C<$cbor> object for future incremental parsings.
262		528
263	=back	529	=back
264		530
265		531
266	=head1 MAPPING	532	=head1 MAPPING
…		…
284	CBOR integers become (numeric) perl scalars. On perls without 64 bit	550	CBOR integers become (numeric) perl scalars. On perls without 64 bit
285	support, 64 bit integers will be truncated or otherwise corrupted.	551	support, 64 bit integers will be truncated or otherwise corrupted.
286		552
287	=item byte strings	553	=item byte strings
288		554
289	Byte strings will become octet strings in Perl (the byte values 0..255	555	Byte strings will become octet strings in Perl (the byte values 0..255
290	will simply become characters of the same value in Perl).	556	will simply become characters of the same value in Perl).
291		557
292	=item UTF-8 strings	558	=item UTF-8 strings
293		559
294	UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be	560	UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
…		…
317	=item tagged values	583	=item tagged values
318		584
319	Tagged items consist of a numeric tag and another CBOR value.	585	Tagged items consist of a numeric tag and another CBOR value.
320		586
321	See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>	587	See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
322	for details.	588	for details on which tags are handled how.
323		589
324	=item anything else	590	=item anything else
325		591
326	Anything else (e.g. unsupported simple values) will raise a decoding	592	Anything else (e.g. unsupported simple values) will raise a decoding
327	error.	593	error.
…		…
330		596
331		597
332	=head2 PERL -> CBOR	598	=head2 PERL -> CBOR
333		599
334	The mapping from Perl to CBOR is slightly more difficult, as Perl is a	600	The mapping from Perl to CBOR is slightly more difficult, as Perl is a
335	truly typeless language, so we can only guess which CBOR type is meant by	601	typeless language. That means this module can only guess which CBOR type
336	a Perl value.	602	is meant by a perl value.
337		603
338	=over 4	604	=over 4
339		605
340	=item hash references	606	=item hash references
341		607
342	Perl hash references become CBOR maps. As there is no inherent ordering in	608	Perl hash references become CBOR maps. As there is no inherent ordering in
343	hash keys (or CBOR maps), they will usually be encoded in a pseudo-random	609	hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
344	order.	610	order. This order can be different each time a hash is encoded.
345		611
346	Currently, tied hashes will use the indefinite-length format, while normal	612	Currently, tied hashes will use the indefinite-length format, while normal
347	hashes will use the fixed-length format.	613	hashes will use the fixed-length format.
348		614
349	=item array references	615	=item array references
350		616
351	Perl array references become fixed-length CBOR arrays.	617	Perl array references become fixed-length CBOR arrays.
352		618
353	=item other references	619	=item other references
354		620
355	Other unblessed references are generally not allowed and will cause an	621	Other unblessed references will be represented using
356	exception to be thrown, except for references to the integers C<0> and	622	the indirection tag extension (tag value C<22098>,
357	C<1>, which get turned into false and true in CBOR.	623	L<http://cbor.schmorp.de/indirection>). CBOR decoders are guaranteed
		624	to be able to decode these values somehow, by either "doing the right
		625	thing", decoding into a generic tagged object, simply ignoring the tag, or
		626	something else.
358		627
359	=item CBOR::XS::Tagged objects	628	=item CBOR::XS::Tagged objects
360		629
361	Objects of this type must be arrays consisting of a single C<[tag, value]>	630	Objects of this type must be arrays consisting of a single C<[tag, value]>
362	pair. The (numerical) tag will be encoded as a CBOR tag, the value will	631	pair. The (numerical) tag will be encoded as a CBOR tag, the value will
363	be encoded as appropriate for the value. You cna use C<CBOR::XS::tag> to	632	be encoded as appropriate for the value. You must use C<CBOR::XS::tag> to
364	create such objects.	633	create such objects.
365		634
366	=item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error	635	=item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
367		636
368	These special values become CBOR true, CBOR false and CBOR undefined	637	These special values become CBOR true, CBOR false and CBOR undefined
…		…
385	# dump as number	654	# dump as number
386	encode_cbor [2] # yields [2]	655	encode_cbor [2] # yields [2]
387	encode_cbor [-3.0e17] # yields [-3e+17]	656	encode_cbor [-3.0e17] # yields [-3e+17]
388	my $value = 5; encode_cbor [$value] # yields [5]	657	my $value = 5; encode_cbor [$value] # yields [5]
389		658
390	# used as string, so dump as string	659	# used as string, so dump as string (either byte or text)
391	print $value;	660	print $value;
392	encode_cbor [$value] # yields ["5"]	661	encode_cbor [$value] # yields ["5"]
393		662
394	# undef becomes null	663	# undef becomes null
395	encode_cbor [undef] # yields [null]	664	encode_cbor [undef] # yields [null]
…		…
398		667
399	my $x = 3.1; # some variable containing a number	668	my $x = 3.1; # some variable containing a number
400	"$x"; # stringified	669	"$x"; # stringified
401	$x .= ""; # another, more awkward way to stringify	670	$x .= ""; # another, more awkward way to stringify
402	print $x; # perl does it for you, too, quite often	671	print $x; # perl does it for you, too, quite often
		672
		673	You can force whether a string is encoded as byte or text string by using
		674	C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled).
		675
		676	utf8::upgrade $x; # encode $x as text string
		677	utf8::downgrade $x; # encode $x as byte string
		678
		679	More options are available, see L<TYPE CASTS>, below, and the C<text_keys>
		680	and C<text_strings> options.
		681
		682	Perl doesn't define what operations up- and downgrade strings, so if the
		683	difference between byte and text is important, you should up- or downgrade
		684	your string as late as possible before encoding. You can also force the
		685	use of CBOR text strings by using C<text_keys> or C<text_strings>.
403		686
404	You can force the type to be a CBOR number by numifying it:	687	You can force the type to be a CBOR number by numifying it:
405		688
406	my $x = "3"; # some variable containing a string	689	my $x = "3"; # some variable containing a string
407	$x += 0; # numify it, ensuring it will be dumped as a number	690	$x += 0; # numify it, ensuring it will be dumped as a number
…		…
418	represent numerical values are supported, but might suffer loss of	701	represent numerical values are supported, but might suffer loss of
419	precision.	702	precision.
420		703
421	=back	704	=back
422		705
		706	=head2 TYPE CASTS
		707
		708	B<EXPERIMENTAL>: As an experimental extension, C<CBOR::XS> allows you to
		709	force specific cbor types to be used when encoding. That allows you to
		710	encode types not normally accessible (e.g. half floats) as well as force
		711	string types even when C<text_strings> is in effect.
		712
		713	Type forcing is done by calling a special "cast" function which keeps a
		714	copy of the value and returns a new value that can be handed over to any
		715	CBOR encoder function.
		716
		717	The following casts are currently available (all of which are unary operators):
		718
		719	=over
		720
		721	=item CBOR::XS::as_int $value
		722
		723	Forces the value to be encoded as some form of (basic, not bignum) integer
		724	type.
		725
		726	=item CBOR::XS::as_text $value
		727
		728	Forces the value to be encoded as (UTF-8) text values.
		729
		730	=item CBOR::XS::as_bytes $value
		731
		732	Forces the value to be encoded as a (binary) string value.
		733
		734	=item CBOR::XS::as_bool $value
		735
		736	Converts a Perl boolean (which can be any kind of scalar) into a CBOR
		737	boolean. Strictly the same, but shorter to write, than:
		738
		739	$value ? Types::Serialiser::true : Types::Serialiser::false
		740
		741	=item CBOR::XS::as_float16 $value
		742
		743	Forces half-float (IEEE 754 binary16) encoding of the given value.
		744
		745	=item CBOR::XS::as_float32 $value
		746
		747	Forces single-float (IEEE 754 binary32) encoding of the given value.
		748
		749	=item CBOR::XS::as_float64 $value
		750
		751	Forces double-float (IEEE 754 binary64) encoding of the given value.
		752
		753	=item CBOR::XS::as_cbor $cbor_text
		754
		755	Not a type cast per-se, this type cast forces the argument to be encoded
		756	as-is. This can be used to embed pre-encoded CBOR data.
		757
		758	Note that no checking on the validity of the C<$cbor_text> is done - it's
		759	the caller's responsibility to correctly encode values.
		760
		761	=back
		762
		763	Example: encode a perl string as binary even though C<text_strings> is in
		764	effect.
		765
		766	CBOR::XS->new->text_strings->encode ([4, "text", CBOR::XS::as_bytes "bytevalue"]);
		767
		768	=cut
		769
		770	sub CBOR::XS::as_cbor ($) { bless [$_[0], 0, undef], CBOR::XS::Tagged:: }
		771	sub CBOR::XS::as_int ($) { bless [$_[0], 1, undef], CBOR::XS::Tagged:: }
		772	sub CBOR::XS::as_bytes ($) { bless [$_[0], 2, undef], CBOR::XS::Tagged:: }
		773	sub CBOR::XS::as_text ($) { bless [$_[0], 3, undef], CBOR::XS::Tagged:: }
		774	sub CBOR::XS::as_float16 ($) { bless [$_[0], 4, undef], CBOR::XS::Tagged:: }
		775	sub CBOR::XS::as_float32 ($) { bless [$_[0], 5, undef], CBOR::XS::Tagged:: }
		776	sub CBOR::XS::as_float64 ($) { bless [$_[0], 6, undef], CBOR::XS::Tagged:: }
		777
		778	sub CBOR::XS::as_bool ($) { $_[0] ? $Types::Serialiser::true : $Types::Serialiser::false }
		779
423	=head2 OBJECT SERIALISATION	780	=head2 OBJECT SERIALISATION
		781
		782	This module implements both a CBOR-specific and the generic
		783	L<Types::Serialiser> object serialisation protocol. The following
		784	subsections explain both methods.
		785
		786	=head3 ENCODING
424		787
425	This module knows two ways to serialise a Perl object: The CBOR-specific	788	This module knows two ways to serialise a Perl object: The CBOR-specific
426	way, and the generic way.	789	way, and the generic way.
427		790
428	Whenever the encoder encounters a Perl object that it cnanot serialise	791	Whenever the encoder encounters a Perl object that it cannot serialise
429	directly (most of them), it will first look up the C<TO_CBOR> method on	792	directly (most of them), it will first look up the C<TO_CBOR> method on
430	it.	793	it.
431		794
432	If it has a C<TO_CBOR> method, it will call it with the object as only	795	If it has a C<TO_CBOR> method, it will call it with the object as only
433	argument, and expects exactly one return value, which it will then	796	argument, and expects exactly one return value, which it will then
…		…
439		802
440	The C<FREEZE> method can return any number of values (i.e. zero or	803	The C<FREEZE> method can return any number of values (i.e. zero or
441	more). These will be encoded as CBOR perl object, together with the	804	more). These will be encoded as CBOR perl object, together with the
442	classname.	805	classname.
443		806
		807	These methods I<MUST NOT> change the data structure that is being
		808	serialised. Failure to comply with this can result in memory corruption -
		809	and worse.
		810
444	If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail	811	If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
445	with an error.	812	with an error.
446		813
		814	=head3 DECODING
		815
447	Objects encoded via C<TO_CBOR> cannot be automatically decoded, but	816	Objects encoded via C<TO_CBOR> cannot (normally) be automatically decoded,
448	objects encoded via C<FREEZE> can be decoded using the following protocol:	817	but objects encoded via C<FREEZE> can be decoded using the following
		818	protocol:
449		819
450	When an encoded CBOR perl object is encountered by the decoder, it will	820	When an encoded CBOR perl object is encountered by the decoder, it will
451	look up the C<THAW> method, by using the stored classname, and will fail	821	look up the C<THAW> method, by using the stored classname, and will fail
452	if the method cannot be found.	822	if the method cannot be found.
453		823
454	After the lookup it will call the C<THAW> method with the stored classname	824	After the lookup it will call the C<THAW> method with the stored classname
455	as first argument, the constant string C<CBOR> as second argument, and all	825	as first argument, the constant string C<CBOR> as second argument, and all
456	values returned by C<FREEZE> as remaining arguments.	826	values returned by C<FREEZE> as remaining arguments.
457		827
458	=head4 EXAMPLES	828	=head3 EXAMPLES
459		829
460	Here is an example C<TO_CBOR> method:	830	Here is an example C<TO_CBOR> method:
461		831
462	sub My::Object::TO_CBOR {	832	sub My::Object::TO_CBOR {
463	my ($obj) = @_;	833	my ($obj) = @_;
…		…
474		844
475	sub URI::TO_CBOR {	845	sub URI::TO_CBOR {
476	my ($self) = @_;	846	my ($self) = @_;
477	my $uri = "$self"; # stringify uri	847	my $uri = "$self"; # stringify uri
478	utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string	848	utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
479	CBOR::XS::tagged 32, "$_[0]"	849	CBOR::XS::tag 32, "$_[0]"
480	}	850	}
481		851
482	This will encode URIs as a UTF-8 string with tag 32, which indicates an	852	This will encode URIs as a UTF-8 string with tag 32, which indicates an
483	URI.	853	URI.
484		854
…		…
495	"$self" # encode url string	865	"$self" # encode url string
496	}	866	}
497		867
498	sub URI::THAW {	868	sub URI::THAW {
499	my ($class, $serialiser, $uri) = @_;	869	my ($class, $serialiser, $uri) = @_;
500
501	$class->new ($uri)	870	$class->new ($uri)
502	}	871	}
503		872
504	Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For	873	Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
505	example, a C<FREEZE> method that returns "type", "id" and "variant" values	874	example, a C<FREEZE> method that returns "type", "id" and "variant" values
…		…
636	additional tags (such as base64url).	1005	additional tags (such as base64url).
637		1006
638	=head2 ENFORCED TAGS	1007	=head2 ENFORCED TAGS
639		1008
640	These tags are always handled when decoding, and their handling cannot be	1009	These tags are always handled when decoding, and their handling cannot be
641	overriden by the user.	1010	overridden by the user.
642		1011
643	=over 4	1012	=over 4
644		1013
645	=item <unassigned> (perl-object, L<http://cbor.schmorp.de/perl-object>)	1014	=item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
646		1015
647	These tags are automatically created (and decoded) for serialisable	1016	These tags are automatically created (and decoded) for serialisable
648	objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object	1017	objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
649	serialisation protocol). See L<OBJECT SERIALISATION> for details.	1018	serialisation protocol). See L<OBJECT SERIALISATION> for details.
650		1019
651	=item <unassigned>, <unassigned> (sharable, sharedref, L <http://cbor.schmorp.de/value-sharing>)	1020	=item 28, 29 (shareable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
652		1021
653	These tags are automatically decoded when encountered, resulting in	1022	These tags are automatically decoded when encountered (and they do not
		1023	result in a cyclic data structure, see C<allow_cycles>), resulting in
654	shared values in the decoded object. They are only encoded, however, when	1024	shared values in the decoded object. They are only encoded, however, when
655	C<allow_sharable> is enabled.	1025	C<allow_sharing> is enabled.
656		1026
		1027	Not all shared values can be successfully decoded: values that reference
		1028	themselves will I<currently> decode as C<undef> (this is not the same
		1029	as a reference pointing to itself, which will be represented as a value
		1030	that contains an indirect reference to itself - these will be decoded
		1031	properly).
		1032
		1033	Note that considerably more shared value data structures can be decoded
		1034	than will be encoded - currently, only values pointed to by references
		1035	will be shared, others will not. While non-reference shared values can be
		1036	generated in Perl with some effort, they were considered too unimportant
		1037	to be supported in the encoder. The decoder, however, will decode these
		1038	values as shared values.
		1039
657	=item <unassigned>, <unassigned> (stringref-namespace, stringref, L <http://cbor.schmorp.de/stringref>)	1040	=item 256, 25 (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
658		1041
659	These tags are automatically decoded when encountered. They are only	1042	These tags are automatically decoded when encountered. They are only
660	encoded, however, when C<allow_stringref> is enabled.	1043	encoded, however, when C<pack_strings> is enabled.
661		1044
662	=item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)	1045	=item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
663		1046
664	This tag is automatically generated when a reference is encountered (with	1047	This tag is automatically generated when a reference is encountered (with
665	the exception of hash and array refernces). It is converted to a reference	1048	the exception of hash and array references). It is converted to a reference
666	when decoding.	1049	when decoding.
667		1050
668	=item 55799 (self-describe CBOR, RFC 7049)	1051	=item 55799 (self-describe CBOR, RFC 7049)
669		1052
670	This value is not generated on encoding (unless explicitly requested by	1053	This value is not generated on encoding (unless explicitly requested by
671	the user), and is simply ignored when decoding.	1054	the user), and is simply ignored when decoding.
672		1055
673	=back	1056	=back
674		1057
675	=head2 OPTIONAL TAGS	1058	=head2 NON-ENFORCED TAGS
676		1059
677	These tags have default filters provided when decoding. Their handling can	1060	These tags have default filters provided when decoding. Their handling can
678	be overriden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by	1061	be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
679	providing a custom C<filter> function when decoding.	1062	providing a custom C<filter> callback when decoding.
680		1063
681	When they result in decoding into a specific Perl class, the module	1064	When they result in decoding into a specific Perl class, the module
682	usually provides a corresponding C<TO_CBOR> method as well.	1065	usually provides a corresponding C<TO_CBOR> method as well.
683		1066
684	When any of these need to load additional modules that are not part of the	1067	When any of these need to load additional modules that are not part of the
…		…
686	provide these modules. The decoding usually fails with an exception if the	1069	provide these modules. The decoding usually fails with an exception if the
687	required module cannot be loaded.	1070	required module cannot be loaded.
688		1071
689	=over 4	1072	=over 4
690		1073
		1074	=item 0, 1 (date/time string, seconds since the epoch)
		1075
		1076	These tags are decoded into L<Time::Piece> objects. The corresponding
		1077	C<Time::Piece::TO_CBOR> method always encodes into tag 1 values currently.
		1078
		1079	The L<Time::Piece> API is generally surprisingly bad, and fractional
		1080	seconds are only accidentally kept intact, so watch out. On the plus side,
		1081	the module comes with perl since 5.10, which has to count for something.
		1082
691	=item 2, 3 (positive/negative bignum)	1083	=item 2, 3 (positive/negative bignum)
692		1084
693	These tags are decoded into L<Math::BigInt> objects. The corresponding	1085	These tags are decoded into L<Math::BigInt> objects. The corresponding
694	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR	1086	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
695	integers, and others into positive/negative CBOR bignums.	1087	integers, and others into positive/negative CBOR bignums.
696		1088
697	=item 4, 5 (decimal fraction/bigfloat)	1089	=item 4, 5, 264, 265 (decimal fraction/bigfloat)
698		1090
699	Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>	1091	Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
700	objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>	1092	objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
701	encodes into a decimal fraction.	1093	encodes into a decimal fraction (either tag 4 or 264).
702		1094
703	CBOR cannot represent bigfloats with I<very> large exponents - conversion	1095	NaN and infinities are not encoded properly, as they cannot be represented
704	of such big float objects is undefined.	1096	in CBOR.
705		1097
706	Also, NaN and infinities are not encoded properly.	1098	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
		1099
		1100	=item 30 (rational numbers)
		1101
		1102	These tags are decoded into L<Math::BigRat> objects. The corresponding
		1103	C<Math::BigRat::TO_CBOR> method encodes rational numbers with denominator
		1104	C<1> via their numerator only, i.e., they become normal integers or
		1105	C<bignums>.
		1106
		1107	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
707		1108
708	=item 21, 22, 23 (expected later JSON conversion)	1109	=item 21, 22, 23 (expected later JSON conversion)
709		1110
710	CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these	1111	CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
711	tags.	1112	tags.
…		…
716	C<URI::TO_CBOR> method again results in a CBOR URI value.	1117	C<URI::TO_CBOR> method again results in a CBOR URI value.
717		1118
718	=back	1119	=back
719		1120
720	=cut	1121	=cut
721
722	our %FILTER = (
723	# 0 # rfc4287 datetime, utf-8
724	# 1 # unix timestamp, any
725
726	2 => sub { # pos bigint
727	require Math::BigInt;
728	Math::BigInt->new ("0x" . unpack "H*", pop)
729	},
730
731	3 => sub { # neg bigint
732	require Math::BigInt;
733	-Math::BigInt->new ("0x" . unpack "H*", pop)
734	},
735
736	4 => sub { # decimal fraction, array
737	require Math::BigFloat;
738	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
739	},
740
741	5 => sub { # bigfloat, array
742	require Math::BigFloat;
743	scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)
744	},
745
746	21 => sub { pop }, # expected conversion to base64url encoding
747	22 => sub { pop }, # expected conversion to base64 encoding
748	23 => sub { pop }, # expected conversion to base16 encoding
749
750	# 24 # embedded cbor, byte string
751
752	32 => sub {
753	require URI;
754	URI->new (pop)
755	},
756
757	# 33 # base64url rfc4648, utf-8
758	# 34 # base64 rfc46484, utf-8
759	# 35 # regex pcre/ecma262, utf-8
760	# 36 # mime message rfc2045, utf-8
761	);
762
763		1122
764	=head1 CBOR and JSON	1123	=head1 CBOR and JSON
765		1124
766	CBOR is supposed to implement a superset of the JSON data model, and is,	1125	CBOR is supposed to implement a superset of the JSON data model, and is,
767	with some coercion, able to represent all JSON texts (something that other	1126	with some coercion, able to represent all JSON texts (something that other
…		…
776	CBOR intact.	1135	CBOR intact.
777		1136
778		1137
779	=head1 SECURITY CONSIDERATIONS	1138	=head1 SECURITY CONSIDERATIONS
780		1139
781	When you are using CBOR in a protocol, talking to untrusted potentially	1140	Tl;dr... if you want to decode or encode CBOR from untrusted sources, you
782	hostile creatures requires relatively few measures.	1141	should start with a coder object created via C<new_safe> (which implements
		1142	the mitigations explained below):
783		1143
		1144	my $coder = CBOR::XS->new_safe;
		1145
		1146	my $data = $coder->decode ($cbor_text);
		1147	my $cbor = $coder->encode ($data);
		1148
		1149	Longer version: When you are using CBOR in a protocol, talking to
		1150	untrusted potentially hostile creatures requires some thought:
		1151
		1152	=over 4
		1153
		1154	=item Security of the CBOR decoder itself
		1155
784	First of all, your CBOR decoder should be secure, that is, should not have	1156	First and foremost, your CBOR decoder should be secure, that is, should
		1157	not have any buffer overflows or similar bugs that could potentially be
785	any buffer overflows. Obviously, this module should ensure that and I am	1158	exploited. Obviously, this module should ensure that and I am trying hard
786	trying hard on making that true, but you never know.	1159	on making that true, but you never know.
787		1160
		1161	=item CBOR::XS can invoke almost arbitrary callbacks during decoding
		1162
		1163	CBOR::XS supports object serialisation - decoding CBOR can cause calls
		1164	to I<any> C<THAW> method in I<any> package that exists in your process
		1165	(that is, CBOR::XS will not try to load modules, but any existing C<THAW>
		1166	method or function can be called, so they all have to be secure).
		1167
		1168	Less obviously, it will also invoke C<TO_CBOR> and C<FREEZE> methods -
		1169	even if all your C<THAW> methods are secure, encoding data structures from
		1170	untrusted sources can invoke those and trigger bugs in those.
		1171
		1172	So, if you are not sure about the security of all the modules you
		1173	have loaded (you shouldn't), you should disable this part using
		1174	C<forbid_objects> or using C<new_safe>.
		1175
		1176	=item CBOR can be extended with tags that call library code
		1177
		1178	CBOR can be extended with tags, and C<CBOR::XS> has a registry of
		1179	conversion functions for many existing tags that can be extended via
		1180	third-party modules (see the C<filter> method).
		1181
		1182	If you don't trust these, you should configure the "safe" filter function,
		1183	C<CBOR::XS::safe_filter> (C<new_safe> does this), which by default only
		1184	includes conversion functions that are considered "safe" by the author
		1185	(but again, they can be extended by third party modules).
		1186
		1187	Depending on your level of paranoia, you can use the "safe" filter:
		1188
		1189	$cbor->filter (\&CBOR::XS::safe_filter);
		1190
		1191	... your own filter...
		1192
		1193	$cbor->filter (sub { ... do your stuffs here ... });
		1194
		1195	... or even no filter at all, disabling all tag decoding:
		1196
		1197	$cbor->filter (sub { });
		1198
		1199	This is never a problem for encoding, as the tag mechanism only exists in
		1200	CBOR texts.
		1201
		1202	=item Resource-starving attacks: object memory usage
		1203
788	Second, you need to avoid resource-starving attacks. That means you should	1204	You need to avoid resource-starving attacks. That means you should limit
789	limit the size of CBOR data you accept, or make sure that when your	1205	the size of CBOR data you accept, or make sure that when your resources
790	resources run out, that's just fine (e.g. by using a separate process that	1206	run out, that's just fine (e.g. by using a separate process that can
791	can crash safely). The size of a CBOR string in octets is usually a good	1207	crash safely). The size of a CBOR string in octets is usually a good
792	indication of the size of the resources required to decode it into a Perl	1208	indication of the size of the resources required to decode it into a Perl
793	structure. While CBOR::XS can check the size of the CBOR text, it might be	1209	structure. While CBOR::XS can check the size of the CBOR text (using
794	too late when you already have it in memory, so you might want to check	1210	C<max_size> - done by C<new_safe>), it might be too late when you already
795	the size before you accept the string.	1211	have it in memory, so you might want to check the size before you accept
		1212	the string.
796		1213
		1214	As for encoding, it is possible to construct data structures that are
		1215	relatively small but result in large CBOR texts (for example by having an
		1216	array full of references to the same big data structure, which will all be
		1217	deep-cloned during encoding by default). This is rarely an actual issue
		1218	(and the worst case is still just running out of memory), but you can
		1219	reduce this risk by using C<allow_sharing>.
		1220
		1221	=item Resource-starving attacks: stack overflows
		1222
797	Third, CBOR::XS recurses using the C stack when decoding objects and	1223	CBOR::XS recurses using the C stack when decoding objects and arrays. The
798	arrays. The C stack is a limited resource: for instance, on my amd64	1224	C stack is a limited resource: for instance, on my amd64 machine with 8MB
799	machine with 8MB of stack size I can decode around 180k nested arrays but	1225	of stack size I can decode around 180k nested arrays but only 14k nested
800	only 14k nested CBOR objects (due to perl itself recursing deeply on croak	1226	CBOR objects (due to perl itself recursing deeply on croak to free the
801	to free the temporary). If that is exceeded, the program crashes. To be	1227	temporary). If that is exceeded, the program crashes. To be conservative,
802	conservative, the default nesting limit is set to 512. If your process	1228	the default nesting limit is set to 512. If your process has a smaller
803	has a smaller stack, you should adjust this setting accordingly with the	1229	stack, you should adjust this setting accordingly with the C<max_depth>
804	C<max_depth> method.	1230	method.
		1231
		1232	=item Resource-starving attacks: CPU en-/decoding complexity
		1233
		1234	CBOR::XS will use the L<Math::BigInt>, L<Math::BigFloat> and
		1235	L<Math::BigRat> libraries to represent and encode/decode bignums. These can be
		1236	very slow (as in, centuries of CPU time) and can even crash your program
		1237	(and are generally not very trustworthy). See the next section on bignum
		1238	security for details.
		1239
		1240	=item Data breaches: leaking information in error messages
		1241
		1242	CBOR::XS might leak contents of your Perl data structures in its error
		1243	messages, so when you serialise sensitive information you might want to
		1244	make sure that exceptions thrown by CBOR::XS will not end up in front of
		1245	untrusted eyes.
		1246
		1247	=item Something else...
805		1248
806	Something else could bomb you, too, that I forgot to think of. In that	1249	Something else could bomb you, too, that I forgot to think of. In that
807	case, you get to keep the pieces. I am always open for hints, though...	1250	case, you get to keep the pieces. I am always open for hints, though...
808		1251
809	Also keep in mind that CBOR::XS might leak contents of your Perl data	1252	=back
810	structures in its error messages, so when you serialise sensitive	1253
811	information you might want to make sure that exceptions thrown by CBOR::XS	1254
812	will not end up in front of untrusted eyes.	1255	=head1 BIGNUM SECURITY CONSIDERATIONS
		1256
		1257	CBOR::XS provides a C<TO_CBOR> method for both L<Math::BigInt> and
		1258	L<Math::BigFloat> that tries to encode the number in the simplest possible
		1259	way, that is, either a CBOR integer, a CBOR bigint/decimal fraction (tag
		1260	4) or an arbitrary-exponent decimal fraction (tag 264). Rational numbers
		1261	(L<Math::BigRat>, tag 30) can also contain bignums as members.
		1262
		1263	CBOR::XS will also understand base-2 bigfloat or arbitrary-exponent
		1264	bigfloats (tags 5 and 265), but it will never generate these on its own.
		1265
		1266	Using the built-in L<Math::BigInt::Calc> support, encoding and decoding
		1267	decimal fractions is generally fast. Decoding bigints can be slow for very
		1268	big numbers (tens of thousands of digits, something that could potentially
		1269	be caught by limiting the size of CBOR texts), and decoding bigfloats or
		1270	arbitrary-exponent bigfloats can be I<extremely> slow (minutes, decades)
		1271	for large exponents (roughly 40 bit and longer).
		1272
		1273	Additionally, L<Math::BigInt> can take advantage of other bignum
		1274	libraries, such as L<Math::GMP>, which cannot handle big floats with large
		1275	exponents, and might simply abort or crash your program, due to their code
		1276	quality.
		1277
		1278	This can be a concern if you want to parse untrusted CBOR. If it is, you
		1279	might want to disable decoding of tag 2 (bigint) and 3 (negative bigint)
		1280	types. You should also disable types 5 and 265, as these can be slow even
		1281	without bigints.
		1282
		1283	Disabling bigints will also partially or fully disable types that rely on
		1284	them, e.g. rational numbers that use bignums.
		1285
813		1286
814	=head1 CBOR IMPLEMENTATION NOTES	1287	=head1 CBOR IMPLEMENTATION NOTES
815		1288
816	This section contains some random implementation notes. They do not	1289	This section contains some random implementation notes. They do not
817	describe guaranteed behaviour, but merely behaviour as-is implemented	1290	describe guaranteed behaviour, but merely behaviour as-is implemented
…		…
826	Only the double data type is supported for NV data types - when Perl uses	1299	Only the double data type is supported for NV data types - when Perl uses
827	long double to represent floating point values, they might not be encoded	1300	long double to represent floating point values, they might not be encoded
828	properly. Half precision types are accepted, but not encoded.	1301	properly. Half precision types are accepted, but not encoded.
829		1302
830	Strict mode and canonical mode are not implemented.	1303	Strict mode and canonical mode are not implemented.
		1304
		1305
		1306	=head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT
		1307
		1308	On perls that were built without 64 bit integer support (these are rare
		1309	nowadays, even on 32 bit architectures, as all major Perl distributions
		1310	are built with 64 bit integer support), support for any kind of 64 bit
		1311	value in CBOR is very limited - most likely, these 64 bit values will
		1312	be truncated, corrupted, or otherwise not decoded correctly. This also
		1313	includes string, float, array and map sizes that are stored as 64 bit
		1314	integers.
831		1315
832		1316
833	=head1 THREADS	1317	=head1 THREADS
834		1318
835	This module is I<not> guaranteed to be thread safe and there are no	1319	This module is I<not> guaranteed to be thread safe and there are no
…		…
849	Please refrain from using rt.cpan.org or any other bug reporting	1333	Please refrain from using rt.cpan.org or any other bug reporting
850	service. I put the contact address into my modules for a reason.	1334	service. I put the contact address into my modules for a reason.
851		1335
852	=cut	1336	=cut
853		1337
		1338	# clumsy and slow hv_store-in-hash helper function
		1339	sub _hv_store {
		1340	$_[0]{$_[1]} = $_[2];
		1341	}
		1342
854	our %FILTER = (	1343	our %FILTER = (
855	# 0 # rfc4287 datetime, utf-8	1344	0 => sub { # rfc4287 datetime, utf-8
856	# 1 # unix timestamp, any	1345	require Time::Piece;
		1346	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"
		1347	# from FreeBSD, which can't parse ISO 8601, RFC3339, RFC4287 or much of anything
		1348	# else either. What's incredible over standard strptime totally escapes me.
		1349	# doesn't do fractional times, either. sigh.
		1350	# In fact, it's all a lie, it uses whatever strptime it wants, and of course,
		1351	# they are all incompatible. The openbsd one simply ignores %z (but according to the
		1352	# docs, it would be much more incredibly flexible indeed. If it worked, that is.).
		1353	scalar eval {
		1354	my $s = $_[1];
		1355
		1356	$s =~ s/Z$/+00:00/;
		1357	$s =~ s/(\.[0-9]+)?([+-][0-9][0-9]):([0-9][0-9])$//
		1358	or die;
		1359
		1360	my $b = $1 - ($2 * 60 + $3) * 60; # fractional part + offset. hopefully
		1361	my $d = Time::Piece->strptime ($s, "%Y-%m-%dT%H:%M:%S");
		1362
		1363	Time::Piece::gmtime ($d->epoch + $b)
		1364	} || die "corrupted CBOR date/time string ($_[0])";
		1365	},
		1366
		1367	1 => sub { # seconds since the epoch, possibly fractional
		1368	require Time::Piece;
		1369	scalar Time::Piece::gmtime (pop)
		1370	},
857		1371
858	2 => sub { # pos bigint	1372	2 => sub { # pos bigint
859	require Math::BigInt;	1373	require Math::BigInt;
860	Math::BigInt->new ("0x" . unpack "H*", pop)	1374	Math::BigInt->new ("0x" . unpack "H*", pop)
861	},	1375	},
…		…
868	4 => sub { # decimal fraction, array	1382	4 => sub { # decimal fraction, array
869	require Math::BigFloat;	1383	require Math::BigFloat;
870	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])	1384	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
871	},	1385	},
872		1386
		1387	264 => sub { # decimal fraction with arbitrary exponent
		1388	require Math::BigFloat;
		1389	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
		1390	},
		1391
873	5 => sub { # bigfloat, array	1392	5 => sub { # bigfloat, array
874	require Math::BigFloat;	1393	require Math::BigFloat;
875	scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2)	1394	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1395	},
		1396
		1397	265 => sub { # bigfloat with arbitrary exponent
		1398	require Math::BigFloat;
		1399	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1400	},
		1401
		1402	30 => sub { # rational number
		1403	require Math::BigRat;
		1404	Math::BigRat->new ("$_[1][0]/$_[1][1]") # separate parameters only work in recent versions
876	},	1405	},
877		1406
878	21 => sub { pop }, # expected conversion to base64url encoding	1407	21 => sub { pop }, # expected conversion to base64url encoding
879	22 => sub { pop }, # expected conversion to base64 encoding	1408	22 => sub { pop }, # expected conversion to base64 encoding
880	23 => sub { pop }, # expected conversion to base16 encoding	1409	23 => sub { pop }, # expected conversion to base16 encoding
…		…
890	# 34 # base64 rfc4648, utf-8	1419	# 34 # base64 rfc4648, utf-8
891	# 35 # regex pcre/ecma262, utf-8	1420	# 35 # regex pcre/ecma262, utf-8
892	# 36 # mime message rfc2045, utf-8	1421	# 36 # mime message rfc2045, utf-8
893	);	1422	);
894		1423
895	sub CBOR::XS::default_filter {	1424	sub default_filter {
896	&{ $FILTER{$_[0]} or return }	1425	&{ $FILTER{$_[0]} or return }
		1426	}
		1427
		1428	our %SAFE_FILTER = map { $_ => $FILTER{$_} } 0, 1, 21, 22, 23, 32;
		1429
		1430	sub safe_filter {
		1431	&{ $SAFE_FILTER{$_[0]} or return }
897	}	1432	}
898		1433
899	sub URI::TO_CBOR {	1434	sub URI::TO_CBOR {
900	my $uri = $_[0]->as_string;	1435	my $uri = $_[0]->as_string;
901	utf8::upgrade $uri;	1436	utf8::upgrade $uri;
902	CBOR::XS::tag 32, $uri	1437	tag 32, $uri
903	}	1438	}
904		1439
905	sub Math::BigInt::TO_CBOR {	1440	sub Math::BigInt::TO_CBOR {
906	if ($_[0] >= -2147483648 && $_[0] <= 2147483647) {	1441	if (-2147483648 <= $_[0] && $_[0] <= 2147483647) {
907	$_[0]->numify	1442	$_[0]->numify
908	} else {	1443	} else {
909	my $hex = substr $_[0]->as_hex, 2;	1444	my $hex = substr $_[0]->as_hex, 2;
910	$hex = "0$hex" if 1 & length $hex; # sigh	1445	$hex = "0$hex" if 1 & length $hex; # sigh
911	CBOR::XS::tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex	1446	tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex
912	}	1447	}
913	}	1448	}
914		1449
915	sub Math::BigFloat::TO_CBOR {	1450	sub Math::BigFloat::TO_CBOR {
916	my ($m, $e) = $_[0]->parts;	1451	my ($m, $e) = $_[0]->parts;
		1452
		1453	-9223372036854775808 <= $e && $e <= 18446744073709551615
917	CBOR::XS::tag 4, [$e->numify, $m]	1454	? tag 4, [$e->numify, $m]
		1455	: tag 264, [$e, $m]
		1456	}
		1457
		1458	sub Math::BigRat::TO_CBOR {
		1459	my ($n, $d) = $_[0]->parts;
		1460
		1461	# older versions of BigRat need *1, as they do not always return numbers
		1462
		1463	$d*1 == 1
		1464	? $n*1
		1465	: tag 30, [$n*1, $d*1]
		1466	}
		1467
		1468	sub Time::Piece::TO_CBOR {
		1469	tag 1, 0 + $_[0]->epoch
918	}	1470	}
919		1471
920	XSLoader::load "CBOR::XS", $VERSION;	1472	XSLoader::load "CBOR::XS", $VERSION;
921		1473
922	=head1 SEE ALSO	1474	=head1 SEE ALSO

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing CBOR-XS/XS.pm (file contents): Revision 1.23 by root, Fri Nov 22 16:00:30 2013 UTC vs. Revision 1.76 by root, Tue Dec 1 01:49:47 2020 UTC

Diff Legend

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.23 by root, Fri Nov 22 16:00:30 2013 UTC vs.
Revision 1.76 by root, Tue Dec 1 01:49:47 2020 UTC