[ViewVC] Diff of: cvs/CBOR-XS/XS.pm

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.3 by root, Sat Oct 26 11:08:34 2013 UTC vs.
Revision 1.71 by root, Sun Nov 29 21:32:01 2020 UTC

…		…
12	$perl_value = decode_cbor $binary_cbor_data;	12	$perl_value = decode_cbor $binary_cbor_data;
13		13
14	# OO-interface	14	# OO-interface
15		15
16	$coder = CBOR::XS->new;	16	$coder = CBOR::XS->new;
17	#TODO	17	$binary_cbor_data = $coder->encode ($perl_value);
		18	$perl_value = $coder->decode ($binary_cbor_data);
		19
		20	# prefix decoding
		21
		22	my $many_cbor_strings = ...;
		23	while (length $many_cbor_strings) {
		24	my ($data, $length) = $coder->decode_prefix ($many_cbor_strings);
		25	# data was decoded
		26	substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
		27	}
18		28
19	=head1 DESCRIPTION	29	=head1 DESCRIPTION
20		30
21	WARNING! THIS IS A PRE-ALPHA RELEASE! IT WILL CRASH, CORRUPT YOUR DATA AND	31	This module converts Perl data structures to the Concise Binary Object
22	EAT YOUR CHILDREN!	32	Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
		33	format that aims to use an (almost) superset of the JSON data model, i.e.
		34	when you can represent something useful in JSON, you should be able to
		35	represent it in CBOR.
23		36
24	This module converts Perl data structures to CBOR and vice versa. Its	37	In short, CBOR is a faster and quite compact binary alternative to JSON,
		38	with the added ability of supporting serialisation of Perl objects. (JSON
		39	often compresses better than CBOR though, so if you plan to compress the
		40	data later and speed is less important you might want to compare both
		41	formats first).
		42
25	primary goal is to be I<correct> and its secondary goal is to be	43	The primary goal of this module is to be I<correct> and the secondary goal
26	I<fast>. To reach the latter goal it was written in C.	44	is to be I<fast>. To reach the latter goal it was written in C.
		45
		46	To give you a general idea about speed, with texts in the megabyte range,
		47	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
		48	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
		49	data, the worse L<Storable> performs in comparison.
		50
		51	Regarding compactness, C<CBOR::XS>-encoded data structures are usually
		52	about 20% smaller than the same data encoded as (compact) JSON or
		53	L<Storable>.
		54
		55	In addition to the core CBOR data format, this module implements a
		56	number of extensions, to support cyclic and shared data structures
		57	(see C<allow_sharing> and C<allow_cycles>), string deduplication (see
		58	C<pack_strings>) and scalar references (always enabled).
27		59
28	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and	60	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
29	vice versa.	61	vice versa.
30		62
31	=cut	63	=cut
32		64
33	package CBOR::XS;	65	package CBOR::XS;
34		66
35	use common::sense;	67	use common::sense;
36		68
37	our $VERSION = 0.02;	69	our $VERSION = 1.71;
38	our @ISA = qw(Exporter);	70	our @ISA = qw(Exporter);
39		71
40	our @EXPORT = qw(encode_cbor decode_cbor);	72	our @EXPORT = qw(encode_cbor decode_cbor);
41		73
42	use Exporter;	74	use Exporter;
43	use XSLoader;	75	use XSLoader;
44		76
		77	use Types::Serialiser;
		78
45	our $MAGIC = "\xd9\xd9\xf7";	79	our $MAGIC = "\xd9\xd9\xf7";
46		80
47	=head1 FUNCTIONAL INTERFACE	81	=head1 FUNCTIONAL INTERFACE
48		82
49	The following convenience methods are provided by this module. They are	83	The following convenience methods are provided by this module. They are
…		…
77	strings. All boolean flags described below are by default I<disabled>.	111	strings. All boolean flags described below are by default I<disabled>.
78		112
79	The mutators for flags all return the CBOR object again and thus calls can	113	The mutators for flags all return the CBOR object again and thus calls can
80	be chained:	114	be chained:
81		115
82	#TODO
83	my $cbor = CBOR::XS->new->encode ({a => [1,2]});	116	my $cbor = CBOR::XS->new->encode ({a => [1,2]});
		117
		118	=item $cbor = new_safe CBOR::XS
		119
		120	Create a new, safe/secure CBOR::XS object. This is similar to C<new>,
		121	but configures the coder object to be safe to use with untrusted
		122	data. Currently, this is equivalent to:
		123
		124	my $cbor = CBOR::XS
		125	->new
		126	->forbid_objects
		127	->filter (\&CBOR::XS::safe_filter)
		128	->max_size (1e8);
		129
		130	But is more future proof (it is better to crash because of a change than
		131	to be exploited in other ways).
		132
		133	=cut
		134
		135	sub new_safe {
		136	CBOR::XS
		137	->new
		138	->forbid_objects
		139	->filter (\&CBOR::XS::safe_filter)
		140	->max_size (1e8)
		141	}
84		142
85	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])	143	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
86		144
87	=item $max_depth = $cbor->get_max_depth	145	=item $max_depth = $cbor->get_max_depth
88		146
…		…
104		162
105	Note that nesting is implemented by recursion in C. The default value has	163	Note that nesting is implemented by recursion in C. The default value has
106	been chosen to be as large as typical operating systems allow without	164	been chosen to be as large as typical operating systems allow without
107	crashing.	165	crashing.
108		166
109	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	167	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
110		168
111	=item $cbor = $cbor->max_size ([$maximum_string_size])	169	=item $cbor = $cbor->max_size ([$maximum_string_size])
112		170
113	=item $max_size = $cbor->get_max_size	171	=item $max_size = $cbor->get_max_size
114		172
…		…
119	effect on C<encode> (yet).	177	effect on C<encode> (yet).
120		178
121	If no argument is given, the limit check will be deactivated (same as when	179	If no argument is given, the limit check will be deactivated (same as when
122	C<0> is specified).	180	C<0> is specified).
123		181
124	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	182	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
		183
		184	=item $cbor = $cbor->allow_unknown ([$enable])
		185
		186	=item $enabled = $cbor->get_allow_unknown
		187
		188	If C<$enable> is true (or missing), then C<encode> will I<not> throw an
		189	exception when it encounters values it cannot represent in CBOR (for
		190	example, filehandles) but instead will encode a CBOR C<error> value.
		191
		192	If C<$enable> is false (the default), then C<encode> will throw an
		193	exception when it encounters anything it cannot encode as CBOR.
		194
		195	This option does not affect C<decode> in any way, and it is recommended to
		196	leave it off unless you know your communications partner.
		197
		198	=item $cbor = $cbor->allow_sharing ([$enable])
		199
		200	=item $enabled = $cbor->get_allow_sharing
		201
		202	If C<$enable> is true (or missing), then C<encode> will not double-encode
		203	values that have been referenced before (e.g. when the same object, such
		204	as an array, is referenced multiple times), but instead will emit a
		205	reference to the earlier value.
		206
		207	This means that such values will only be encoded once, and will not result
		208	in a deep cloning of the value on decode, in decoders supporting the value
		209	sharing extension. This also makes it possible to encode cyclic data
		210	structures (which need C<allow_cycles> to be enabled to be decoded by this
		211	module).
		212
		213	It is recommended to leave it off unless you know your
		214	communication partner supports the value sharing extensions to CBOR
		215	(L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
		216	resulting data structure might be unusable.
		217
		218	Detecting shared values incurs a runtime overhead when values are encoded
		219	that have a reference counter larger than one, and might unnecessarily
		220	increase the encoded size, as potentially shared values are encoded as
		221	shareable whether or not they are actually shared.
		222
		223	At the moment, only targets of references can be shared (e.g. scalars,
		224	arrays or hashes pointed to by a reference). Weirder constructs, such as
		225	an array with multiple "copies" of the I<same> string, which are hard but
		226	not impossible to create in Perl, are not supported (this is the same as
		227	with L<Storable>).
		228
		229	If C<$enable> is false (the default), then C<encode> will encode shared
		230	data structures repeatedly, unsharing them in the process. Cyclic data
		231	structures cannot be encoded in this mode.
		232
		233	This option does not affect C<decode> in any way - shared values and
		234	references will always be decoded properly if present.
		235
		236	=item $cbor = $cbor->allow_cycles ([$enable])
		237
		238	=item $enabled = $cbor->get_allow_cycles
		239
		240	If C<$enable> is true (or missing), then C<decode> will happily decode
		241	self-referential (cyclic) data structures. By default these will not be
		242	decoded, as they need manual cleanup to avoid memory leaks, so code that
		243	isn't prepared for this will not leak memory.
		244
		245	If C<$enable> is false (the default), then C<decode> will throw an error
		246	when it encounters a self-referential/cyclic data structure.
		247
		248	FUTURE DIRECTION: the motivation behind this option is to avoid I<real>
		249	cycles - future versions of this module might choose to decode cyclic data
		250	structures using weak references when this option is off, instead of
		251	throwing an error.
		252
		253	This option does not affect C<encode> in any way - shared values and
		254	references will always be encoded properly if present.
		255
		256	=item $cbor = $cbor->forbid_objects ([$enable])
		257
		258	=item $enabled = $cbor->get_forbid_objects
		259
		260	Disables the use of the object serialiser protocol.
		261
		262	If C<$enable> is true (or missing), then C<encode> will throw an
		263	exception when it encounters perl objects that would be encoded using the
		264	perl-object tag (26). When C<decode> encounters such tags, it will fall
		265	back to the general filter/tagged logic as if this were an unknown tag (by
		266	default resulting in a C<CBOR::XS::Tagged> object).
		267
		268	If C<$enable> is false (the default), then C<encode> will use the
		269	L<Types::Serialiser> object serialisation protocol to serialise objects
		270	into perl-object tags, and C<decode> will do the same to decode such tags.
		271
		272	See L<SECURITY CONSIDERATIONS>, below, for more info on why forbidding this
		273	protocol can be useful.
		274
		275	=item $cbor = $cbor->pack_strings ([$enable])
		276
		277	=item $enabled = $cbor->get_pack_strings
		278
		279	If C<$enable> is true (or missing), then C<encode> will try not to encode
		280	the same string twice, but will encode a reference to the string
		281	instead. Depending on your data format, this can save a lot of space, but
		282	also results in a very large runtime overhead (expect encoding times to be
		283	2-4 times as high as without).
		284
		285	It is recommended to leave it off unless you know your
		286	communications partner supports the stringref extension to CBOR
		287	(L<http://cbor.schmorp.de/stringref>), as without decoder support, the
		288	resulting data structure might not be usable.
		289
		290	If C<$enable> is false (the default), then C<encode> will encode strings
		291	the standard CBOR way.
		292
		293	This option does not affect C<decode> in any way - string references will
		294	always be decoded properly if present.
		295
		296	=item $cbor = $cbor->text_keys ([$enable])
		297
		298	=item $enabled = $cbor->get_text_keys
		299
		300	If C<$enable> is true (or missing), then C<encode> will encode all
		301	perl hash keys as CBOR text strings/UTF-8 string, upgrading them as needed.
		302
		303	If C<$enable> is false (the default), then C<encode> will encode hash keys
		304	normally - upgraded perl strings (strings internally encoded as UTF-8) as
		305	CBOR text strings, and downgraded perl strings as CBOR byte strings.
		306
		307	This option does not affect C<decode> in any way.
		308
		309	This option is useful for interoperability with CBOR decoders that don't
		310	treat byte strings as a form of text. It is especially useful as Perl
		311	gives very little control over hash keys.
		312
		313	Enabling this option can be slow, as all downgraded hash keys that are
		314	encoded need to be scanned and converted to UTF-8.
		315
		316	=item $cbor = $cbor->text_strings ([$enable])
		317
		318	=item $enabled = $cbor->get_text_strings
		319
		320	This option works similar to C<text_keys>, above, but works on all strings
		321	(including hash keys), so C<text_keys> has no further effect after
		322	enabling C<text_strings>.
		323
		324	If C<$enable> is true (or missing), then C<encode> will encode all perl
		325	strings as CBOR text strings/UTF-8 strings, upgrading them as needed.
		326
		327	If C<$enable> is false (the default), then C<encode> will encode strings
		328	normally (but see C<text_keys>) - upgraded perl strings (strings
		329	internally encoded as UTF-8) as CBOR text strings, and downgraded perl
		330	strings as CBOR byte strings.
		331
		332	This option does not affect C<decode> in any way.
		333
		334	This option has similar advantages and disadvantages as C<text_keys>. In
		335	addition, this option effectively removes the ability to automatically
		336	encode byte strings, which might break some C<FREEZE> and C<TO_CBOR>
		337	methods that rely on this.
		338
		339	A workaround is to use explicit type casts, which are unaffected by this option.
		340
		341	=item $cbor = $cbor->validate_utf8 ([$enable])
		342
		343	=item $enabled = $cbor->get_validate_utf8
		344
		345	If C<$enable> is true (or missing), then C<decode> will validate that
		346	elements (text strings) containing UTF-8 data in fact contain valid UTF-8
		347	data (instead of blindly accepting it). This validation obviously takes
		348	extra time during decoding.
		349
		350	The concept of "valid UTF-8" used is perl's concept, which is a superset
		351	of the official UTF-8.
		352
		353	If C<$enable> is false (the default), then C<decode> will blindly accept
		354	UTF-8 data, marking them as valid UTF-8 in the resulting data structure
		355	regardless of whether that's true or not.
		356
		357	Perl isn't too happy about corrupted UTF-8 in strings, but should
		358	generally not crash or do similarly evil things. Extensions might not be
		359	so forgiving, so it's recommended to turn on this setting if you receive
		360	untrusted CBOR.
		361
		362	This option does not affect C<encode> in any way - strings that are
		363	supposedly valid UTF-8 will simply be dumped into the resulting CBOR
		364	string without checking whether that is, in fact, true or not.
		365
		366	=item $cbor = $cbor->filter ([$cb->($tag, $value)])
		367
		368	=item $cb_or_undef = $cbor->get_filter
		369
		370	Sets or replaces the tagged value decoding filter (when C<$cb> is
		371	specified) or clears the filter (if no argument or C<undef> is provided).
		372
		373	The filter callback is called only during decoding, when a non-enforced
		374	tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
		375	list of enforced tags). For specific tags, it's often better to provide a
		376	default converter using the C<%CBOR::XS::FILTER> hash (see below).
		377
		378	The first argument is the numerical tag, the second is the (decoded) value
		379	that has been tagged.
		380
		381	The filter function should return either exactly one value, which will
		382	replace the tagged value in the decoded data structure, or no values,
		383	which will result in default handling, which currently means the decoder
		384	creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
		385
		386	When the filter is cleared (the default state), the default filter
		387	function, C<CBOR::XS::default_filter>, is used. This function simply
		388	looks up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists
		389	it must be a code reference that is called with tag and value, and is
		390	responsible for decoding the value. If no entry exists, it returns no
		391	values. C<CBOR::XS> provides a number of default filter functions already,
		392	and the C<%CBOR::XS::FILTER> hash can be freely extended with more.
		393
		394	C<CBOR::XS> additionally provides an alternative filter function that is
		395	supposed to be safe to use with untrusted data (which the default filter
		396	might not), called C<CBOR::XS::safe_filter>, which works the same as
		397	the C<default_filter> but uses the C<%CBOR::XS::SAFE_FILTER> variable
		398	instead. It is prepopulated with the tag decoding functions that are
		399	deemed safe (basically the same as C<%CBOR::XS::FILTER> without all
		400	the bignum tags), and can be extended by user code as well, although,
		401	obviously, one should be very careful about adding decoding functions
		402	here, since the expectation is that they are safe to use on untrusted
		403	data, after all.
		404
		405	Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
		406	objects, with no other special handling (useful when working with
		407	potentially "unsafe" CBOR data).
		408
		409	CBOR::XS->new->filter (sub { })->decode ($cbor_data);
		410
		411	Example: provide a global filter for tag 1347375694, converting the value
		412	into some string form.
		413
		414	$CBOR::XS::FILTER{1347375694} = sub {
		415	my ($tag, $value) = @_;
		416
		417	"tag 1347375694 value $value"
		418	};
		419
		420	Example: provide your own filter function that looks up tags in your own
		421	hash:
		422
		423	my %my_filter = (
		424	998347484 => sub {
		425	my ($tag, $value) = @_;
		426
		427	"tag 998347484 value $value"
		428	},
		429	);
		430
		431	my $coder = CBOR::XS->new->filter (sub {
		432	&{ $my_filter{$_[0]} or return }
		433	});
		434
		435
		436	Example: use the safe filter function (see L<SECURITY CONSIDERATIONS> for
		437	more considerations on security).
		438
		439	CBOR::XS->new->filter (\&CBOR::XS::safe_filter)->decode ($cbor_data);
125		440
126	=item $cbor_data = $cbor->encode ($perl_scalar)	441	=item $cbor_data = $cbor->encode ($perl_scalar)
127		442
128	Converts the given Perl data structure (a scalar value) to its CBOR	443	Converts the given Perl data structure (a scalar value) to its CBOR
129	representation.	444	representation.
…		…
139	when there is trailing garbage after the CBOR string, it will silently	454	when there is trailing garbage after the CBOR string, it will silently
140	stop parsing there and return the number of characters consumed so far.	455	stop parsing there and return the number of characters consumed so far.
141		456
142	This is useful if your CBOR texts are not delimited by an outer protocol	457	This is useful if your CBOR texts are not delimited by an outer protocol
143	and you need to know where the first CBOR string ends and the next one	458	and you need to know where the first CBOR string ends and the next one
144	starts.	459	starts - CBOR strings are self-delimited, so it is possible to concatenate
		460	CBOR strings without any delimiters or size fields and recover their data.
145		461
146	CBOR::XS->new->decode_prefix ("......")	462	CBOR::XS->new->decode_prefix ("......")
147	=> ("...", 3)	463	=> ("...", 3)
		464
		465	=back
		466
		467	=head2 INCREMENTAL PARSING
		468
		469	In some cases, there is the need for incremental parsing of CBOR
		470	texts. While this module always has to keep both CBOR text and resulting
		471	Perl data structure in memory at one time, it does allow you to parse a
		472	CBOR stream incrementally, using a mechanism similar to using "decode_prefix" to see
		473	if a full CBOR object is available, but is much more efficient.
		474
		475	It basically works by parsing as much of a CBOR string as possible - if
		476	the CBOR data is not complete yet, the parser will remember where it was,
		477	to be able to restart when more data has been accumulated. Once enough
		478	data is available to either decode a complete CBOR value or raise an
		479	error, a real decode will be attempted.
		480
		481	A typical use case would be a network protocol that consists of sending
		482	and receiving CBOR-encoded messages. The solution that works with CBOR and
		483	about anything else is by prepending a length to every CBOR value, so the
		484	receiver knows how many octets to read. More compact (and slightly slower)
		485	would be to just send CBOR values back-to-back, as C<CBOR::XS> knows where
		486	a CBOR value ends, and doesn't need an explicit length.
		487
		488	The following methods help with this:
		489
		490	=over 4
		491
		492	=item @decoded = $cbor->incr_parse ($buffer)
		493
		494	This method attempts to decode exactly one CBOR value from the beginning
		495	of the given C<$buffer>. The value is removed from the C<$buffer> on
		496	success. When C<$buffer> doesn't contain a complete value yet, it returns
		497	nothing. Finally, when the C<$buffer> doesn't start with something
		498	that could ever be a valid CBOR value, it raises an exception, just as
		499	C<decode> would. In the latter case the decoder state is undefined and
		500	must be reset before being able to parse further.
		501
		502	This method modifies the C<$buffer> in place. When no CBOR value can be
		503	decoded, the decoder stores the current string offset. On the next call,
		504	it continues decoding at the place where it stopped before. For this to make
		505	sense, the C<$buffer> must begin with the same octets as on previous
		506	unsuccessful calls.
		507
		508	You can call this method in scalar context, in which case it either
		509	returns a decoded value or C<undef>. This makes it impossible to
		510	distinguish between CBOR null values (which decode to C<undef>) and an
		511	unsuccessful decode, which is often acceptable.
		512
		513	=item @decoded = $cbor->incr_parse_multiple ($buffer)
		514
		515	Same as C<incr_parse>, but attempts to decode as many CBOR values as
		516	possible in one go, instead of at most one. Calls to C<incr_parse> and
		517	C<incr_parse_multiple> can be interleaved.
		518
		519	=item $cbor->incr_reset
		520
		521	Resets the incremental decoder. This throws away any saved state, so that
		522	subsequent calls to C<incr_parse> or C<incr_parse_multiple> start to parse
		523	a new CBOR value from the beginning of the C<$buffer> again.
		524
		525	This method can be called at any time, but it I<must> be called if you want
		526	to change your C<$buffer> or there was a decoding error and you want to
		527	reuse the C<$cbor> object for future incremental parsings.
148		528
149	=back	529	=back
150		530
151		531
152	=head1 MAPPING	532	=head1 MAPPING
…		…
163		543
164	=head2 CBOR -> PERL	544	=head2 CBOR -> PERL
165		545
166	=over 4	546	=over 4
167		547
168	=item True, False	548	=item integers
169		549
170	These CBOR values become C<CBOR::XS::true> and C<CBOR::XS::false>,	550	CBOR integers become (numeric) perl scalars. On perls without 64 bit
		551	support, 64 bit integers will be truncated or otherwise corrupted.
		552
		553	=item byte strings
		554
		555	Byte strings will become octet strings in Perl (the Byte values 0..255
		556	will simply become characters of the same value in Perl).
		557
		558	=item UTF-8 strings
		559
		560	UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
		561	decoded into proper Unicode code points. At the moment, the validity of
		562	the UTF-8 octets will not be validated - corrupt input will result in
		563	corrupted Perl strings.
		564
		565	=item arrays, maps
		566
		567	CBOR arrays and CBOR maps will be converted into references to a Perl
		568	array or hash, respectively. The keys of the map will be stringified
		569	during this process.
		570
		571	=item null
		572
		573	CBOR null becomes C<undef> in Perl.
		574
		575	=item true, false, undefined
		576
		577	These CBOR values become C<Types::Serialiser::true>,
		578	C<Types::Serialiser::false> and C<Types::Serialiser::error>,
171	respectively. They are overloaded to act almost exactly like the numbers	579	respectively. They are overloaded to act almost exactly like the numbers
172	C<1> and C<0>. You can check whether a scalar is a CBOR boolean by using	580	C<1> and C<0> (for true and false) or to throw an exception on access (for
173	the C<CBOR::XS::is_bool> function.	581	error). See the L<Types::Serialiser> manpage for details.
174		582
175	=item Null, Undefined	583	=item tagged values
176		584
177	CBOR Null and Undefined values becomes C<undef> in Perl (in the future,	585	Tagged items consist of a numeric tag and another CBOR value.
178	Undefined may raise an exception).	586
		587	See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
		588	for details on which tags are handled how.
		589
		590	=item anything else
		591
		592	Anything else (e.g. unsupported simple values) will raise a decoding
		593	error.
179		594
180	=back	595	=back
181		596
182		597
183	=head2 PERL -> CBOR	598	=head2 PERL -> CBOR
184		599
185	The mapping from Perl to CBOR is slightly more difficult, as Perl is a	600	The mapping from Perl to CBOR is slightly more difficult, as Perl is a
186	truly typeless language, so we can only guess which CBOR type is meant by	601	typeless language. That means this module can only guess which CBOR type
187	a Perl value.	602	is meant by a perl value.
188		603
189	=over 4	604	=over 4
190		605
191	=item hash references	606	=item hash references
192		607
193	Perl hash references become CBOR maps. As there is no inherent ordering	608	Perl hash references become CBOR maps. As there is no inherent ordering in
194	in hash keys (or CBOR maps), they will usually be encoded in a	609	hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
195	pseudo-random order.	610	order. This order can be different each time a hash is encoded.
		611
		612	Currently, tied hashes will use the indefinite-length format, while normal
		613	hashes will use the fixed-length format.
196		614
197	=item array references	615	=item array references
198		616
199	Perl array references become CBOR arrays.	617	Perl array references become fixed-length CBOR arrays.
200		618
201	=item other references	619	=item other references
202		620
203	Other unblessed references are generally not allowed and will cause an	621	Other unblessed references will be represented using
204	exception to be thrown, except for references to the integers C<0> and	622	the indirection tag extension (tag value C<22098>,
205	C<1>, which get turned into C<False> and C<True> in CBOR.	623	L<http://cbor.schmorp.de/indirection>). CBOR decoders are guaranteed
		624	to be able to decode these values somehow, by either "doing the right
		625	thing", decoding into a generic tagged object, simply ignoring the tag, or
		626	something else.
206		627
207	=item CBOR::XS::true, CBOR::XS::false	628	=item CBOR::XS::Tagged objects
208		629
		630	Objects of this type must be arrays consisting of a single C<[tag, value]>
		631	pair. The (numerical) tag will be encoded as a CBOR tag, the value will
		632	be encoded as appropriate for the value. You must use C<CBOR::XS::tag> to
		633	create such objects.
		634
		635	=item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
		636
209	These special values become CBOR True and CBOR False values,	637	These special values become CBOR true, CBOR false and CBOR undefined
210	respectively. You can also use C<\1> and C<\0> directly if you want.	638	values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
		639	if you want.
211		640
212	=item blessed objects	641	=item other blessed objects
213		642
214	Blessed objects are not directly representable in CBOR. TODO	643	Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
215	See the	644	L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
216	C<allow_blessed> and C<convert_blessed> methods on various options on	645	module, and L<OBJECT SERIALISATION> for generic object serialisation.
217	how to deal with this: basically, you can choose between throwing an
218	exception, encoding the reference as if it weren't blessed, or provide
219	your own serialiser method.
220		646
221	=item simple scalars	647	=item simple scalars
222		648
223	TODO
224	Simple Perl scalars (any scalar that is not a reference) are the most	649	Simple Perl scalars (any scalar that is not a reference) are the most
225	difficult objects to encode: CBOR::XS will encode undefined scalars as	650	difficult objects to encode: CBOR::XS will encode undefined scalars as
226	CBOR C<Null> values, scalars that have last been used in a string context	651	CBOR null values, scalars that have last been used in a string context
227	before encoding as CBOR strings, and anything else as number value:	652	before encoding as CBOR strings, and anything else as number value:
228		653
229	# dump as number	654	# dump as number
230	encode_cbor [2] # yields [2]	655	encode_cbor [2] # yields [2]
231	encode_cbor [-3.0e17] # yields [-3e+17]	656	encode_cbor [-3.0e17] # yields [-3e+17]
232	my $value = 5; encode_cbor [$value] # yields [5]	657	my $value = 5; encode_cbor [$value] # yields [5]
233		658
234	# used as string, so dump as string	659	# used as string, so dump as string (either byte or text)
235	print $value;	660	print $value;
236	encode_cbor [$value] # yields ["5"]	661	encode_cbor [$value] # yields ["5"]
237		662
238	# undef becomes null	663	# undef becomes null
239	encode_cbor [undef] # yields [null]	664	encode_cbor [undef] # yields [null]
…		…
243	my $x = 3.1; # some variable containing a number	668	my $x = 3.1; # some variable containing a number
244	"$x"; # stringified	669	"$x"; # stringified
245	$x .= ""; # another, more awkward way to stringify	670	$x .= ""; # another, more awkward way to stringify
246	print $x; # perl does it for you, too, quite often	671	print $x; # perl does it for you, too, quite often
247		672
		673	You can force whether a string is encoded as byte or text string by using
		674	C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled).
		675
		676	utf8::upgrade $x; # encode $x as text string
		677	utf8::downgrade $x; # encode $x as byte string
		678
		679	More options are available, see L<TYPE CASTS>, below, and the C<text_keys>
		680	and C<text_strings> options.
		681
		682	Perl doesn't define what operations up- and downgrade strings, so if the
		683	difference between byte and text is important, you should up- or downgrade
		684	your string as late as possible before encoding. You can also force the
		685	use of CBOR text strings by using C<text_keys> or C<text_strings>.
		686
248	You can force the type to be a CBOR number by numifying it:	687	You can force the type to be a CBOR number by numifying it:
249		688
250	my $x = "3"; # some variable containing a string	689	my $x = "3"; # some variable containing a string
251	$x += 0; # numify it, ensuring it will be dumped as a number	690	$x += 0; # numify it, ensuring it will be dumped as a number
252	$x *= 1; # same thing, the choice is yours.	691	$x *= 1; # same thing, the choice is yours.
253		692
254	You can not currently force the type in other, less obscure, ways. Tell me	693	You can not currently force the type in other, less obscure, ways. Tell me
255	if you need this capability (but don't forget to explain why it's needed	694	if you need this capability (but don't forget to explain why it's needed
256	:).	695	:).
257		696
258	Note that numerical precision has the same meaning as under Perl (so	697	Perl values that seem to be integers generally use the shortest possible
259	binary to decimal conversion follows the same rules as in Perl, which	698	representation. Floating-point values will use either the IEEE single
260	can differ to other languages). Also, your perl interpreter might expose	699	format if possible without loss of precision, otherwise the IEEE double
261	extensions to the floating point numbers of your platform, such as	700	format will be used. Perls that use formats other than IEEE double to
262	infinities or NaN's - these cannot be represented in CBOR, and it is an	701	represent numerical values are supported, but might suffer loss of
263	error to pass those in.	702	precision.
264		703
265	=back	704	=back
266		705
		706	=head2 TYPE CASTS
267		707
		708	B<EXPERIMENTAL>: As an experimental extension, C<CBOR::XS> allows you to
		709	force specific cbor types to be used when encoding. That allows you to
		710	encode types not normally accessible (e.g. half floats) as well as force
		711	string types even when C<text_strings> is in effect.
		712
		713	Type forcing is done by calling a special "cast" function which keeps a
		714	copy of the value and returns a new value that can be handed over to any
		715	CBOR encoder function.
		716
		717	The following casts are currently available (all of which are unary operators):
		718
		719	=over
		720
		721	=item CBOR::XS::as_text $value
		722
		723	Forces the value to be encoded as (UTF-8) text values.
		724
		725	=item CBOR::XS::as_bytes $value
		726
		727	Forces the value to be encoded as a (binary) string value.
		728
		729	=item CBOR::XS::as_float16 $value
		730
		731	Forces half-float (IEEE 754 binary16) encoding of the given value.
		732
		733	=item CBOR::XS::as_float32 $value
		734
		735	Forces single-float (IEEE 754 binary32) encoding of the given value.
		736
		737	=item CBOR::XS::as_float64 $value
		738
		739	Forces double-float (IEEE 754 binary64) encoding of the given value.
		740
		741	=item CBOR::XS::as_cbor $cbor_text
		742
		743	Not a type cast per-se, this type cast forces the argument to be encoded
		744	as-is. This can be used to embed pre-encoded CBOR data.
		745
		746	Note that no checking on the validity of the C<$cbor_text> is done - it's
		747	the callers responsibility to correctly encode values.
		748
		749	=back
		750
		751	Example: encode a perl string as binary even though C<text_strings> is in
		752	effect.
		753
		754	CBOR::XS->new->text_strings->encode ([4, "text", CBOR::XS::as_bytes "bytevalue"]);
		755
		756	=cut
		757
		758	sub CBOR::XS::as_cbor ($) { bless [$_[0], 0, undef], CBOR::XS::Tagged:: } # cast code 0: wraps the argument as [value, code, undef]
		759	sub CBOR::XS::as_bytes ($) { bless [$_[0], 1, undef], CBOR::XS::Tagged:: } # cast code 1
		760	sub CBOR::XS::as_text ($) { bless [$_[0], 2, undef], CBOR::XS::Tagged:: } # cast code 2
		761	sub CBOR::XS::as_float16 ($) { bless [$_[0], 3, undef], CBOR::XS::Tagged:: } # cast code 3
		762	sub CBOR::XS::as_float32 ($) { bless [$_[0], 4, undef], CBOR::XS::Tagged:: } # cast code 4
		763	sub CBOR::XS::as_float64 ($) { bless [$_[0], 5, undef], CBOR::XS::Tagged:: } # cast code 5; NOTE(review): presumably the XS encoder interprets these codes - not visible here
		764
		765	=head2 OBJECT SERIALISATION
		766
		767	This module implements both a CBOR-specific and the generic
		768	L<Types::Serialiser> object serialisation protocol. The following
		769	subsections explain both methods.
		770
		771	=head3 ENCODING
		772
		773	This module knows two ways to serialise a Perl object: The CBOR-specific
		774	way, and the generic way.
		775
		776	Whenever the encoder encounters a Perl object that it cannot serialise
		777	directly (most of them), it will first look up the C<TO_CBOR> method on
		778	it.
		779
		780	If it has a C<TO_CBOR> method, it will call it with the object as only
		781	argument, and expects exactly one return value, which it will then
		782	substitute and encode it in the place of the object.
		783
		784	Otherwise, it will look up the C<FREEZE> method. If it exists, it will
		785	call it with the object as first argument, and the constant string C<CBOR>
		786	as the second argument, to distinguish it from other serialisers.
		787
		788	The C<FREEZE> method can return any number of values (i.e. zero or
		789	more). These will be encoded as CBOR perl object, together with the
		790	classname.
		791
		792	These methods I<MUST NOT> change the data structure that is being
		793	serialised. Failure to comply to this can result in memory corruption -
		794	and worse.
		795
		796	If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
		797	with an error.
		798
		799	=head3 DECODING
		800
		801	Objects encoded via C<TO_CBOR> cannot (normally) be automatically decoded,
		802	but objects encoded via C<FREEZE> can be decoded using the following
		803	protocol:
		804
		805	When an encoded CBOR perl object is encountered by the decoder, it will
		806	look up the C<THAW> method, by using the stored classname, and will fail
		807	if the method cannot be found.
		808
		809	After the lookup it will call the C<THAW> method with the stored classname
		810	as first argument, the constant string C<CBOR> as second argument, and all
		811	values returned by C<FREEZE> as remaining arguments.
		812
		813	=head3 EXAMPLES
		814
		815	Here is an example C<TO_CBOR> method:
		816
		817	sub My::Object::TO_CBOR {
		818	my ($obj) = @_;
		819
		820	["this is a serialised My::Object object", $obj->{id}]
		821	}
		822
		823	When a C<My::Object> is encoded to CBOR, it will instead encode a simple
		824	array with two members: a string, and the "object id". Decoding this CBOR
		825	string will yield a normal perl array reference in place of the object.
		826
		827	A more useful and practical example would be a serialisation method for
		828	the URI module. CBOR has a custom tag value for URIs, namely 32:
		829
		830	sub URI::TO_CBOR {
		831	my ($self) = @_;
		832	my $uri = "$self"; # stringify uri
		833	utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
		834	CBOR::XS::tag 32, $uri
		835	}
		836
		837	This will encode URIs as a UTF-8 string with tag 32, which indicates an
		838	URI.
		839
		840	Decoding such an URI will not (currently) give you an URI object, but
		841	instead a CBOR::XS::Tagged object with tag number 32 and the string -
		842	exactly what was returned by C<TO_CBOR>.
		843
		844	To serialise an object so it can automatically be deserialised, you need
		845	to use C<FREEZE> and C<THAW>. To take the URI module as example, this
		846	would be a possible implementation:
		847
		848	sub URI::FREEZE {
		849	my ($self, $serialiser) = @_;
		850	"$self" # encode url string
		851	}
		852
		853	sub URI::THAW {
		854	my ($class, $serialiser, $uri) = @_;
		855	$class->new ($uri)
		856	}
		857
		858	Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
		859	example, a C<FREEZE> method that returns "type", "id" and "variant" values
		860	would cause an invocation of C<THAW> with 5 arguments:
		861
		862	sub My::Object::FREEZE {
		863	my ($self, $serialiser) = @_;
		864
		865	($self->{type}, $self->{id}, $self->{variant})
		866	}
		867
		868	sub My::Object::THAW {
		869	my ($class, $serialiser, $type, $id, $variant) = @_;
		870
		871	$class->new (type => $type, id => $id, variant => $variant)
		872	}
		873
		874
268	=head2 MAGIC HEADER	875	=head1 MAGIC HEADER
269		876
270	There is no way to distinguish CBOR from other formats	877	There is no way to distinguish CBOR from other formats
271	programmatically. To make it easier to distinguish CBOR from other	878	programmatically. To make it easier to distinguish CBOR from other
272	formats, the CBOR specification has a special "magic string" that can be	879	formats, the CBOR specification has a special "magic string" that can be
273	prepended to any CBOR string without changing it's meaning.	880	prepended to any CBOR string without changing its meaning.
274		881
275	This string is available as C<$CBOR::XS::MAGIC>. This module does not	882	This string is available as C<$CBOR::XS::MAGIC>. This module does not
276	prepend this string tot he CBOR data it generates, but it will ignroe it	883	prepend this string to the CBOR data it generates, but it will ignore it
277	if present, so users can prepend this string as a "file type" indicator as	884	if present, so users can prepend this string as a "file type" indicator as
278	required.	885	required.
279		886
280		887
		888	=head1 THE CBOR::XS::Tagged CLASS
		889
		890	CBOR has the concept of tagged values - any CBOR value can be tagged with
		891	a numeric 64 bit number, which are centrally administered.
		892
		893	C<CBOR::XS> handles a few tags internally when en- or decoding. You can
		894	also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
		895	decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
		896	unknown tag.
		897
		898	These objects are simply blessed array references - the first member of
		899	the array being the numerical tag, the second being the value.
		900
		901	You can interact with C<CBOR::XS::Tagged> objects in the following ways:
		902
		903	=over 4
		904
		905	=item $tagged = CBOR::XS::tag $tag, $value
		906
		907	This function(!) creates a new C<CBOR::XS::Tagged> object using the given
		908	C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
		909	value that can be encoded in CBOR, including serialisable Perl objects and
		910	C<CBOR::XS::Tagged> objects).
		911
		912	=item $tagged->[0]
		913
		914	=item $tagged->[0] = $new_tag
		915
		916	=item $tag = $tagged->tag
		917
		918	=item $new_tag = $tagged->tag ($new_tag)
		919
		920	Access/mutate the tag.
		921
		922	=item $tagged->[1]
		923
		924	=item $tagged->[1] = $new_value
		925
		926	=item $value = $tagged->value
		927
		928	=item $new_value = $tagged->value ($new_value)
		929
		930	Access/mutate the tagged value.
		931
		932	=back
		933
		934	=cut
		935
		936	sub tag($$) { # function (not a method): CBOR::XS::tag $tag, $value
		937	bless [@_], CBOR::XS::Tagged::; # copies the arguments into a new blessed array: [0] = tag, [1] = value
		938	}
		939
		940	sub CBOR::XS::Tagged::tag { # accessor/mutator for the tag, $tagged->[0]
		941	$_[0][0] = $_[1] if $#_; # $#_ is non-zero when an argument was passed besides the object
		942	$_[0][0]
		943	}
		944
		945	sub CBOR::XS::Tagged::value { # accessor/mutator for the tagged value, $tagged->[1]
		946	$_[0][1] = $_[1] if $#_; # same set-if-argument-given pattern as the tag accessor
		947	$_[0][1]
		948	}
		949
		950	=head2 EXAMPLES
		951
		952	Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
		953
		954	You can look up CBOR tag values and meanings in the IANA registry at
		955	L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
		956
		957	Prepend a magic header (C<$CBOR::XS::MAGIC>):
		958
		959	my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
		960	# same as:
		961	my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
		962
		963	Serialise some URIs and a regex in an array:
		964
		965	my $cbor = encode_cbor [
		966	(CBOR::XS::tag 32, "http://www.nethype.de/"),
		967	(CBOR::XS::tag 32, "http://software.schmorp.de/"),
		968	(CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
		969	];
		970
		971	Wrap CBOR data in CBOR:
		972
		973	my $cbor_cbor = encode_cbor
		974	CBOR::XS::tag 24,
		975	encode_cbor [1, 2, 3];
		976
		977	=head1 TAG HANDLING AND EXTENSIONS
		978
		979	This section describes how this module handles specific tagged values
		980	and extensions. If a tag is not mentioned here and no additional filters
		981	are provided for it, then the default handling applies (creating a
		982	CBOR::XS::Tagged object on decoding, and only encoding the tag when
		983	explicitly requested).
		984
		985	Tags not handled specifically are currently converted into a
		986	L<CBOR::XS::Tagged> object, which is simply a blessed array reference
		987	consisting of the numeric tag value followed by the (decoded) CBOR value.
		988
		989	Future versions of this module reserve the right to special case
		990	additional tags (such as base64url).
		991
		992	=head2 ENFORCED TAGS
		993
		994	These tags are always handled when decoding, and their handling cannot be
		995	overridden by the user.
		996
		997	=over 4
		998
		999	=item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
		1000
		1001	These tags are automatically created (and decoded) for serialisable
		1002	objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
		1003	serialisation protocol). See L<OBJECT SERIALISATION> for details.
		1004
		1005	=item 28, 29 (shareable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
		1006
		1007	These tags are automatically decoded when encountered (and they do not
		1008	result in a cyclic data structure, see C<allow_cycles>), resulting in
		1009	shared values in the decoded object. They are only encoded, however, when
		1010	C<allow_sharing> is enabled.
		1011
		1012	Not all shared values can be successfully decoded: values that reference
		1013	themselves will I<currently> decode as C<undef> (this is not the same
		1014	as a reference pointing to itself, which will be represented as a value
		1015	that contains an indirect reference to itself - these will be decoded
		1016	properly).
		1017
		1018	Note that considerably more shared value data structures can be decoded
		1019	than will be encoded - currently, only values pointed to by references
		1020	will be shared, others will not. While non-reference shared values can be
		1021	generated in Perl with some effort, they were considered too unimportant
		1022	to be supported in the encoder. The decoder, however, will decode these
		1023	values as shared values.
		1024
		1025	=item 256, 25 (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
		1026
		1027	These tags are automatically decoded when encountered. They are only
		1028	encoded, however, when C<pack_strings> is enabled.
		1029
		1030	=item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
		1031
		1032	This tag is automatically generated when a reference is encountered (with
		1033	the exception of hash and array references). It is converted to a reference
		1034	when decoding.
		1035
		1036	=item 55799 (self-describe CBOR, RFC 7049)
		1037
		1038	This value is not generated on encoding (unless explicitly requested by
		1039	the user), and is simply ignored when decoding.
		1040
		1041	=back
		1042
		1043	=head2 NON-ENFORCED TAGS
		1044
		1045	These tags have default filters provided when decoding. Their handling can
		1046	be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
		1047	providing a custom C<filter> callback when decoding.
		1048
		1049	When they result in decoding into a specific Perl class, the module
		1050	usually provides a corresponding C<TO_CBOR> method as well.
		1051
		1052	When any of these need to load additional modules that are not part of the
		1053	perl core distribution (e.g. L<URI>), it is (currently) up to the user to
		1054	provide these modules. The decoding usually fails with an exception if the
		1055	required module cannot be loaded.
		1056
		1057	=over 4
		1058
		1059	=item 0, 1 (date/time string, seconds since the epoch)
		1060
		1061	These tags are decoded into L<Time::Piece> objects. The corresponding
		1062	C<Time::Piece::TO_CBOR> method always encodes into tag 1 values currently.
		1063
		1064	The L<Time::Piece> API is generally surprisingly bad, and fractional
		1065	seconds are only accidentally kept intact, so watch out. On the plus side,
		1066	the module comes with perl since 5.10, which has to count for something.
		1067
		1068	=item 2, 3 (positive/negative bignum)
		1069
		1070	These tags are decoded into L<Math::BigInt> objects. The corresponding
		1071	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
		1072	integers, and others into positive/negative CBOR bignums.
		1073
		1074	=item 4, 5, 264, 265 (decimal fraction/bigfloat)
		1075
		1076	Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
		1077	objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
		1078	encodes into a decimal fraction (either tag 4 or 264).
		1079
		1080	NaN and infinities are not encoded properly, as they cannot be represented
		1081	in CBOR.
		1082
		1083	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
		1084
		1085	=item 30 (rational numbers)
		1086
		1087	These tags are decoded into L<Math::BigRat> objects. The corresponding
		1088	C<Math::BigRat::TO_CBOR> method encodes rational numbers with denominator
		1089	C<1> via their numerator only, i.e., they become normal integers or
		1090	C<bignums>.
		1091
		1092	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
		1093
		1094	=item 21, 22, 23 (expected later JSON conversion)
		1095
		1096	CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
		1097	tags.
		1098
		1099	=item 32 (URI)
		1100
		1101	These objects decode into L<URI> objects. The corresponding
		1102	C<URI::TO_CBOR> method again results in a CBOR URI value.
		1103
		1104	=back
		1105
		1106	=cut
		1107
281	=head2 CBOR and JSON	1108	=head1 CBOR and JSON
282		1109
283	TODO	1110	CBOR is supposed to implement a superset of the JSON data model, and is,
		1111	with some coercion, able to represent all JSON texts (something that other
		1112	"binary JSON" formats such as BSON generally do not support).
		1113
		1114	CBOR implements some extra hints and support for JSON interoperability,
		1115	and the spec offers further guidance for conversion between CBOR and
		1116	JSON. None of this is currently implemented in CBOR::XS, and the guidelines
		1117	in the spec do not result in correct round-tripping of data. If JSON
		1118	interoperability is improved in the future, then the goal will be to
		1119	ensure that decoded JSON data will round-trip encoding and decoding to
		1120	CBOR intact.
284		1121
285		1122
286	=head1 SECURITY CONSIDERATIONS	1123	=head1 SECURITY CONSIDERATIONS
287		1124
288	When you are using CBOR in a protocol, talking to untrusted potentially	1125	Tl;dr... if you want to decode or encode CBOR from untrusted sources, you
289	hostile creatures requires relatively few measures.	1126	should start with a coder object created via C<new_safe> (which implements
		1127	the mitigations explained below):
290		1128
		1129	my $coder = CBOR::XS->new_safe;
		1130
		1131	my $data = $coder->decode ($cbor_text);
		1132	my $cbor = $coder->encode ($data);
		1133
		1134	Longer version: When you are using CBOR in a protocol, talking to
		1135	untrusted potentially hostile creatures requires some thought:
		1136
		1137	=over 4
		1138
		1139	=item Security of the CBOR decoder itself
		1140
291	First of all, your CBOR decoder should be secure, that is, should not have	1141	First and foremost, your CBOR decoder should be secure, that is, should
		1142	not have any buffer overflows or similar bugs that could potentially be
292	any buffer overflows. Obviously, this module should ensure that and I am	1143	exploited. Obviously, this module should ensure that and I am trying hard
293	trying hard on making that true, but you never know.	1144	on making that true, but you never know.
294		1145
		1146	=item CBOR::XS can invoke almost arbitrary callbacks during decoding
		1147
		1148	CBOR::XS supports object serialisation - decoding CBOR can cause calls
		1149	to I<any> C<THAW> method in I<any> package that exists in your process
		1150	(that is, CBOR::XS will not try to load modules, but any existing C<THAW>
		1151	method or function can be called, so they all have to be secure).
		1152
		1153	Less obviously, it will also invoke C<TO_CBOR> and C<FREEZE> methods -
		1154	even if all your C<THAW> methods are secure, encoding data structures from
		1155	untrusted sources can invoke those and trigger bugs in those.
		1156
		1157	So, if you are not sure about the security of all the modules you
		1158	have loaded (you shouldn't), you should disable this part using
		1159	C<forbid_objects> or using C<new_safe>.
		1160
		1161	=item CBOR can be extended with tags that call library code
		1162
		1163	CBOR can be extended with tags, and C<CBOR::XS> has a registry of
		1164	conversion functions for many existing tags that can be extended via
		1165	third-party modules (see the C<filter> method).
		1166
		1167	If you don't trust these, you should configure the "safe" filter function,
		1168	C<CBOR::XS::safe_filter> (C<new_safe> does this), which by default only
		1169	includes conversion functions that are considered "safe" by the author
		1170	(but again, they can be extended by third party modules).
		1171
		1172	Depending on your level of paranoia, you can use the "safe" filter:
		1173
		1174	$cbor->filter (\&CBOR::XS::safe_filter);
		1175
		1176	... your own filter...
		1177
		1178	$cbor->filter (sub { ... do your stuffs here ... });
		1179
		1180	... or even no filter at all, disabling all tag decoding:
		1181
		1182	$cbor->filter (sub { });
		1183
		1184	This is never a problem for encoding, as the tag mechanism only exists in
		1185	CBOR texts.
		1186
		1187	=item Resource-starving attacks: object memory usage
		1188
295	Second, you need to avoid resource-starving attacks. That means you should	1189	You need to avoid resource-starving attacks. That means you should limit
296	limit the size of CBOR data you accept, or make sure then when your	1190	the size of CBOR data you accept, or make sure then when your resources
297	resources run out, that's just fine (e.g. by using a separate process that	1191	run out, that's just fine (e.g. by using a separate process that can
298	can crash safely). The size of a CBOR string in octets is usually a good	1192	crash safely). The size of a CBOR string in octets is usually a good
299	indication of the size of the resources required to decode it into a Perl	1193	indication of the size of the resources required to decode it into a Perl
300	structure. While CBOR::XS can check the size of the CBOR text, it might be	1194	structure. While CBOR::XS can check the size of the CBOR text (using
301	too late when you already have it in memory, so you might want to check	1195	C<max_size> - done by C<new_safe>), it might be too late when you already
302	the size before you accept the string.	1196	have it in memory, so you might want to check the size before you accept
		1197	the string.
303		1198
		1199	As for encoding, it is possible to construct data structures that are
		1200	relatively small but result in large CBOR texts (for example by having an
		1201	array full of references to the same big data structure, which will all be
		1202	deep-cloned during encoding by default). This is rarely an actual issue
		1203	(and the worst case is still just running out of memory), but you can
		1204	reduce this risk by using C<allow_sharing>.
		1205
		1206	=item Resource-starving attacks: stack overflows
		1207
304	Third, CBOR::XS recurses using the C stack when decoding objects and	1208	CBOR::XS recurses using the C stack when decoding objects and arrays. The
305	arrays. The C stack is a limited resource: for instance, on my amd64	1209	C stack is a limited resource: for instance, on my amd64 machine with 8MB
306	machine with 8MB of stack size I can decode around 180k nested arrays but	1210	of stack size I can decode around 180k nested arrays but only 14k nested
307	only 14k nested CBOR objects (due to perl itself recursing deeply on croak	1211	CBOR objects (due to perl itself recursing deeply on croak to free the
308	to free the temporary). If that is exceeded, the program crashes. To be	1212	temporary). If that is exceeded, the program crashes. To be conservative,
309	conservative, the default nesting limit is set to 512. If your process	1213	the default nesting limit is set to 512. If your process has a smaller
310	has a smaller stack, you should adjust this setting accordingly with the	1214	stack, you should adjust this setting accordingly with the C<max_depth>
311	C<max_depth> method.	1215	method.
		1216
		1217	=item Resource-starving attacks: CPU en-/decoding complexity
		1218
		1219	CBOR::XS will use the L<Math::BigInt>, L<Math::BigFloat> and
		1220	L<Math::BigRat> libraries to represent encode/decode bignums. These can be
		1221	very slow (as in, centuries of CPU time) and can even crash your program
		1222	(and are generally not very trustworthy). See the next section on bignum
		1223	security for details.
		1224
		1225	=item Data breaches: leaking information in error messages
		1226
		1227	CBOR::XS might leak contents of your Perl data structures in its error
		1228	messages, so when you serialise sensitive information you might want to
		1229	make sure that exceptions thrown by CBOR::XS will not end up in front of
		1230	untrusted eyes.
		1231
		1232	=item Something else...
312		1233
313	Something else could bomb you, too, that I forgot to think of. In that	1234	Something else could bomb you, too, that I forgot to think of. In that
314	case, you get to keep the pieces. I am always open for hints, though...	1235	case, you get to keep the pieces. I am always open for hints, though...
315		1236
316	Also keep in mind that CBOR::XS might leak contents of your Perl data	1237	=back
317	structures in its error messages, so when you serialise sensitive	1238
318	information you might want to make sure that exceptions thrown by CBOR::XS	1239
319	will not end up in front of untrusted eyes.	1240	=head1 BIGNUM SECURITY CONSIDERATIONS
		1241
		1242	CBOR::XS provides a C<TO_CBOR> method for both L<Math::BigInt> and
		1243	L<Math::BigFloat> that tries to encode the number in the simplest possible
		1244	way, that is, either a CBOR integer, a CBOR bigint/decimal fraction (tag
		1245	4) or an arbitrary-exponent decimal fraction (tag 264). Rational numbers
		1246	(L<Math::BigRat>, tag 30) can also contain bignums as members.
		1247
		1248	CBOR::XS will also understand base-2 bigfloat or arbitrary-exponent
		1249	bigfloats (tags 5 and 265), but it will never generate these on its own.
		1250
		1251	Using the built-in L<Math::BigInt::Calc> support, encoding and decoding
		1252	decimal fractions is generally fast. Decoding bigints can be slow for very
		1253	big numbers (tens of thousands of digits, something that could potentially
		1254	be caught by limiting the size of CBOR texts), and decoding bigfloats or
		1255	arbitrary-exponent bigfloats can be I<extremely> slow (minutes, decades)
		1256	for large exponents (roughly 40 bit and longer).
		1257
		1258	Additionally, L<Math::BigInt> can take advantage of other bignum
		1259	libraries, such as L<Math::GMP>, which cannot handle big floats with large
		1260	exponents, and might simply abort or crash your program, due to their code
		1261	quality.
		1262
		1263	This can be a concern if you want to parse untrusted CBOR. If it is, you
		1264	might want to disable decoding of tag 2 (bigint) and 3 (negative bigint)
		1265	types. You should also disable types 5 and 265, as these can be slow even
		1266	without bigints.
		1267
		1268	Disabling bigints will also partially or fully disable types that rely on
		1269	them, e.g. rational numbers that use bignums.
		1270
320		1271
321	=head1 CBOR IMPLEMENTATION NOTES	1272	=head1 CBOR IMPLEMENTATION NOTES
322		1273
323	This section contains some random implementation notes. They do not	1274	This section contains some random implementation notes. They do not
324	describe guaranteed behaviour, but merely behaviour as-is implemented	1275	describe guaranteed behaviour, but merely behaviour as-is implemented
…		…
333	Only the double data type is supported for NV data types - when Perl uses	1284	Only the double data type is supported for NV data types - when Perl uses
334	long double to represent floating point values, they might not be encoded	1285	long double to represent floating point values, they might not be encoded
335	properly. Half precision types are accepted, but not encoded.	1286	properly. Half precision types are accepted, but not encoded.
336		1287
337	Strict mode and canonical mode are not implemented.	1288	Strict mode and canonical mode are not implemented.
		1289
		1290
		1291	=head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT
		1292
		1293	On perls that were built without 64 bit integer support (these are rare
		1294	nowadays, even on 32 bit architectures, as all major Perl distributions
		1295	are built with 64 bit integer support), support for any kind of 64 bit
		1296	value in CBOR is very limited - most likely, these 64 bit values will
		1297	be truncated, corrupted, or otherwise not decoded correctly. This also
		1298	includes string, float, array and map sizes that are stored as 64 bit
		1299	integers.
338		1300
339		1301
340	=head1 THREADS	1302	=head1 THREADS
341		1303
342	This module is I<not> guaranteed to be thread safe and there are no	1304	This module is I<not> guaranteed to be thread safe and there are no
…		…
356	Please refrain from using rt.cpan.org or any other bug reporting	1318	Please refrain from using rt.cpan.org or any other bug reporting
357	service. I put the contact address into my modules for a reason.	1319	service. I put the contact address into my modules for a reason.
358		1320
359	=cut	1321	=cut
360		1322
361	our $true = do { bless \(my $dummy = 1), "CBOR::XS::Boolean" };	1323	# clumsy and slow hv_store-in-hash helper function
362	our $false = do { bless \(my $dummy = 0), "CBOR::XS::Boolean" };	1324	sub _hv_store { # arguments: hash reference, key, value
363		1325	$_[0]{$_[1]} = $_[2]; # store the value under the key in the referenced hash
364	sub true() { $true }
365	sub false() { $false }
366
367	sub is_bool($) {
368	UNIVERSAL::isa $_[0], "CBOR::XS::Boolean"
369	# or UNIVERSAL::isa $_[0], "CBOR::Literal"
370	}	1326	}
371		1327
		1328	our %FILTER = ( # default tag decoders, keyed by CBOR tag number; each handler is called as ($tag, $value)
		1329	0 => sub { # rfc4287 datetime, utf-8
		1330	require Time::Piece;
		1331	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"
		1332	# from FreeBSD, which can't parse ISO 8601, RFC3339, RFC4287 or much of anything
		1333	# else either. What's incredible over standard strptime totally escapes me.
		1334	# doesn't do fractional times, either. sigh.
		1335	# In fact, it's all a lie, it uses whatever strptime it wants, and of course,
		1336	# they are all incompatible. The openbsd one simply ignores %z (but according to the
		1337	# docs, it would be much more incredibly flexible indeed. If it worked, that is.).
		1338	scalar eval {
		1339	my $s = $_[1];
		1340
		1341	$s =~ s/Z$/+00:00/; # normalise the "Z" suffix to a numeric offset
		1342	$s =~ s/(\.[0-9]+)?([+-][0-9][0-9]):([0-9][0-9])$// # strip fraction and offset, parsed by hand below
		1343	or die;
		1344
		1345	my $b = $1 - ($2 * 60 + (substr ($2, 0, 1) . $3)) * 60; # fractional part + offset; the minutes take the sign of the hours (-01:30 is -90 minutes)
		1346	my $d = Time::Piece->strptime ($s, "%Y-%m-%dT%H:%M:%S");
		1347
		1348	Time::Piece::gmtime ($d->epoch + $b)
		1349	} || die "corrupted CBOR date/time string ($_[1])"; # report the offending string ($_[1]), not the tag number ($_[0])
		1350	},
		1351
		1352	1 => sub { # seconds since the epoch, possibly fractional
		1353	require Time::Piece;
		1354	scalar Time::Piece::gmtime (pop)
		1355	},
		1356
		1357	2 => sub { # pos bigint
		1358	require Math::BigInt;
		1359	Math::BigInt->new ("0x" . unpack "H*", pop) # big-endian byte string -> hex literal
		1360	},
		1361
		1362	3 => sub { # neg bigint
		1363	require Math::BigInt;
		1364	-1 - Math::BigInt->new ("0x" . unpack "H*", pop) # the CBOR spec defines the value of tag 3 as -1 - n
		1365	},
		1366
		1367	4 => sub { # decimal fraction, array
		1368	require Math::BigFloat;
		1369	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0]) # value is [exponent, mantissa]
		1370	},
		1371
		1372	264 => sub { # decimal fraction with arbitrary exponent
		1373	require Math::BigFloat;
		1374	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
		1375	},
		1376
		1377	5 => sub { # bigfloat, array
		1378	require Math::BigFloat;
		1379	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0]) # mantissa * 2**exponent
		1380	},
		1381
		1382	265 => sub { # bigfloat with arbitrary exponent
		1383	require Math::BigFloat;
		1384	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1385	},
		1386
		1387	30 => sub { # rational number
		1388	require Math::BigRat;
		1389	Math::BigRat->new ("$_[1][0]/$_[1][1]") # separate parameters only work in recent versions
		1390	},
		1391
		1392	21 => sub { pop }, # expected conversion to base64url encoding
		1393	22 => sub { pop }, # expected conversion to base64 encoding
		1394	23 => sub { pop }, # expected conversion to base16 encoding
		1395
		1396	# 24 # embedded cbor, byte string
		1397
		1398	32 => sub {
		1399	require URI;
		1400	URI->new (pop)
		1401	},
		1402
		1403	# 33 # base64url rfc4648, utf-8
		1404	# 34 # base64 rfc4648, utf-8
		1405	# 35 # regex pcre/ecma262, utf-8
		1406	# 36 # mime message rfc2045, utf-8
		1407	);
		1408
		1409	sub default_filter { # looks up the handler for the tag in $_[0] in %FILTER
		1410	&{ $FILTER{$_[0]} or return } # no handler: return nothing; otherwise call it with the current @_
		1411	}
		1412
		1413	our %SAFE_FILTER = map { $_ => $FILTER{$_} } 0, 1, 21, 22, 23, 32;
		1414
		1415	sub safe_filter {
		1416	&{ $SAFE_FILTER{$_[0]} or return }
		1417	}
		1418
		1419	sub URI::TO_CBOR {
		1420	my $uri = $_[0]->as_string;
		1421	utf8::upgrade $uri;
		1422	tag 32, $uri
		1423	}
		1424
		1425	sub Math::BigInt::TO_CBOR {
		1426	if (-2147483648 <= $_[0] && $_[0] <= 2147483647) {
		1427	$_[0]->numify
		1428	} else {
		1429	my $hex = substr $_[0]->as_hex, 2;
		1430	$hex = "0$hex" if 1 & length $hex; # sigh
		1431	tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex
		1432	}
		1433	}
		1434
		1435	sub Math::BigFloat::TO_CBOR {
		1436	my ($m, $e) = $_[0]->parts;
		1437
		1438	-9223372036854775808 <= $e && $e <= 18446744073709551615
		1439	? tag 4, [$e->numify, $m]
		1440	: tag 264, [$e, $m]
		1441	}
		1442
		1443	sub Math::BigRat::TO_CBOR {
		1444	my ($n, $d) = $_[0]->parts;
		1445
		1446	# older versions of BigRat need *1, as they do not always return numbers
		1447
		1448	$d*1 == 1
		1449	? $n*1
		1450	: tag 30, [$n*1, $d*1]
		1451	}
		1452
		1453	sub Time::Piece::TO_CBOR {
		1454	tag 1, 0 + $_[0]->epoch
		1455	}
		1456
372	XSLoader::load "CBOR::XS", $VERSION;	1457	XSLoader::load "CBOR::XS", $VERSION;
373
374	package CBOR::XS::Boolean;
375
376	use overload
377	"0+" => sub { ${$_[0]} },
378	"++" => sub { $_[0] = ${$_[0]} + 1 },
379	"--" => sub { $_[0] = ${$_[0]} - 1 },
380	fallback => 1;
381
382	1;
383		1458
384	=head1 SEE ALSO	1459	=head1 SEE ALSO
385		1460
386	The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,	1461	The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
387	serialisation.	1462	serialisation.
388		1463
		1464	The L<Types::Serialiser> module provides the data model for true, false
		1465	and error values.
		1466
389	=head1 AUTHOR	1467	=head1 AUTHOR
390		1468
391	Marc Lehmann <schmorp@schmorp.de>	1469	Marc Lehmann <schmorp@schmorp.de>
392	http://home.schmorp.de/	1470	http://home.schmorp.de/
393		1471
394	=cut	1472	=cut
395		1473
		1474	1
		1475

Diff Legend

–	Removed lines
+	Added lines
<	Changed lines
>	Changed lines

Comparing CBOR-XS/XS.pm (file contents): Revision 1.3 by root, Sat Oct 26 11:08:34 2013 UTC vs. Revision 1.71 by root, Sun Nov 29 21:32:01 2020 UTC

Diff Legend

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.3 by root, Sat Oct 26 11:08:34 2013 UTC vs.
Revision 1.71 by root, Sun Nov 29 21:32:01 2020 UTC