[ViewVC] Diff of: cvs/CBOR-XS/XS.pm

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.5 by root, Sat Oct 26 23:02:55 2013 UTC vs.
Revision 1.68 by root, Wed Jul 17 09:37:16 2019 UTC

…		…
12	$perl_value = decode_cbor $binary_cbor_data;	12	$perl_value = decode_cbor $binary_cbor_data;
13		13
14	# OO-interface	14	# OO-interface
15		15
16	$coder = CBOR::XS->new;	16	$coder = CBOR::XS->new;
17	#TODO	17	$binary_cbor_data = $coder->encode ($perl_value);
		18	$perl_value = $coder->decode ($binary_cbor_data);
		19
		20	# prefix decoding
		21
		22	my $many_cbor_strings = ...;
		23	while (length $many_cbor_strings) {
		24	my ($data, $length) = $coder->decode_prefix ($many_cbor_strings);
		25	# data was decoded
		26	substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
		27	}
18		28
19	=head1 DESCRIPTION	29	=head1 DESCRIPTION
20
21	WARNING! THIS IS A PRE-ALPHA RELEASE! IT WILL CRASH, CORRUPT YOUR DATA
22	AND EAT YOUR CHILDREN! (Actually, apart from being untested and a bit
23	feature-limited, it might already be useful).
24		30
25	This module converts Perl data structures to the Concise Binary Object	31	This module converts Perl data structures to the Concise Binary Object
26	Representation (CBOR) and vice versa. CBOR is a fast binary serialisation	32	Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
27	format that aims to use a superset of the JSON data model, i.e. when you	33	format that aims to use an (almost) superset of the JSON data model, i.e.
28	can represent something in JSON, you should be able to represent it in	34	when you can represent something useful in JSON, you should be able to
29	CBOR.	35	represent it in CBOR.
30		36
31	This makes it a faster and more compact binary alternative to JSON.	37	In short, CBOR is a faster and quite compact binary alternative to JSON,
		38	with the added ability of supporting serialisation of Perl objects. (JSON
		39	often compresses better than CBOR though, so if you plan to compress the
		40	data later and speed is less important you might want to compare both
		41	formats first).
32		42
33	The primary goal of this module is to be I<correct> and the secondary goal	43	The primary goal of this module is to be I<correct> and the secondary goal
34	is to be I<fast>. To reach the latter goal it was written in C.	44	is to be I<fast>. To reach the latter goal it was written in C.
35		45
		46	To give you a general idea about speed, with texts in the megabyte range,
		47	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
		48	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
		49	data, the worse L<Storable> performs in comparison.
		50
		51	Regarding compactness, C<CBOR::XS>-encoded data structures are usually
		52	about 20% smaller than the same data encoded as (compact) JSON or
		53	L<Storable>.
		54
		55	In addition to the core CBOR data format, this module implements a
		56	number of extensions, to support cyclic and shared data structures
		57	(see C<allow_sharing> and C<allow_cycles>), string deduplication (see
		58	C<pack_strings>) and scalar references (always enabled).
		59
36	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and	60	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
37	vice versa.	61	vice versa.
38		62
39	=cut	63	=cut
40		64
41	package CBOR::XS;	65	package CBOR::XS;
42		66
43	use common::sense;	67	use common::sense;
44		68
45	our $VERSION = 0.03;	69	our $VERSION = 1.71;
46	our @ISA = qw(Exporter);	70	our @ISA = qw(Exporter);
47		71
48	our @EXPORT = qw(encode_cbor decode_cbor);	72	our @EXPORT = qw(encode_cbor decode_cbor);
49		73
50	use Exporter;	74	use Exporter;
51	use XSLoader;	75	use XSLoader;
52		76
		77	use Types::Serialiser;
		78
53	our $MAGIC = "\xd9\xd9\xf7";	79	our $MAGIC = "\xd9\xd9\xf7";
54		80
55	=head1 FUNCTIONAL INTERFACE	81	=head1 FUNCTIONAL INTERFACE
56		82
57	The following convenience methods are provided by this module. They are	83	The following convenience methods are provided by this module. They are
…		…
85	strings. All boolean flags described below are by default I<disabled>.	111	strings. All boolean flags described below are by default I<disabled>.
86		112
87	The mutators for flags all return the CBOR object again and thus calls can	113	The mutators for flags all return the CBOR object again and thus calls can
88	be chained:	114	be chained:
89		115
90	#TODO
91	my $cbor = CBOR::XS->new->encode ({a => [1,2]});	116	my $cbor = CBOR::XS->new->encode ({a => [1,2]});
		117
		118	=item $cbor = new_safe CBOR::XS
		119
		120	Create a new, safe/secure CBOR::XS object. This is similar to C<new>,
		121	but configures the coder object to be safe to use with untrusted
		122	data. Currently, this is equivalent to:
		123
		124	my $cbor = CBOR::XS
		125	->new
		126	->forbid_objects
		127	->filter (\&CBOR::XS::safe_filter)
		128	->max_size (1e8);
		129
		130	But is more future proof (it is better to crash because of a change than
		131	to be exploited in other ways).
		132
		133	=cut
		134
		135	sub new_safe {
		136	CBOR::XS
		137	->new
		138	->forbid_objects
		139	->filter (\&CBOR::XS::safe_filter)
		140	->max_size (1e8)
		141	}
92		142
93	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])	143	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
94		144
95	=item $max_depth = $cbor->get_max_depth	145	=item $max_depth = $cbor->get_max_depth
96		146
…		…
112		162
113	Note that nesting is implemented by recursion in C. The default value has	163	Note that nesting is implemented by recursion in C. The default value has
114	been chosen to be as large as typical operating systems allow without	164	been chosen to be as large as typical operating systems allow without
115	crashing.	165	crashing.
116		166
117	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	167	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
118		168
119	=item $cbor = $cbor->max_size ([$maximum_string_size])	169	=item $cbor = $cbor->max_size ([$maximum_string_size])
120		170
121	=item $max_size = $cbor->get_max_size	171	=item $max_size = $cbor->get_max_size
122		172
…		…
127	effect on C<encode> (yet).	177	effect on C<encode> (yet).
128		178
129	If no argument is given, the limit check will be deactivated (same as when	179	If no argument is given, the limit check will be deactivated (same as when
130	C<0> is specified).	180	C<0> is specified).
131		181
132	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	182	See L<SECURITY CONSIDERATIONS>, below, for more info on why this is useful.
		183
		184	=item $cbor = $cbor->allow_unknown ([$enable])
		185
		186	=item $enabled = $cbor->get_allow_unknown
		187
		188	If C<$enable> is true (or missing), then C<encode> will I<not> throw an
		189	exception when it encounters values it cannot represent in CBOR (for
		190	example, filehandles) but instead will encode a CBOR C<error> value.
		191
		192	If C<$enable> is false (the default), then C<encode> will throw an
		193	exception when it encounters anything it cannot encode as CBOR.
		194
		195	This option does not affect C<decode> in any way, and it is recommended to
		196	leave it off unless you know your communications partner.
		197
		198	=item $cbor = $cbor->allow_sharing ([$enable])
		199
		200	=item $enabled = $cbor->get_allow_sharing
		201
		202	If C<$enable> is true (or missing), then C<encode> will not double-encode
		203	values that have been referenced before (e.g. when the same object, such
		204	as an array, is referenced multiple times), but instead will emit a
		205	reference to the earlier value.
		206
		207	This means that such values will only be encoded once, and will not result
		208	in a deep cloning of the value on decode, in decoders supporting the value
		209	sharing extension. This also makes it possible to encode cyclic data
		210	structures (which need C<allow_cycles> to be enabled to be decoded by this
		211	module).
		212
		213	It is recommended to leave it off unless you know your
		214	communication partner supports the value sharing extensions to CBOR
		215	(L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
		216	resulting data structure might be unusable.
		217
		218	Detecting shared values incurs a runtime overhead when values are encoded
		219	that have a reference counter larger than one, and might unnecessarily
		220	increase the encoded size, as potentially shared values are encoded as
		221	shareable whether or not they are actually shared.
		222
		223	At the moment, only targets of references can be shared (e.g. scalars,
		224	arrays or hashes pointed to by a reference). Weirder constructs, such as
		225	an array with multiple "copies" of the I<same> string, which are hard but
		226	not impossible to create in Perl, are not supported (this is the same as
		227	with L<Storable>).
		228
		229	If C<$enable> is false (the default), then C<encode> will encode shared
		230	data structures repeatedly, unsharing them in the process. Cyclic data
		231	structures cannot be encoded in this mode.
		232
		233	This option does not affect C<decode> in any way - shared values and
		234	references will always be decoded properly if present.
		235
		236	=item $cbor = $cbor->allow_cycles ([$enable])
		237
		238	=item $enabled = $cbor->get_allow_cycles
		239
		240	If C<$enable> is true (or missing), then C<decode> will happily decode
		241	self-referential (cyclic) data structures. By default these will not be
		242	decoded, as they need manual cleanup to avoid memory leaks, so code that
		243	isn't prepared for this will not leak memory.
		244
		245	If C<$enable> is false (the default), then C<decode> will throw an error
		246	when it encounters a self-referential/cyclic data structure.
		247
		248	FUTURE DIRECTION: the motivation behind this option is to avoid I<real>
		249	cycles - future versions of this module might choose to decode cyclic data
		250	structures using weak references when this option is off, instead of
		251	throwing an error.
		252
		253	This option does not affect C<encode> in any way - shared values and
		254	references will always be encoded properly if present.
		255
		256	=item $cbor = $cbor->forbid_objects ([$enable])
		257
		258	=item $enabled = $cbor->get_forbid_objects
		259
		260	Disables the use of the object serialiser protocol.
		261
		262	If C<$enable> is true (or missing), then C<encode> will throw an
		263	exception when it encounters perl objects that would be encoded using the
		264	perl-object tag (26). When C<decode> encounters such tags, it will fall
		265	back to the general filter/tagged logic as if this were an unknown tag (by
		266	default resulting in a C<CBOR::XS::Tagged> object).
		267
		268	If C<$enable> is false (the default), then C<encode> will use the
		269	L<Types::Serialiser> object serialisation protocol to serialise objects
		270	into perl-object tags, and C<decode> will do the same to decode such tags.
		271
		272	See L<SECURITY CONSIDERATIONS>, below, for more info on why forbidding this
		273	protocol can be useful.
		274
		275	=item $cbor = $cbor->pack_strings ([$enable])
		276
		277	=item $enabled = $cbor->get_pack_strings
		278
		279	If C<$enable> is true (or missing), then C<encode> will try not to encode
		280	the same string twice, but will encode a reference to the string
		281	instead. Depending on your data format, this can save a lot of space, but
		282	also results in a very large runtime overhead (expect encoding times to be
		283	2-4 times as high as without).
		284
		285	It is recommended to leave it off unless you know your
		286	communications partner supports the stringref extension to CBOR
		287	(L<http://cbor.schmorp.de/stringref>), as without decoder support, the
		288	resulting data structure might not be usable.
		289
		290	If C<$enable> is false (the default), then C<encode> will encode strings
		291	the standard CBOR way.
		292
		293	This option does not affect C<decode> in any way - string references will
		294	always be decoded properly if present.
		295
		296	=item $cbor = $cbor->text_keys ([$enable])
		297
		298	=item $enabled = $cbor->get_text_keys
		299
		300	If C<$enable> is true (or missing), then C<encode> will encode all
		301	perl hash keys as CBOR text strings/UTF-8 strings, upgrading them as needed.
		302
		303	If C<$enable> is false (the default), then C<encode> will encode hash keys
		304	normally - upgraded perl strings (strings internally encoded as UTF-8) as
		305	CBOR text strings, and downgraded perl strings as CBOR byte strings.
		306
		307	This option does not affect C<decode> in any way.
		308
		309	This option is useful for interoperability with CBOR decoders that don't
		310	treat byte strings as a form of text. It is especially useful as Perl
		311	gives very little control over hash keys.
		312
		313	Enabling this option can be slow, as all downgraded hash keys that are
		314	encoded need to be scanned and converted to UTF-8.
		315
		316	=item $cbor = $cbor->text_strings ([$enable])
		317
		318	=item $enabled = $cbor->get_text_strings
		319
		320	This option works similar to C<text_keys>, above, but works on all strings
		321	(including hash keys), so C<text_keys> has no further effect after
		322	enabling C<text_strings>.
		323
		324	If C<$enable> is true (or missing), then C<encode> will encode all perl
		325	strings as CBOR text strings/UTF-8 strings, upgrading them as needed.
		326
		327	If C<$enable> is false (the default), then C<encode> will encode strings
		328	normally (but see C<text_keys>) - upgraded perl strings (strings
		329	internally encoded as UTF-8) as CBOR text strings, and downgraded perl
		330	strings as CBOR byte strings.
		331
		332	This option does not affect C<decode> in any way.
		333
		334	This option has similar advantages and disadvantages as C<text_keys>. In
		335	addition, this option effectively removes the ability to encode byte
		336	strings, which might break some C<FREEZE> and C<TO_CBOR> methods that rely
		337	on this, such as bignum encoding, so this option is mainly useful for very
		338	simple data.
		339
		340	=item $cbor = $cbor->validate_utf8 ([$enable])
		341
		342	=item $enabled = $cbor->get_validate_utf8
		343
		344	If C<$enable> is true (or missing), then C<decode> will validate that
		345	elements (text strings) containing UTF-8 data in fact contain valid UTF-8
		346	data (instead of blindly accepting it). This validation obviously takes
		347	extra time during decoding.
		348
		349	The concept of "valid UTF-8" used is perl's concept, which is a superset
		350	of the official UTF-8.
		351
		352	If C<$enable> is false (the default), then C<decode> will blindly accept
		353	UTF-8 data, marking them as valid UTF-8 in the resulting data structure
		354	regardless of whether that's true or not.
		355
		356	Perl isn't too happy about corrupted UTF-8 in strings, but should
		357	generally not crash or do similarly evil things. Extensions might be not
		358	so forgiving, so it's recommended to turn on this setting if you receive
		359	untrusted CBOR.
		360
		361	This option does not affect C<encode> in any way - strings that are
		362	supposedly valid UTF-8 will simply be dumped into the resulting CBOR
		363	string without checking whether that is, in fact, true or not.
		364
		365	=item $cbor = $cbor->filter ([$cb->($tag, $value)])
		366
		367	=item $cb_or_undef = $cbor->get_filter
		368
		369	Sets or replaces the tagged value decoding filter (when C<$cb> is
		370	specified) or clears the filter (if no argument or C<undef> is provided).
		371
		372	The filter callback is called only during decoding, when a non-enforced
		373	tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
		374	list of enforced tags). For specific tags, it's often better to provide a
		375	default converter using the C<%CBOR::XS::FILTER> hash (see below).
		376
		377	The first argument is the numerical tag, the second is the (decoded) value
		378	that has been tagged.
		379
		380	The filter function should return either exactly one value, which will
		381	replace the tagged value in the decoded data structure, or no values,
		382	which will result in default handling, which currently means the decoder
		383	creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
		384
		385	When the filter is cleared (the default state), the default filter
		386	function, C<CBOR::XS::default_filter>, is used. This function simply
		387	looks up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists
		388	it must be a code reference that is called with tag and value, and is
		389	responsible for decoding the value. If no entry exists, it returns no
		390	values. C<CBOR::XS> provides a number of default filter functions already,
		391	and the C<%CBOR::XS::FILTER> hash can be freely extended with more.
		392
		393	C<CBOR::XS> additionally provides an alternative filter function that is
		394	supposed to be safe to use with untrusted data (which the default filter
		395	might not), called C<CBOR::XS::safe_filter>, which works the same as
		396	the C<default_filter> but uses the C<%CBOR::XS::SAFE_FILTER> variable
		397	instead. It is prepopulated with the tag decoding functions that are
		398	deemed safe (basically the same as C<%CBOR::XS::FILTER> without all
		399	the bignum tags), and can be extended by user code as well, although,
		400	obviously, one should be very careful about adding decoding functions
		401	here, since the expectation is that they are safe to use on untrusted
		402	data, after all.
		403
		404	Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
		405	objects, with no other special handling (useful when working with
		406	potentially "unsafe" CBOR data).
		407
		408	CBOR::XS->new->filter (sub { })->decode ($cbor_data);
		409
		410	Example: provide a global filter for tag 1347375694, converting the value
		411	into some string form.
		412
		413	$CBOR::XS::FILTER{1347375694} = sub {
		414	my ($tag, $value) = @_;
		415
		416	"tag 1347375694 value $value"
		417	};
		418
		419	Example: provide your own filter function that looks up tags in your own
		420	hash:
		421
		422	my %my_filter = (
		423	998347484 => sub {
		424	my ($tag, $value) = @_;
		425
		426	"tag 998347484 value $value"
		427	},
		428	);
		429
		430	my $coder = CBOR::XS->new->filter (sub {
		431	&{ $my_filter{$_[0]} or return }
		432	});
		433
		434
		435	Example: use the safe filter function (see L<SECURITY CONSIDERATIONS> for
		436	more considerations on security).
		437
		438	CBOR::XS->new->filter (\&CBOR::XS::safe_filter)->decode ($cbor_data);
133		439
134	=item $cbor_data = $cbor->encode ($perl_scalar)	440	=item $cbor_data = $cbor->encode ($perl_scalar)
135		441
136	Converts the given Perl data structure (a scalar value) to its CBOR	442	Converts the given Perl data structure (a scalar value) to its CBOR
137	representation.	443	representation.
…		…
151	and you need to know where the first CBOR string ends and the next one	457	and you need to know where the first CBOR string ends and the next one
152	starts.	458	starts.
153		459
154	CBOR::XS->new->decode_prefix ("......")	460	CBOR::XS->new->decode_prefix ("......")
155	=> ("...", 3)	461	=> ("...", 3)
		462
		463	=back
		464
		465	=head2 INCREMENTAL PARSING
		466
		467	In some cases, there is the need for incremental parsing of CBOR
		468	texts. While this module always has to keep both CBOR text and resulting
		469	Perl data structure in memory at one time, it does allow you to parse a
		470	CBOR stream incrementally, in a way similar to using "decode_prefix" to see
		471	if a full CBOR object is available, but much more efficiently.
		472
		473	It basically works by parsing as much of a CBOR string as possible - if
		474	the CBOR data is not complete yet, the parser will remember where it was,
		475	to be able to restart when more data has been accumulated. Once enough
		476	data is available to either decode a complete CBOR value or raise an
		477	error, a real decode will be attempted.
		478
		479	A typical use case would be a network protocol that consists of sending
		480	and receiving CBOR-encoded messages. The solution that works with CBOR and
		481	about anything else is by prepending a length to every CBOR value, so the
		482	receiver knows how many octets to read. More compact (and slightly slower)
		483	would be to just send CBOR values back-to-back, as C<CBOR::XS> knows where
		484	a CBOR value ends, and doesn't need an explicit length.
		485
		486	The following methods help with this:
		487
		488	=over 4
		489
		490	=item @decoded = $cbor->incr_parse ($buffer)
		491
		492	This method attempts to decode exactly one CBOR value from the beginning
		493	of the given C<$buffer>. The value is removed from the C<$buffer> on
		494	success. When C<$buffer> doesn't contain a complete value yet, it returns
		495	nothing. Finally, when the C<$buffer> doesn't start with something
		496	that could ever be a valid CBOR value, it raises an exception, just as
		497	C<decode> would. In the latter case the decoder state is undefined and
		498	must be reset before being able to parse further.
		499
		500	This method modifies the C<$buffer> in place. When no CBOR value can be
		501	decoded, the decoder stores the current string offset. On the next call,
		502	it continues decoding at the place where it stopped before. For this to make
		503	sense, the C<$buffer> must begin with the same octets as on previous
		504	unsuccessful calls.
		505
		506	You can call this method in scalar context, in which case it either
		507	returns a decoded value or C<undef>. This makes it impossible to
		508	distinguish between CBOR null values (which decode to C<undef>) and an
		509	unsuccessful decode, which is often acceptable.
		510
		511	=item @decoded = $cbor->incr_parse_multiple ($buffer)
		512
		513	Same as C<incr_parse>, but attempts to decode as many CBOR values as
		514	possible in one go, instead of at most one. Calls to C<incr_parse> and
		515	C<incr_parse_multiple> can be interleaved.
		516
		517	=item $cbor->incr_reset
		518
		519	Resets the incremental decoder. This throws away any saved state, so that
		520	subsequent calls to C<incr_parse> or C<incr_parse_multiple> start to parse
		521	a new CBOR value from the beginning of the C<$buffer> again.
		522
		523	This method can be called at any time, but it I<must> be called if you want
		524	to change your C<$buffer> or there was a decoding error and you want to
		525	reuse the C<$cbor> object for future incremental parsings.
156		526
157	=back	527	=back
158		528
159		529
160	=head1 MAPPING	530	=head1 MAPPING
…		…
178	CBOR integers become (numeric) perl scalars. On perls without 64 bit	548	CBOR integers become (numeric) perl scalars. On perls without 64 bit
179	support, 64 bit integers will be truncated or otherwise corrupted.	549	support, 64 bit integers will be truncated or otherwise corrupted.
180		550
181	=item byte strings	551	=item byte strings
182		552
183	Byte strings will become octet strings in Perl (the byte values 0..255	553	Byte strings will become octet strings in Perl (the byte values 0..255
184	will simply become characters of the same value in Perl).	554	will simply become characters of the same value in Perl).
185		555
186	=item UTF-8 strings	556	=item UTF-8 strings
187		557
188	UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be	558	UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
…		…
194		564
195	CBOR arrays and CBOR maps will be converted into references to a Perl	565	CBOR arrays and CBOR maps will be converted into references to a Perl
196	array or hash, respectively. The keys of the map will be stringified	566	array or hash, respectively. The keys of the map will be stringified
197	during this process.	567	during this process.
198		568
		569	=item null
		570
		571	CBOR null becomes C<undef> in Perl.
		572
199	=item true, false	573	=item true, false, undefined
200		574
201	These CBOR values become C<CBOR::XS::true> and C<CBOR::XS::false>,	575	These CBOR values become C<Types::Serialiser::true>,
		576	C<Types::Serialiser::false> and C<Types::Serialiser::error>,
202	respectively. They are overloaded to act almost exactly like the numbers	577	respectively. They are overloaded to act almost exactly like the numbers
203	C<1> and C<0>. You can check whether a scalar is a CBOR boolean by using	578	C<1> and C<0> (for true and false) or to throw an exception on access (for
204	the C<CBOR::XS::is_bool> function.	579	error). See the L<Types::Serialiser> manpage for details.
205		580
206	=item null, undefined	581	=item tagged values
207		582
208	CBOR null and undefined values becomes C<undef> in Perl (in the future,
209	Undefined may raise an exception or something else).
210
211	=item tags
212
213	Tagged items consists of a numeric tag and another CBOR value. The tag	583	Tagged items consists of a numeric tag and another CBOR value.
214	55799 is ignored (this tag implements the magic header).
215		584
216	All other tags are currently converted into a L<CBOR::XS::Tagged> object,	585	See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
217	which is simply a blessed array reference consistsing of the numeric tag	586	for details on which tags are handled how.
218	value followed by the (decoded) BOR value.
219		587
220	=item anything else	588	=item anything else
221		589
222	Anything else (e.g. unsupported simple values) will raise a decoding	590	Anything else (e.g. unsupported simple values) will raise a decoding
223	error.	591	error.
…		…
226		594
227		595
228	=head2 PERL -> CBOR	596	=head2 PERL -> CBOR
229		597
230	The mapping from Perl to CBOR is slightly more difficult, as Perl is a	598	The mapping from Perl to CBOR is slightly more difficult, as Perl is a
231	truly typeless language, so we can only guess which CBOR type is meant by	599	typeless language. That means this module can only guess which CBOR type
232	a Perl value.	600	is meant by a perl value.
233		601
234	=over 4	602	=over 4
235		603
236	=item hash references	604	=item hash references
237		605
238	Perl hash references become CBOR maps. As there is no inherent ordering in	606	Perl hash references become CBOR maps. As there is no inherent ordering in
239	hash keys (or CBOR maps), they will usually be encoded in a pseudo-random	607	hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
240	order.	608	order. This order can be different each time a hash is encoded.
241		609
242	Currently, tied hashes will use the indefinite-length format, while normal	610	Currently, tied hashes will use the indefinite-length format, while normal
243	hashes will use the fixed-length format.	611	hashes will use the fixed-length format.
244		612
245	=item array references	613	=item array references
246		614
247	Perl array references become fixed-length CBOR arrays.	615	Perl array references become fixed-length CBOR arrays.
248		616
249	=item other references	617	=item other references
250		618
251	Other unblessed references are generally not allowed and will cause an	619	Other unblessed references will be represented using
252	exception to be thrown, except for references to the integers C<0> and	620	the indirection tag extension (tag value C<22098>,
253	C<1>, which get turned into false and true in CBOR.	621	L<http://cbor.schmorp.de/indirection>). CBOR decoders are guaranteed
		622	to be able to decode these values somehow, by either "doing the right
		623	thing", decoding into a generic tagged object, simply ignoring the tag, or
		624	something else.
254		625
255	=item CBOR::XS::Tagged objects	626	=item CBOR::XS::Tagged objects
256		627
257	Objects of this type must be arrays consisting of a single C<[tag, value]>	628	Objects of this type must be arrays consisting of a single C<[tag, value]>
258	pair. The (numerical) tag will be encoded as a CBOR tag, the value will be	629	pair. The (numerical) tag will be encoded as a CBOR tag, the value will
259	encoded as appropriate for the value.	630	be encoded as appropriate for the value. You must use C<CBOR::XS::tag> to
		631	create such objects.
260		632
261	=item CBOR::XS::true, CBOR::XS::false	633	=item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
262		634
263	These special values become CBOR true and CBOR false values,	635	These special values become CBOR true, CBOR false and CBOR undefined
264	respectively. You can also use C<\1> and C<\0> directly if you want.	636	values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
		637	if you want.
265		638
266	=item blessed objects	639	=item other blessed objects
267		640
268	Other blessed objects currently need to have a C<TO_CBOR> method. It	641	Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
269	will be called on every object that is being serialised, and must return	642	L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
270	something that can be encoded in CBOR.	643	module, and L<OBJECT SERIALISATION> for generic object serialisation.
271		644
272	=item simple scalars	645	=item simple scalars
273		646
274	TODO
275	Simple Perl scalars (any scalar that is not a reference) are the most	647	Simple Perl scalars (any scalar that is not a reference) are the most
276	difficult objects to encode: CBOR::XS will encode undefined scalars as	648	difficult objects to encode: CBOR::XS will encode undefined scalars as
277	CBOR null values, scalars that have last been used in a string context	649	CBOR null values, scalars that have last been used in a string context
278	before encoding as CBOR strings, and anything else as number value:	650	before encoding as CBOR strings, and anything else as number value:
279		651
280	# dump as number	652	# dump as number
281	encode_cbor [2] # yields [2]	653	encode_cbor [2] # yields [2]
282	encode_cbor [-3.0e17] # yields [-3e+17]	654	encode_cbor [-3.0e17] # yields [-3e+17]
283	my $value = 5; encode_cbor [$value] # yields [5]	655	my $value = 5; encode_cbor [$value] # yields [5]
284		656
285	# used as string, so dump as string	657	# used as string, so dump as string (either byte or text)
286	print $value;	658	print $value;
287	encode_cbor [$value] # yields ["5"]	659	encode_cbor [$value] # yields ["5"]
288		660
289	# undef becomes null	661	# undef becomes null
290	encode_cbor [undef] # yields [null]	662	encode_cbor [undef] # yields [null]
…		…
293		665
294	my $x = 3.1; # some variable containing a number	666	my $x = 3.1; # some variable containing a number
295	"$x"; # stringified	667	"$x"; # stringified
296	$x .= ""; # another, more awkward way to stringify	668	$x .= ""; # another, more awkward way to stringify
297	print $x; # perl does it for you, too, quite often	669	print $x; # perl does it for you, too, quite often
		670
		671	You can force whether a string is encoded as byte or text string by using
		672	C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled):
		673
		674	utf8::upgrade $x; # encode $x as text string
		675	utf8::downgrade $x; # encode $x as byte string
		676
		677	Perl doesn't define what operations up- and downgrade strings, so if the
		678	difference between byte and text is important, you should up- or downgrade
		679	your string as late as possible before encoding. You can also force the
		680	use of CBOR text strings by using C<text_keys> or C<text_strings>.
298		681
299	You can force the type to be a CBOR number by numifying it:	682	You can force the type to be a CBOR number by numifying it:
300		683
301	my $x = "3"; # some variable containing a string	684	my $x = "3"; # some variable containing a string
302	$x += 0; # numify it, ensuring it will be dumped as a number	685	$x += 0; # numify it, ensuring it will be dumped as a number
…		…
313	represent numerical values are supported, but might suffer loss of	696	represent numerical values are supported, but might suffer loss of
314	precision.	697	precision.
315		698
316	=back	699	=back
317		700
		701	=head2 OBJECT SERIALISATION
318		702
		703	This module implements both a CBOR-specific and the generic
		704	L<Types::Serialiser> object serialisation protocol. The following
		705	subsections explain both methods.
		706
		707	=head3 ENCODING
		708
		709	This module knows two ways to serialise a Perl object: The CBOR-specific
		710	way, and the generic way.
		711
		712	Whenever the encoder encounters a Perl object that it cannot serialise
		713	directly (most of them), it will first look up the C<TO_CBOR> method on
		714	it.
		715
		716	If it has a C<TO_CBOR> method, it will call it with the object as only
		717	argument, and expects exactly one return value, which it will then
		718	substitute and encode in the place of the object.
		719
		720	Otherwise, it will look up the C<FREEZE> method. If it exists, it will
		721	call it with the object as first argument, and the constant string C<CBOR>
		722	as the second argument, to distinguish it from other serialisers.
		723
		724	The C<FREEZE> method can return any number of values (i.e. zero or
		725	more). These will be encoded as a CBOR perl object, together with the
		726	classname.
		727
		728	These methods I<MUST NOT> change the data structure that is being
		729	serialised. Failure to comply with this can result in memory corruption -
		730	and worse.
		731
		732	If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
		733	with an error.
		734
		735	=head3 DECODING
		736
		737	Objects encoded via C<TO_CBOR> cannot (normally) be automatically decoded,
		738	but objects encoded via C<FREEZE> can be decoded using the following
		739	protocol:
		740
		741	When an encoded CBOR perl object is encountered by the decoder, it will
		742	look up the C<THAW> method, by using the stored classname, and will fail
		743	if the method cannot be found.
		744
		745	After the lookup it will call the C<THAW> method with the stored classname
		746	as first argument, the constant string C<CBOR> as second argument, and all
		747	values returned by C<FREEZE> as remaining arguments.
		748
		749	=head3 EXAMPLES
		750
		751	Here is an example C<TO_CBOR> method:
		752
		753	sub My::Object::TO_CBOR {
		754	my ($obj) = @_;
		755
		756	["this is a serialised My::Object object", $obj->{id}]
		757	}
		758
		759	When a C<My::Object> is encoded to CBOR, it will instead encode a simple
		760	array with two members: a string, and the "object id". Decoding this CBOR
		761	string will yield a normal perl array reference in place of the object.
		762
		763	A more useful and practical example would be a serialisation method for
		764	the URI module. CBOR has a custom tag value for URIs, namely 32:
		765
		766	sub URI::TO_CBOR {
		767	my ($self) = @_;
		768	my $uri = "$self"; # stringify uri
		769	utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
		770	CBOR::XS::tag 32, $uri
		771	}
		772
		773	This will encode URIs as a UTF-8 string with tag 32, which indicates an
		774	URI.
		775
		776	Decoding such an URI will not (currently) give you an URI object, but
		777	instead a CBOR::XS::Tagged object with tag number 32 and the string -
		778	exactly what was returned by C<TO_CBOR>.
		779
		780	To serialise an object so it can automatically be deserialised, you need
		781	to use C<FREEZE> and C<THAW>. To take the URI module as example, this
		782	would be a possible implementation:
		783
		784	sub URI::FREEZE {
		785	my ($self, $serialiser) = @_;
		786	"$self" # encode url string
		787	}
		788
		789	sub URI::THAW {
		790	my ($class, $serialiser, $uri) = @_;
		791	$class->new ($uri)
		792	}
		793
		794	Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
		795	example, a C<FREEZE> method that returns "type", "id" and "variant" values
		796	would cause an invocation of C<THAW> with 5 arguments:
		797
		798	sub My::Object::FREEZE {
		799	my ($self, $serialiser) = @_;
		800
		801	($self->{type}, $self->{id}, $self->{variant})
		802	}
		803
		804	sub My::Object::THAW {
		805	my ($class, $serialiser, $type, $id, $variant) = @_;
		806
		807	$class->new (type => $type, id => $id, variant => $variant)
		808	}
		809
		810
319	=head2 MAGIC HEADER	811	=head1 MAGIC HEADER
320		812
321	There is no way to distinguish CBOR from other formats	813	There is no way to distinguish CBOR from other formats
322	programmatically. To make it easier to distinguish CBOR from other	814	programmatically. To make it easier to distinguish CBOR from other
323	formats, the CBOR specification has a special "magic string" that can be	815	formats, the CBOR specification has a special "magic string" that can be
324	prepended to any CBOR string without changing it's meaning.	816	prepended to any CBOR string without changing its meaning.
325		817
326	This string is available as C<$CBOR::XS::MAGIC>. This module does not	818	This string is available as C<$CBOR::XS::MAGIC>. This module does not
327	prepend this string tot he CBOR data it generates, but it will ignroe it	819	prepend this string to the CBOR data it generates, but it will ignore it
328	if present, so users can prepend this string as a "file type" indicator as	820	if present, so users can prepend this string as a "file type" indicator as
329	required.	821	required.
330		822
331		823
		824	=head1 THE CBOR::XS::Tagged CLASS
		825
		826	CBOR has the concept of tagged values - any CBOR value can be tagged with
		827	a numeric 64 bit number, and these tag numbers are centrally administered.
		828
		829	C<CBOR::XS> handles a few tags internally when en- or decoding. You can
		830	also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
		831	decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
		832	unknown tag.
		833
		834	These objects are simply blessed array references - the first member of
		835	the array being the numerical tag, the second being the value.
		836
		837	You can interact with C<CBOR::XS::Tagged> objects in the following ways:
		838
		839	=over 4
		840
		841	=item $tagged = CBOR::XS::tag $tag, $value
		842
		843	This function(!) creates a new C<CBOR::XS::Tagged> object using the given
		844	C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
		845	value that can be encoded in CBOR, including serialisable Perl objects and
		846	C<CBOR::XS::Tagged> objects).
		847
		848	=item $tagged->[0]
		849
		850	=item $tagged->[0] = $new_tag
		851
		852	=item $tag = $tagged->tag
		853
		854	=item $new_tag = $tagged->tag ($new_tag)
		855
		856	Access/mutate the tag.
		857
		858	=item $tagged->[1]
		859
		860	=item $tagged->[1] = $new_value
		861
		862	=item $value = $tagged->value
		863
		864	=item $new_value = $tagged->value ($new_value)
		865
		866	Access/mutate the tagged value.
		867
		868	=back
		869
		870	=cut
		871
		872	sub tag($$) {
		873	bless [@_], CBOR::XS::Tagged::;
		874	}
		875
		876	sub CBOR::XS::Tagged::tag {
		877	$_[0][0] = $_[1] if $#_;
		878	$_[0][0]
		879	}
		880
		881	sub CBOR::XS::Tagged::value {
		882	$_[0][1] = $_[1] if $#_;
		883	$_[0][1]
		884	}
		885
		886	=head2 EXAMPLES
		887
		888	Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
		889
		890	You can look up CBOR tag values and meanings in the IANA registry at
		891	L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
		892
		893	Prepend a magic header (C<$CBOR::XS::MAGIC>):
		894
		895	my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
		896	# same as:
		897	my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
		898
		899	Serialise some URIs and a regex in an array:
		900
		901	my $cbor = encode_cbor [
		902	(CBOR::XS::tag 32, "http://www.nethype.de/"),
		903	(CBOR::XS::tag 32, "http://software.schmorp.de/"),
		904	(CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
		905	];
		906
		907	Wrap CBOR data in CBOR:
		908
		909	my $cbor_cbor = encode_cbor
		910	CBOR::XS::tag 24,
		911	encode_cbor [1, 2, 3];
		912
		913	=head1 TAG HANDLING AND EXTENSIONS
		914
		915	This section describes how this module handles specific tagged values
		916	and extensions. If a tag is not mentioned here and no additional filters
		917	are provided for it, then the default handling applies (creating a
		918	CBOR::XS::Tagged object on decoding, and only encoding the tag when
		919	explicitly requested).
		920
		921	Tags not handled specifically are currently converted into a
		922	L<CBOR::XS::Tagged> object, which is simply a blessed array reference
		923	consisting of the numeric tag value followed by the (decoded) CBOR value.
		924
		925	Future versions of this module reserve the right to special case
		926	additional tags (such as base64url).
		927
		928	=head2 ENFORCED TAGS
		929
		930	These tags are always handled when decoding, and their handling cannot be
		931	overridden by the user.
		932
		933	=over 4
		934
		935	=item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
		936
		937	These tags are automatically created (and decoded) for serialisable
		938	objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
		939	serialisation protocol). See L<OBJECT SERIALISATION> for details.
		940
		941	=item 28, 29 (shareable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
		942
		943	These tags are automatically decoded when encountered (and they do not
		944	result in a cyclic data structure, see C<allow_cycles>), resulting in
		945	shared values in the decoded object. They are only encoded, however, when
		946	C<allow_sharing> is enabled.
		947
		948	Not all shared values can be successfully decoded: values that reference
		949	themselves will I<currently> decode as C<undef> (this is not the same
		950	as a reference pointing to itself, which will be represented as a value
		951	that contains an indirect reference to itself - these will be decoded
		952	properly).
		953
		954	Note that considerably more shared value data structures can be decoded
		955	than will be encoded - currently, only values pointed to by references
		956	will be shared, others will not. While non-reference shared values can be
		957	generated in Perl with some effort, they were considered too unimportant
		958	to be supported in the encoder. The decoder, however, will decode these
		959	values as shared values.
		960
		961	=item 256, 25 (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
		962
		963	These tags are automatically decoded when encountered. They are only
		964	encoded, however, when C<pack_strings> is enabled.
		965
		966	=item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
		967
		968	This tag is automatically generated when a reference is encountered (with
		969	the exception of hash and array references). It is converted to a reference
		970	when decoding.
		971
		972	=item 55799 (self-describe CBOR, RFC 7049)
		973
		974	This value is not generated on encoding (unless explicitly requested by
		975	the user), and is simply ignored when decoding.
		976
		977	=back
		978
		979	=head2 NON-ENFORCED TAGS
		980
		981	These tags have default filters provided when decoding. Their handling can
		982	be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
		983	providing a custom C<filter> callback when decoding.
		984
		985	When they result in decoding into a specific Perl class, the module
		986	usually provides a corresponding C<TO_CBOR> method as well.
		987
		988	When any of these need to load additional modules that are not part of the
		989	perl core distribution (e.g. L<URI>), it is (currently) up to the user to
		990	provide these modules. The decoding usually fails with an exception if the
		991	required module cannot be loaded.
		992
		993	=over 4
		994
		995	=item 0, 1 (date/time string, seconds since the epoch)
		996
		997	These tags are decoded into L<Time::Piece> objects. The corresponding
		998	C<Time::Piece::TO_CBOR> method always encodes into tag 1 values currently.
		999
		1000	The L<Time::Piece> API is generally surprisingly bad, and fractional
		1001	seconds are only accidentally kept intact, so watch out. On the plus side,
		1002	the module comes with perl since 5.10, which has to count for something.
		1003
		1004	=item 2, 3 (positive/negative bignum)
		1005
		1006	These tags are decoded into L<Math::BigInt> objects. The corresponding
		1007	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
		1008	integers, and others into positive/negative CBOR bignums.
		1009
		1010	=item 4, 5, 264, 265 (decimal fraction/bigfloat)
		1011
		1012	Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
		1013	objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
		1014	encodes into a decimal fraction (either tag 4 or 264).
		1015
		1016	NaN and infinities are not encoded properly, as they cannot be represented
		1017	in CBOR.
		1018
		1019	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
		1020
		1021	=item 30 (rational numbers)
		1022
		1023	These tags are decoded into L<Math::BigRat> objects. The corresponding
		1024	C<Math::BigRat::TO_CBOR> method encodes rational numbers with denominator
		1025	C<1> via their numerator only, i.e., they become normal integers or
		1026	C<bignums>.
		1027
		1028	See L<BIGNUM SECURITY CONSIDERATIONS> for more info.
		1029
		1030	=item 21, 22, 23 (expected later JSON conversion)
		1031
		1032	CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
		1033	tags.
		1034
		1035	=item 32 (URI)
		1036
		1037	These objects decode into L<URI> objects. The corresponding
		1038	C<URI::TO_CBOR> method again results in a CBOR URI value.
		1039
		1040	=back
		1041
		1042	=cut
		1043
332	=head2 CBOR and JSON	1044	=head1 CBOR and JSON
333		1045
334	CBOR is supposed to implement a superset of the JSON data model, and is,	1046	CBOR is supposed to implement a superset of the JSON data model, and is,
335	with some coercion, able to represent all JSON texts (something that other	1047	with some coercion, able to represent all JSON texts (something that other
336	"binary JSON" formats such as BSON generally do not support).	1048	"binary JSON" formats such as BSON generally do not support).
337		1049
…		…
344	CBOR intact.	1056	CBOR intact.
345		1057
346		1058
347	=head1 SECURITY CONSIDERATIONS	1059	=head1 SECURITY CONSIDERATIONS
348		1060
349	When you are using CBOR in a protocol, talking to untrusted potentially	1061	Tl;dr... if you want to decode or encode CBOR from untrusted sources, you
350	hostile creatures requires relatively few measures.	1062	should start with a coder object created via C<new_safe>:
351		1063
		1064	my $coder = CBOR::XS->new_safe;
		1065
		1066	my $data = $coder->decode ($cbor_text);
		1067	my $cbor = $coder->encode ($data);
		1068
		1069	Longer version: When you are using CBOR in a protocol, talking to
		1070	untrusted potentially hostile creatures requires some thought:
		1071
		1072	=over 4
		1073
		1074	=item Security of the CBOR decoder itself
		1075
352	First of all, your CBOR decoder should be secure, that is, should not have	1076	First and foremost, your CBOR decoder should be secure, that is, should
		1077	not have any buffer overflows or similar bugs that could potentially be
353	any buffer overflows. Obviously, this module should ensure that and I am	1078	exploited. Obviously, this module should ensure that and I am trying hard
354	trying hard on making that true, but you never know.	1079	on making that true, but you never know.
355		1080
		1081	=item CBOR::XS can invoke almost arbitrary callbacks during decoding
		1082
		1083	CBOR::XS supports object serialisation - decoding CBOR can cause calls
		1084	to I<any> C<THAW> method in I<any> package that exists in your process
		1085	(that is, CBOR::XS will not try to load modules, but any existing C<THAW>
		1086	method or function can be called, so they all have to be secure).
		1087
		1088	Less obviously, it will also invoke C<TO_CBOR> and C<FREEZE> methods -
		1089	even if all your C<THAW> methods are secure, encoding data structures from
		1090	untrusted sources can invoke those and trigger bugs in those.
		1091
		1092	So, if you are not sure about the security of all the modules you
		1093	have loaded (you shouldn't), you should disable this part using
		1094	C<forbid_objects>.
		1095
		1096	=item CBOR can be extended with tags that call library code
		1097
		1098	CBOR can be extended with tags, and C<CBOR::XS> has a registry of
		1099	conversion functions for many existing tags that can be extended via
		1100	third-party modules (see the C<filter> method).
		1101
		1102	If you don't trust these, you should configure the "safe" filter function,
		1103	C<CBOR::XS::safe_filter>, which by default only includes conversion
		1104	functions that are considered "safe" by the author (but again, they can be
		1105	extended by third party modules).
		1106
		1107	Depending on your level of paranoia, you can use the "safe" filter:
		1108
		1109	$cbor->filter (\&CBOR::XS::safe_filter);
		1110
		1111	... your own filter...
		1112
		1113	$cbor->filter (sub { ... do your stuffs here ... });
		1114
		1115	... or even no filter at all, disabling all tag decoding:
		1116
		1117	$cbor->filter (sub { });
		1118
		1119	This is never a problem for encoding, as the tag mechanism only exists in
		1120	CBOR texts.
		1121
		1122	=item Resource-starving attacks: object memory usage
		1123
356	Second, you need to avoid resource-starving attacks. That means you should	1124	You need to avoid resource-starving attacks. That means you should limit
357	limit the size of CBOR data you accept, or make sure then when your	1125	the size of CBOR data you accept, or make sure that when your resources
358	resources run out, that's just fine (e.g. by using a separate process that	1126	run out, that's just fine (e.g. by using a separate process that can
359	can crash safely). The size of a CBOR string in octets is usually a good	1127	crash safely). The size of a CBOR string in octets is usually a good
360	indication of the size of the resources required to decode it into a Perl	1128	indication of the size of the resources required to decode it into a Perl
361	structure. While CBOR::XS can check the size of the CBOR text, it might be	1129	structure. While CBOR::XS can check the size of the CBOR text (using
362	too late when you already have it in memory, so you might want to check	1130	C<max_size>), it might be too late when you already have it in memory, so
363	the size before you accept the string.	1131	you might want to check the size before you accept the string.
364		1132
		1133	As for encoding, it is possible to construct data structures that are
		1134	relatively small but result in large CBOR texts (for example by having an
		1135	array full of references to the same big data structure, which will all be
		1136	deep-cloned during encoding by default). This is rarely an actual issue
		1137	(and the worst case is still just running out of memory), but you can
		1138	reduce this risk by using C<allow_sharing>.
		1139
		1140	=item Resource-starving attacks: stack overflows
		1141
365	Third, CBOR::XS recurses using the C stack when decoding objects and	1142	CBOR::XS recurses using the C stack when decoding objects and arrays. The
366	arrays. The C stack is a limited resource: for instance, on my amd64	1143	C stack is a limited resource: for instance, on my amd64 machine with 8MB
367	machine with 8MB of stack size I can decode around 180k nested arrays but	1144	of stack size I can decode around 180k nested arrays but only 14k nested
368	only 14k nested CBOR objects (due to perl itself recursing deeply on croak	1145	CBOR objects (due to perl itself recursing deeply on croak to free the
369	to free the temporary). If that is exceeded, the program crashes. To be	1146	temporary). If that is exceeded, the program crashes. To be conservative,
370	conservative, the default nesting limit is set to 512. If your process	1147	the default nesting limit is set to 512. If your process has a smaller
371	has a smaller stack, you should adjust this setting accordingly with the	1148	stack, you should adjust this setting accordingly with the C<max_depth>
372	C<max_depth> method.	1149	method.
		1150
		1151	=item Resource-starving attacks: CPU en-/decoding complexity
		1152
		1153	CBOR::XS will use the L<Math::BigInt>, L<Math::BigFloat> and
		1154	L<Math::BigRat> libraries to represent and encode/decode bignums. These can
		1155	be very slow (as in, centuries of CPU time) and can even crash your
		1156	program (and are generally not very trustworthy). See the next section for
		1157	details.
		1158
		1159	=item Data breaches: leaking information in error messages
		1160
		1161	CBOR::XS might leak contents of your Perl data structures in its error
		1162	messages, so when you serialise sensitive information you might want to
		1163	make sure that exceptions thrown by CBOR::XS will not end up in front of
		1164	untrusted eyes.
		1165
		1166	=item Something else...
373		1167
374	Something else could bomb you, too, that I forgot to think of. In that	1168	Something else could bomb you, too, that I forgot to think of. In that
375	case, you get to keep the pieces. I am always open for hints, though...	1169	case, you get to keep the pieces. I am always open for hints, though...
376		1170
377	Also keep in mind that CBOR::XS might leak contents of your Perl data	1171	=back
378	structures in its error messages, so when you serialise sensitive	1172
379	information you might want to make sure that exceptions thrown by CBOR::XS	1173
380	will not end up in front of untrusted eyes.	1174	=head1 BIGNUM SECURITY CONSIDERATIONS
		1175
		1176	CBOR::XS provides a C<TO_CBOR> method for both L<Math::BigInt> and
		1177	L<Math::BigFloat> that tries to encode the number in the simplest possible
		1178	way, that is, either a CBOR integer, a CBOR bigint/decimal fraction (tag
		1179	4) or an arbitrary-exponent decimal fraction (tag 264). Rational numbers
		1180	(L<Math::BigRat>, tag 30) can also contain bignums as members.
		1181
		1182	CBOR::XS will also understand base-2 bigfloat or arbitrary-exponent
		1183	bigfloats (tags 5 and 265), but it will never generate these on its own.
		1184
		1185	Using the built-in L<Math::BigInt::Calc> support, encoding and decoding
		1186	decimal fractions is generally fast. Decoding bigints can be slow for very
		1187	big numbers (tens of thousands of digits, something that could potentially
		1188	be caught by limiting the size of CBOR texts), and decoding bigfloats or
		1189	arbitrary-exponent bigfloats can be I<extremely> slow (minutes, decades)
		1190	for large exponents (roughly 40 bit and longer).
		1191
		1192	Additionally, L<Math::BigInt> can take advantage of other bignum
		1193	libraries, such as L<Math::GMP>, which cannot handle big floats with large
		1194	exponents, and might simply abort or crash your program, due to their code
		1195	quality.
		1196
		1197	This can be a concern if you want to parse untrusted CBOR. If it is, you
		1198	might want to disable decoding of tag 2 (bigint) and 3 (negative bigint)
		1199	types. You should also disable types 5 and 265, as these can be slow even
		1200	without bigints.
		1201
		1202	Disabling bigints will also partially or fully disable types that rely on
		1203	them, e.g. rational numbers that use bignums.
		1204
381		1205
382	=head1 CBOR IMPLEMENTATION NOTES	1206	=head1 CBOR IMPLEMENTATION NOTES
383		1207
384	This section contains some random implementation notes. They do not	1208	This section contains some random implementation notes. They do not
385	describe guaranteed behaviour, but merely behaviour as-is implemented	1209	describe guaranteed behaviour, but merely behaviour as-is implemented
…		…
394	Only the double data type is supported for NV data types - when Perl uses	1218	Only the double data type is supported for NV data types - when Perl uses
395	long double to represent floating point values, they might not be encoded	1219	long double to represent floating point values, they might not be encoded
396	properly. Half precision types are accepted, but not encoded.	1220	properly. Half precision types are accepted, but not encoded.
397		1221
398	Strict mode and canonical mode are not implemented.	1222	Strict mode and canonical mode are not implemented.
		1223
		1224
		1225	=head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT
		1226
		1227	On perls that were built without 64 bit integer support (these are rare
		1228	nowadays, even on 32 bit architectures, as all major Perl distributions
		1229	are built with 64 bit integer support), support for any kind of 64 bit
		1230	integer in CBOR is very limited - most likely, these 64 bit values will
		1231	be truncated, corrupted, or otherwise not decoded correctly. This also
		1232	includes string, array and map sizes that are stored as 64 bit integers.
399		1233
400		1234
401	=head1 THREADS	1235	=head1 THREADS
402		1236
403	This module is I<not> guaranteed to be thread safe and there are no	1237	This module is I<not> guaranteed to be thread safe and there are no
…		…
417	Please refrain from using rt.cpan.org or any other bug reporting	1251	Please refrain from using rt.cpan.org or any other bug reporting
418	service. I put the contact address into my modules for a reason.	1252	service. I put the contact address into my modules for a reason.
419		1253
420	=cut	1254	=cut
421		1255
422	our $true = do { bless \(my $dummy = 1), "CBOR::XS::Boolean" };	1256	# clumsy and slow hv_store-in-hash helper function
423	our $false = do { bless \(my $dummy = 0), "CBOR::XS::Boolean" };	1257	sub _hv_store {
424		1258	$_[0]{$_[1]} = $_[2];
425	sub true() { $true }
426	sub false() { $false }
427
428	sub is_bool($) {
429	UNIVERSAL::isa $_[0], "CBOR::XS::Boolean"
430	# or UNIVERSAL::isa $_[0], "CBOR::Literal"
431	}	1259	}
432		1260
		1261	our %FILTER = (
		1262	0 => sub { # rfc4287 datetime, utf-8
		1263	require Time::Piece;
		1264	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"
		1265	# from FreeBSD, which can't parse ISO 8601, RFC3339, RFC4287 or much of anything
		1266	# else either. What's incredible over standard strptime totally escapes me.
		1267	# doesn't do fractional times, either. sigh.
		1268	# In fact, it's all a lie, it uses whatever strptime it wants, and of course,
		1269	# they are all incompatible. The openbsd one simply ignores %z (but according to the
		1270	# docs, it would be much more incredibly flexible indeed. If it worked, that is.).
		1271	scalar eval {
		1272	my $s = $_[1];
		1273
		1274	$s =~ s/Z$/+00:00/;
		1275	$s =~ s/(\.[0-9]+)?([+-][0-9][0-9]):([0-9][0-9])$//
		1276	or die;
		1277
		1278	my $b = $1 - ($2 * 60 + $3) * 60; # fractional part + offset. hopefully
		1279	my $d = Time::Piece->strptime ($s, "%Y-%m-%dT%H:%M:%S");
		1280
		1281	Time::Piece::gmtime ($d->epoch + $b)
		1282	} || die "corrupted CBOR date/time string ($_[0])";
		1283	},
		1284
		1285	1 => sub { # seconds since the epoch, possibly fractional
		1286	require Time::Piece;
		1287	scalar Time::Piece::gmtime (pop)
		1288	},
		1289
		1290	2 => sub { # pos bigint
		1291	require Math::BigInt;
		1292	Math::BigInt->new ("0x" . unpack "H*", pop)
		1293	},
		1294
		1295	3 => sub { # neg bigint
		1296	require Math::BigInt;
		1297	-Math::BigInt->new ("0x" . unpack "H*", pop)
		1298	},
		1299
		1300	4 => sub { # decimal fraction, array
		1301	require Math::BigFloat;
		1302	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
		1303	},
		1304
		1305	264 => sub { # decimal fraction with arbitrary exponent
		1306	require Math::BigFloat;
		1307	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
		1308	},
		1309
		1310	5 => sub { # bigfloat, array
		1311	require Math::BigFloat;
		1312	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1313	},
		1314
		1315	265 => sub { # bigfloat with arbitrary exponent
		1316	require Math::BigFloat;
		1317	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1318	},
		1319
		1320	30 => sub { # rational number
		1321	require Math::BigRat;
		1322	Math::BigRat->new ("$_[1][0]/$_[1][1]") # separate parameters only work in recent versions
		1323	},
		1324
		1325	21 => sub { pop }, # expected conversion to base64url encoding
		1326	22 => sub { pop }, # expected conversion to base64 encoding
		1327	23 => sub { pop }, # expected conversion to base16 encoding
		1328
		1329	# 24 # embedded cbor, byte string
		1330
		1331	32 => sub {
		1332	require URI;
		1333	URI->new (pop)
		1334	},
		1335
		1336	# 33 # base64url rfc4648, utf-8
		1337	# 34 # base64 rfc4648, utf-8
		1338	# 35 # regex pcre/ecma262, utf-8
		1339	# 36 # mime message rfc2045, utf-8
		1340	);
		1341
		1342	sub default_filter {
		1343	&{ $FILTER{$_[0]} or return }
		1344	}
		1345
		1346	our %SAFE_FILTER = map { $_ => $FILTER{$_} } 0, 1, 21, 22, 23, 32;
		1347
		1348	sub safe_filter {
		1349	&{ $SAFE_FILTER{$_[0]} or return }
		1350	}
		1351
		1352	sub URI::TO_CBOR {
		1353	my $uri = $_[0]->as_string;
		1354	utf8::upgrade $uri;
		1355	tag 32, $uri
		1356	}
		1357
		1358	sub Math::BigInt::TO_CBOR {
		1359	if (-2147483648 <= $_[0] && $_[0] <= 2147483647) {
		1360	$_[0]->numify
		1361	} else {
		1362	my $hex = substr $_[0]->as_hex, 2;
		1363	$hex = "0$hex" if 1 & length $hex; # sigh
		1364	tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex
		1365	}
		1366	}
		1367
		1368	sub Math::BigFloat::TO_CBOR {
		1369	my ($m, $e) = $_[0]->parts;
		1370
		1371	-9223372036854775808 <= $e && $e <= 18446744073709551615
		1372	? tag 4, [$e->numify, $m]
		1373	: tag 264, [$e, $m]
		1374	}
		1375
		1376	sub Math::BigRat::TO_CBOR {
		1377	my ($n, $d) = $_[0]->parts;
		1378
		1379	# older versions of BigRat need *1, as they do not always return numbers
		1380
		1381	$d*1 == 1
		1382	? $n*1
		1383	: tag 30, [$n*1, $d*1]
		1384	}
		1385
		1386	sub Time::Piece::TO_CBOR {
		1387	tag 1, 0 + $_[0]->epoch
		1388	}
		1389
433	XSLoader::load "CBOR::XS", $VERSION;	1390	XSLoader::load "CBOR::XS", $VERSION;
434
435	package CBOR::XS::Boolean;
436
437	use overload
438	"0+" => sub { ${$_[0]} },
439	"++" => sub { $_[0] = ${$_[0]} + 1 },
440	"--" => sub { $_[0] = ${$_[0]} - 1 },
441	fallback => 1;
442
443	1;
444		1391
445	=head1 SEE ALSO	1392	=head1 SEE ALSO
446		1393
447	The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,	1394	The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
448	serialisation.	1395	serialisation.
449		1396
		1397	The L<Types::Serialiser> module provides the data model for true, false
		1398	and error values.
		1399
450	=head1 AUTHOR	1400	=head1 AUTHOR
451		1401
452	Marc Lehmann <schmorp@schmorp.de>	1402	Marc Lehmann <schmorp@schmorp.de>
453	http://home.schmorp.de/	1403	http://home.schmorp.de/
454		1404
455	=cut	1405	=cut
456		1406
		1407	1
		1408

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing CBOR-XS/XS.pm (file contents): Revision 1.5 by root, Sat Oct 26 23:02:55 2013 UTC vs. Revision 1.68 by root, Wed Jul 17 09:37:16 2019 UTC

Diff Legend

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.5 by root, Sat Oct 26 23:02:55 2013 UTC vs.
Revision 1.68 by root, Wed Jul 17 09:37:16 2019 UTC