[ViewVC] Diff of: cvs/CBOR-XS/XS.pm

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.4 by root, Sat Oct 26 22:25:47 2013 UTC vs.
Revision 1.54 by root, Mon Apr 25 18:24:44 2016 UTC

…		…
12	$perl_value = decode_cbor $binary_cbor_data;	12	$perl_value = decode_cbor $binary_cbor_data;
13		13
14	# OO-interface	14	# OO-interface
15		15
16	$coder = CBOR::XS->new;	16	$coder = CBOR::XS->new;
17	#TODO	17	$binary_cbor_data = $coder->encode ($perl_value);
		18	$perl_value = $coder->decode ($binary_cbor_data);
		19
		20	# prefix decoding
		21
		22	my $many_cbor_strings = ...;
		23	while (length $many_cbor_strings) {
		24	my ($data, $length) = $coder->decode_prefix ($many_cbor_strings);
		25	# data was decoded
		26	substr $many_cbor_strings, 0, $length, ""; # remove decoded cbor string
		27	}
18		28
19	=head1 DESCRIPTION	29	=head1 DESCRIPTION
20		30
21	WARNING! THIS IS A PRE-ALPHA RELEASE! IT WILL CRASH, CORRUPT YOUR DATA AND	31	This module converts Perl data structures to the Concise Binary Object
22	EAT YOUR CHILDREN!	32	Representation (CBOR) and vice versa. CBOR is a fast binary serialisation
		33	format that aims to use an (almost) superset of the JSON data model, i.e.
		34	when you can represent something useful in JSON, you should be able to
		35	represent it in CBOR.
23		36
24	This module converts Perl data structures to CBOR and vice versa. Its	37	In short, CBOR is a faster and quite compact binary alternative to JSON,
		38	with the added ability of supporting serialisation of Perl objects. (JSON
		39	often compresses better than CBOR though, so if you plan to compress the
		40	data later and speed is less important you might want to compare both
		41	formats first).
		42
		43	To give you a general idea about speed, with texts in the megabyte range,
		44	C<CBOR::XS> usually encodes roughly twice as fast as L<Storable> or
		45	L<JSON::XS> and decodes about 15%-30% faster than those. The shorter the
		46	data, the worse L<Storable> performs in comparison.
		47
		48	Regarding compactness, C<CBOR::XS>-encoded data structures are usually
		49	about 20% smaller than the same data encoded as (compact) JSON or
		50	L<Storable>.
		51
		52	In addition to the core CBOR data format, this module implements a
		53	number of extensions, to support cyclic and shared data structures
		54	(see C<allow_sharing> and C<allow_cycles>), string deduplication (see
		55	C<pack_strings>) and scalar references (always enabled).
		56
25	primary goal is to be I<correct> and its secondary goal is to be	57	The primary goal of this module is to be I<correct> and the secondary goal
26	I<fast>. To reach the latter goal it was written in C.	58	is to be I<fast>. To reach the latter goal it was written in C.
27		59
28	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and	60	See MAPPING, below, on how CBOR::XS maps perl values to CBOR values and
29	vice versa.	61	vice versa.
30		62
31	=cut	63	=cut
32		64
33	package CBOR::XS;	65	package CBOR::XS;
34		66
35	use common::sense;	67	use common::sense;
36		68
37	our $VERSION = 0.02;	69	our $VERSION = 1.5;
38	our @ISA = qw(Exporter);	70	our @ISA = qw(Exporter);
39		71
40	our @EXPORT = qw(encode_cbor decode_cbor);	72	our @EXPORT = qw(encode_cbor decode_cbor);
41		73
42	use Exporter;	74	use Exporter;
43	use XSLoader;	75	use XSLoader;
44		76
		77	use Types::Serialiser;
		78
45	our $MAGIC = "\xd9\xd9\xf7";	79	our $MAGIC = "\xd9\xd9\xf7";
46		80
47	=head1 FUNCTIONAL INTERFACE	81	=head1 FUNCTIONAL INTERFACE
48		82
49	The following convenience methods are provided by this module. They are	83	The following convenience methods are provided by this module. They are
…		…
77	strings. All boolean flags described below are by default I<disabled>.	111	strings. All boolean flags described below are by default I<disabled>.
78		112
79	The mutators for flags all return the CBOR object again and thus calls can	113	The mutators for flags all return the CBOR object again and thus calls can
80	be chained:	114	be chained:
81		115
82	#TODO
83	my $cbor = CBOR::XS->new->encode ({a => [1,2]});	116	my $cbor = CBOR::XS->new->encode ({a => [1,2]});
84		117
85	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])	118	=item $cbor = $cbor->max_depth ([$maximum_nesting_depth])
86		119
87	=item $max_depth = $cbor->get_max_depth	120	=item $max_depth = $cbor->get_max_depth
…		…
121	If no argument is given, the limit check will be deactivated (same as when	154	If no argument is given, the limit check will be deactivated (same as when
122	C<0> is specified).	155	C<0> is specified).
123		156
124	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.	157	See SECURITY CONSIDERATIONS, below, for more info on why this is useful.
125		158
		159	=item $cbor = $cbor->allow_unknown ([$enable])
		160
		161	=item $enabled = $cbor->get_allow_unknown
		162
		163	If C<$enable> is true (or missing), then C<encode> will I<not> throw an
		164	exception when it encounters values it cannot represent in CBOR (for
		165	example, filehandles) but instead will encode a CBOR C<error> value.
		166
		167	If C<$enable> is false (the default), then C<encode> will throw an
		168	exception when it encounters anything it cannot encode as CBOR.
		169
		170	This option does not affect C<decode> in any way, and it is recommended to
		171	leave it off unless you know your communications partner.
		172
		173	=item $cbor = $cbor->allow_sharing ([$enable])
		174
		175	=item $enabled = $cbor->get_allow_sharing
		176
		177	If C<$enable> is true (or missing), then C<encode> will not double-encode
		178	values that have been referenced before (e.g. when the same object, such
		179	as an array, is referenced multiple times), but instead will emit a
		180	reference to the earlier value.
		181
		182	This means that such values will only be encoded once, and will not result
		183	in a deep cloning of the value on decode, in decoders supporting the value
		184	sharing extension. This also makes it possible to encode cyclic data
		185	structures (which need C<allow_cycles> to be enabled to be decoded by this
		186	module).
		187
		188	It is recommended to leave it off unless you know your
		189	communication partner supports the value sharing extensions to CBOR
		190	(L<http://cbor.schmorp.de/value-sharing>), as without decoder support, the
		191	resulting data structure might be unusable.
		192
		193	Detecting shared values incurs a runtime overhead when values are encoded
		194	that have a reference counter larger than one, and might unnecessarily
		195	increase the encoded size, as potentially shared values are encoded as
		196	shareable whether or not they are actually shared.
		197
		198	At the moment, only targets of references can be shared (e.g. scalars,
		199	arrays or hashes pointed to by a reference). Weirder constructs, such as
		200	an array with multiple "copies" of the I<same> string, which are hard but
		201	not impossible to create in Perl, are not supported (this is the same as
		202	with L<Storable>).
		203
		204	If C<$enable> is false (the default), then C<encode> will encode shared
		205	data structures repeatedly, unsharing them in the process. Cyclic data
		206	structures cannot be encoded in this mode.
		207
		208	This option does not affect C<decode> in any way - shared values and
		209	references will always be decoded properly if present.
		210
		211	=item $cbor = $cbor->allow_cycles ([$enable])
		212
		213	=item $enabled = $cbor->get_allow_cycles
		214
		215	If C<$enable> is true (or missing), then C<decode> will happily decode
		216	self-referential (cyclic) data structures. By default these will not be
		217	decoded, as they need manual cleanup to avoid memory leaks, so code that
		218	isn't prepared for this will not leak memory.
		219
		220	If C<$enable> is false (the default), then C<decode> will throw an error
		221	when it encounters a self-referential/cyclic data structure.
		222
		223	FUTURE DIRECTION: the motivation behind this option is to avoid I<real>
		224	cycles - future versions of this module might choose to decode cyclic data
		225	structures using weak references when this option is off, instead of
		226	throwing an error.
		227
		228	This option does not affect C<encode> in any way - shared values and
		229	references will always be encoded properly if present.
		230
		231	=item $cbor = $cbor->pack_strings ([$enable])
		232
		233	=item $enabled = $cbor->get_pack_strings
		234
		235	If C<$enable> is true (or missing), then C<encode> will try not to encode
		236	the same string twice, but will encode a reference to the string
		237	instead. Depending on your data format, this can save a lot of space, but
		238	also results in a very large runtime overhead (expect encoding times to be
		239	2-4 times as high as without).
		240
		241	It is recommended to leave it off unless you know your
		242	communications partner supports the stringref extension to CBOR
		243	(L<http://cbor.schmorp.de/stringref>), as without decoder support, the
		244	resulting data structure might not be usable.
		245
		246	If C<$enable> is false (the default), then C<encode> will encode strings
		247	the standard CBOR way.
		248
		249	This option does not affect C<decode> in any way - string references will
		250	always be decoded properly if present.
		251
		252	=item $cbor = $cbor->text_keys ([$enable])
		253
		254	=item $enabled = $cbor->get_text_keys
		255
		256	If C<$enable> is true (or missing), then C<encode> will encode all
		257	perl hash keys as CBOR text strings/UTF-8 string, upgrading them as needed.
		258
		259	If C<$enable> is false (the default), then C<encode> will encode hash keys
		260	normally - upgraded perl strings (strings internally encoded as UTF-8) as
		261	CBOR text strings, and downgraded perl strings as CBOR byte strings.
		262
		263	This option does not affect C<decode> in any way.
		264
		265	This option is useful for interoperability with CBOR decoders that don't
		266	treat byte strings as a form of text. It is especially useful as Perl
		267	gives very little control over hash keys.
		268
		269	Enabling this option can be slow, as all downgraded hash keys that are
		270	encoded need to be scanned and converted to UTF-8.
		271
		272	=item $cbor = $cbor->text_strings ([$enable])
		273
		274	=item $enabled = $cbor->get_text_strings
		275
		276	This option works similar to C<text_keys>, above, but works on all strings
		277	(including hash keys), so C<text_keys> has no further effect after
		278	enabling C<text_strings>.
		279
		280	If C<$enable> is true (or missing), then C<encode> will encode all perl
		281	strings as CBOR text strings/UTF-8 strings, upgrading them as needed.
		282
		283	If C<$enable> is false (the default), then C<encode> will encode strings
		284	normally (but see C<text_keys>) - upgraded perl strings (strings
		285	internally encoded as UTF-8) as CBOR text strings, and downgraded perl
		286	strings as CBOR byte strings.
		287
		288	This option does not affect C<decode> in any way.
		289
		290	This option has similar advantages and disadvantages as C<text_keys>. In
		291	addition, this option effectively removes the ability to encode byte
		292	strings, which might break some C<FREEZE> and C<TO_CBOR> methods that rely
		293	on this, such as bignum encoding, so this option is mainly useful for very
		294	simple data.
		295
		296	=item $cbor = $cbor->validate_utf8 ([$enable])
		297
		298	=item $enabled = $cbor->get_validate_utf8
		299
		300	If C<$enable> is true (or missing), then C<decode> will validate that
		301	elements (text strings) containing UTF-8 data in fact contain valid UTF-8
		302	data (instead of blindly accepting it). This validation obviously takes
		303	extra time during decoding.
		304
		305	The concept of "valid UTF-8" used is perl's concept, which is a superset
		306	of the official UTF-8.
		307
		308	If C<$enable> is false (the default), then C<decode> will blindly accept
		309	UTF-8 data, marking them as valid UTF-8 in the resulting data structure
		310	regardless of whether that's true or not.
		311
		312	Perl isn't too happy about corrupted UTF-8 in strings, but should
		313	generally not crash or do similarly evil things. Extensions might not be
		314	so forgiving, so it's recommended to turn on this setting if you receive
		315	untrusted CBOR.
		316
		317	This option does not affect C<encode> in any way - strings that are
		318	supposedly valid UTF-8 will simply be dumped into the resulting CBOR
		319	string without checking whether that is, in fact, true or not.
		320
		321	=item $cbor = $cbor->filter ([$cb->($tag, $value)])
		322
		323	=item $cb_or_undef = $cbor->get_filter
		324
		325	Sets or replaces the tagged value decoding filter (when C<$cb> is
		326	specified) or clears the filter (if no argument or C<undef> is provided).
		327
		328	The filter callback is called only during decoding, when a non-enforced
		329	tagged value has been decoded (see L<TAG HANDLING AND EXTENSIONS> for a
		330	list of enforced tags). For specific tags, it's often better to provide a
		331	default converter using the C<%CBOR::XS::FILTER> hash (see below).
		332
		333	The first argument is the numerical tag, the second is the (decoded) value
		334	that has been tagged.
		335
		336	The filter function should return either exactly one value, which will
		337	replace the tagged value in the decoded data structure, or no values,
		338	which will result in default handling, which currently means the decoder
		339	creates a C<CBOR::XS::Tagged> object to hold the tag and the value.
		340
		341	When the filter is cleared (the default state), the default filter
		342	function, C<CBOR::XS::default_filter>, is used. This function simply looks
		343	up the tag in the C<%CBOR::XS::FILTER> hash. If an entry exists it must be
		344	a code reference that is called with tag and value, and is responsible for
		345	decoding the value. If no entry exists, it returns no values.
		346
		347	Example: decode all tags not handled internally into C<CBOR::XS::Tagged>
		348	objects, with no other special handling (useful when working with
		349	potentially "unsafe" CBOR data).
		350
		351	CBOR::XS->new->filter (sub { })->decode ($cbor_data);
		352
		353	Example: provide a global filter for tag 1347375694, converting the value
		354	into some string form.
		355
		356	$CBOR::XS::FILTER{1347375694} = sub {
		357	my ($tag, $value) = @_;
		358
		359	"tag 1347375694 value $value"
		360	};
		361
126	=item $cbor_data = $cbor->encode ($perl_scalar)	362	=item $cbor_data = $cbor->encode ($perl_scalar)
127		363
128	Converts the given Perl data structure (a scalar value) to its CBOR	364	Converts the given Perl data structure (a scalar value) to its CBOR
129	representation.	365	representation.
130		366
…		…
143	and you need to know where the first CBOR string ends amd the next one	379	and you need to know where the first CBOR string ends amd the next one
144	starts.	380	starts.
145		381
146	CBOR::XS->new->decode_prefix ("......")	382	CBOR::XS->new->decode_prefix ("......")
147	=> ("...", 3)	383	=> ("...", 3)
		384
		385	=back
		386
		387	=head2 INCREMENTAL PARSING
		388
		389	In some cases, there is the need for incremental parsing of CBOR
		390	texts. While this module always has to keep both CBOR text and resulting
		391	Perl data structure in memory at one time, it does allow you to parse a
		392	CBOR stream incrementally, in a way similar to using "decode_prefix" to see
		393	if a full CBOR object is available, but much more efficiently.
		394
		395	It basically works by parsing as much of a CBOR string as possible - if
		396	the CBOR data is not complete yet, the parser will remember where it was,
		397	to be able to restart when more data has been accumulated. Once enough
		398	data is available to either decode a complete CBOR value or raise an
		399	error, a real decode will be attempted.
		400
		401	A typical use case would be a network protocol that consists of sending
		402	and receiving CBOR-encoded messages. The solution that works with CBOR and
		403	about anything else is by prepending a length to every CBOR value, so the
		404	receiver knows how many octets to read. More compact (and slightly slower)
		405	would be to just send CBOR values back-to-back, as C<CBOR::XS> knows where
		406	a CBOR value ends, and doesn't need an explicit length.
		407
		408	The following methods help with this:
		409
		410	=over 4
		411
		412	=item @decoded = $cbor->incr_parse ($buffer)
		413
		414	This method attempts to decode exactly one CBOR value from the beginning
		415	of the given C<$buffer>. The value is removed from the C<$buffer> on
		416	success. When C<$buffer> doesn't contain a complete value yet, it returns
		417	nothing. Finally, when the C<$buffer> doesn't start with something
		418	that could ever be a valid CBOR value, it raises an exception, just as
		419	C<decode> would. In the latter case the decoder state is undefined and
		420	must be reset before being able to parse further.
		421
		422	This method modifies the C<$buffer> in place. When no CBOR value can be
		423	decoded, the decoder stores the current string offset. On the next call,
		424	it continues decoding at the place where it stopped before. For this to make
		425	sense, the C<$buffer> must begin with the same octets as on previous
		426	unsuccessful calls.
		427
		428	You can call this method in scalar context, in which case it either
		429	returns a decoded value or C<undef>. This makes it impossible to
		430	distinguish between CBOR null values (which decode to C<undef>) and an
		431	unsuccessful decode, which is often acceptable.
		432
		433	=item @decoded = $cbor->incr_parse_multiple ($buffer)
		434
		435	Same as C<incr_parse>, but attempts to decode as many CBOR values as
		436	possible in one go, instead of at most one. Calls to C<incr_parse> and
		437	C<incr_parse_multiple> can be interleaved.
		438
		439	=item $cbor->incr_reset
		440
		441	Resets the incremental decoder. This throws away any saved state, so that
		442	subsequent calls to C<incr_parse> or C<incr_parse_multiple> start to parse
		443	a new CBOR value from the beginning of the C<$buffer> again.
		444
		445	This method can be called at any time, but it I<must> be called if you want
		446	to change your C<$buffer> or there was a decoding error and you want to
		447	reuse the C<$cbor> object for future incremental parsings.
148		448
149	=back	449	=back
150		450
151		451
152	=head1 MAPPING	452	=head1 MAPPING
…		…
170	CBOR integers become (numeric) perl scalars. On perls without 64 bit	470	CBOR integers become (numeric) perl scalars. On perls without 64 bit
171	support, 64 bit integers will be truncated or otherwise corrupted.	471	support, 64 bit integers will be truncated or otherwise corrupted.
172		472
173	=item byte strings	473	=item byte strings
174		474
175	Byte strings will become octet strings in Perl (the byte values 0..255	475	Byte strings will become octet strings in Perl (the Byte values 0..255
176	will simply become characters of the same value in Perl).	476	will simply become characters of the same value in Perl).
177		477
178	=item UTF-8 strings	478	=item UTF-8 strings
179		479
180	UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be	480	UTF-8 strings in CBOR will be decoded, i.e. the UTF-8 octets will be
…		…
186		486
187	CBOR arrays and CBOR maps will be converted into references to a Perl	487	CBOR arrays and CBOR maps will be converted into references to a Perl
188	array or hash, respectively. The keys of the map will be stringified	488	array or hash, respectively. The keys of the map will be stringified
189	during this process.	489	during this process.
190		490
		491	=item null
		492
		493	CBOR null becomes C<undef> in Perl.
		494
191	=item true, false	495	=item true, false, undefined
192		496
193	These CBOR values become C<CBOR::XS::true> and C<CBOR::XS::false>,	497	These CBOR values become C<Types::Serialiser::true>,
		498	C<Types::Serialiser::false> and C<Types::Serialiser::error>,
194	respectively. They are overloaded to act almost exactly like the numbers	499	respectively. They are overloaded to act almost exactly like the numbers
195	C<1> and C<0>. You can check whether a scalar is a CBOR boolean by using	500	C<1> and C<0> (for true and false) or to throw an exception on access (for
196	the C<CBOR::XS::is_bool> function.	501	error). See the L<Types::Serialiser> manpage for details.
197		502
198	=item null, undefined	503	=item tagged values
199		504
200	CBOR null and undefined values becomes C<undef> in Perl (in the future,
201	Undefined may raise an exception or something else).
202
203	=item tags
204
205	Tagged items consists of a numeric tag and another CBOR value. The tag	505	Tagged items consists of a numeric tag and another CBOR value.
206	55799 is ignored (this tag implements the magic header).
207		506
208	All other tags are currently converted into a L<CBOR::XS::Tagged> object,	507	See L<TAG HANDLING AND EXTENSIONS> and the description of C<< ->filter >>
209	which is simply a blessed array reference consistsing of the numeric tag	508	for details on which tags are handled how.
210	value followed by the (decoded) BOR value.
211		509
212	=item anything else	510	=item anything else
213		511
214	Anything else (e.g. unsupported simple values) will raise a decoding	512	Anything else (e.g. unsupported simple values) will raise a decoding
215	error.	513	error.
…		…
218		516
219		517
220	=head2 PERL -> CBOR	518	=head2 PERL -> CBOR
221		519
222	The mapping from Perl to CBOR is slightly more difficult, as Perl is a	520	The mapping from Perl to CBOR is slightly more difficult, as Perl is a
223	truly typeless language, so we can only guess which CBOR type is meant by	521	typeless language. That means this module can only guess which CBOR type
224	a Perl value.	522	is meant by a perl value.
225		523
226	=over 4	524	=over 4
227		525
228	=item hash references	526	=item hash references
229		527
230	Perl hash references become CBOR maps. As there is no inherent ordering in	528	Perl hash references become CBOR maps. As there is no inherent ordering in
231	hash keys (or CBOR maps), they will usually be encoded in a pseudo-random	529	hash keys (or CBOR maps), they will usually be encoded in a pseudo-random
232	order.	530	order. This order can be different each time a hash is encoded.
233		531
234	Currently, tied hashes will use the indefinite-length format, while normal	532	Currently, tied hashes will use the indefinite-length format, while normal
235	hashes will use the fixed-length format.	533	hashes will use the fixed-length format.
236		534
237	=item array references	535	=item array references
238		536
239	Perl array references become fixed-length CBOR arrays.	537	Perl array references become fixed-length CBOR arrays.
240		538
241	=item other references	539	=item other references
242		540
243	Other unblessed references are generally not allowed and will cause an	541	Other unblessed references will be represented using
244	exception to be thrown, except for references to the integers C<0> and	542	the indirection tag extension (tag value C<22098>,
245	C<1>, which get turned into false and true in CBOR.	543	L<http://cbor.schmorp.de/indirection>). CBOR decoders are guaranteed
		544	to be able to decode these values somehow, by either "doing the right
		545	thing", decoding into a generic tagged object, simply ignoring the tag, or
		546	something else.
246		547
247	=item CBOR::XS::Tagged objects	548	=item CBOR::XS::Tagged objects
248		549
249	Objects of this type must be arrays consisting of a single C<[tag, value]>	550	Objects of this type must be arrays consisting of a single C<[tag, value]>
250	pair. The (numerical) tag will be encoded as a CBOR tag, the value will be	551	pair. The (numerical) tag will be encoded as a CBOR tag, the value will
251	encoded as appropriate for the value.	552	be encoded as appropriate for the value. You must use C<CBOR::XS::tag> to
		553	create such objects.
252		554
253	=item CBOR::XS::true, CBOR::XS::false	555	=item Types::Serialiser::true, Types::Serialiser::false, Types::Serialiser::error
254		556
255	These special values become CBOR true and CBOR false values,	557	These special values become CBOR true, CBOR false and CBOR undefined
256	respectively. You can also use C<\1> and C<\0> directly if you want.	558	values, respectively. You can also use C<\1>, C<\0> and C<\undef> directly
		559	if you want.
257		560
258	=item blessed objects	561	=item other blessed objects
259		562
260	Other blessed objects currently need to have a C<TO_CBOR> method. It	563	Other blessed objects are serialised via C<TO_CBOR> or C<FREEZE>. See
261	will be called on every object that is being serialised, and must return	564	L<TAG HANDLING AND EXTENSIONS> for specific classes handled by this
262	something that can be encoded in CBOR.	565	module, and L<OBJECT SERIALISATION> for generic object serialisation.
263		566
264	=item simple scalars	567	=item simple scalars
265		568
266	TODO
267	Simple Perl scalars (any scalar that is not a reference) are the most	569	Simple Perl scalars (any scalar that is not a reference) are the most
268	difficult objects to encode: CBOR::XS will encode undefined scalars as	570	difficult objects to encode: CBOR::XS will encode undefined scalars as
269	CBOR null values, scalars that have last been used in a string context	571	CBOR null values, scalars that have last been used in a string context
270	before encoding as CBOR strings, and anything else as number value:	572	before encoding as CBOR strings, and anything else as number value:
271		573
272	# dump as number	574	# dump as number
273	encode_cbor [2] # yields [2]	575	encode_cbor [2] # yields [2]
274	encode_cbor [-3.0e17] # yields [-3e+17]	576	encode_cbor [-3.0e17] # yields [-3e+17]
275	my $value = 5; encode_cbor [$value] # yields [5]	577	my $value = 5; encode_cbor [$value] # yields [5]
276		578
277	# used as string, so dump as string	579	# used as string, so dump as string (either byte or text)
278	print $value;	580	print $value;
279	encode_cbor [$value] # yields ["5"]	581	encode_cbor [$value] # yields ["5"]
280		582
281	# undef becomes null	583	# undef becomes null
282	encode_cbor [undef] # yields [null]	584	encode_cbor [undef] # yields [null]
…		…
285		587
286	my $x = 3.1; # some variable containing a number	588	my $x = 3.1; # some variable containing a number
287	"$x"; # stringified	589	"$x"; # stringified
288	$x .= ""; # another, more awkward way to stringify	590	$x .= ""; # another, more awkward way to stringify
289	print $x; # perl does it for you, too, quite often	591	print $x; # perl does it for you, too, quite often
		592
		593	You can force whether a string is encoded as byte or text string by using
		594	C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled):
		595
		596	utf8::upgrade $x; # encode $x as text string
		597	utf8::downgrade $x; # encode $x as byte string
		598
		599	Perl doesn't define what operations up- and downgrade strings, so if the
		600	difference between byte and text is important, you should up- or downgrade
		601	your string as late as possible before encoding. You can also force the
		602	use of CBOR text strings by using C<text_keys> or C<text_strings>.
290		603
291	You can force the type to be a CBOR number by numifying it:	604	You can force the type to be a CBOR number by numifying it:
292		605
293	my $x = "3"; # some variable containing a string	606	my $x = "3"; # some variable containing a string
294	$x += 0; # numify it, ensuring it will be dumped as a number	607	$x += 0; # numify it, ensuring it will be dumped as a number
…		…
305	represent numerical values are supported, but might suffer loss of	618	represent numerical values are supported, but might suffer loss of
306	precision.	619	precision.
307		620
308	=back	621	=back
309		622
		623	=head2 OBJECT SERIALISATION
310		624
		625	This module implements both a CBOR-specific and the generic
		626	L<Types::Serialiser> object serialisation protocol. The following
		627	subsections explain both methods.
		628
		629	=head3 ENCODING
		630
		631	This module knows two ways to serialise a Perl object: The CBOR-specific
		632	way, and the generic way.
		633
		634	Whenever the encoder encounters a Perl object that it cannot serialise
		635	directly (most of them), it will first look up the C<TO_CBOR> method on
		636	it.
		637
		638	If it has a C<TO_CBOR> method, it will call it with the object as only
		639	argument, and expects exactly one return value, which it will then
		640	substitute and encode it in the place of the object.
		641
		642	Otherwise, it will look up the C<FREEZE> method. If it exists, it will
		643	call it with the object as first argument, and the constant string C<CBOR>
		644	as the second argument, to distinguish it from other serialisers.
		645
		646	The C<FREEZE> method can return any number of values (i.e. zero or
		647	more). These will be encoded as CBOR perl object, together with the
		648	classname.
		649
		650	These methods I<MUST NOT> change the data structure that is being
		651	serialised. Failure to comply to this can result in memory corruption -
		652	and worse.
		653
		654	If an object supports neither C<TO_CBOR> nor C<FREEZE>, encoding will fail
		655	with an error.
		656
		657	=head3 DECODING
		658
		659	Objects encoded via C<TO_CBOR> cannot (normally) be automatically decoded,
		660	but objects encoded via C<FREEZE> can be decoded using the following
		661	protocol:
		662
		663	When an encoded CBOR perl object is encountered by the decoder, it will
		664	look up the C<THAW> method, by using the stored classname, and will fail
		665	if the method cannot be found.
		666
		667	After the lookup it will call the C<THAW> method with the stored classname
		668	as first argument, the constant string C<CBOR> as second argument, and all
		669	values returned by C<FREEZE> as remaining arguments.
		670
		671	=head3 EXAMPLES
		672
		673	Here is an example C<TO_CBOR> method:
		674
		675	sub My::Object::TO_CBOR {
		676	my ($obj) = @_;
		677
		678	["this is a serialised My::Object object", $obj->{id}]
		679	}
		680
		681	When a C<My::Object> is encoded to CBOR, it will instead encode a simple
		682	array with two members: a string, and the "object id". Decoding this CBOR
		683	string will yield a normal perl array reference in place of the object.
		684
		685	A more useful and practical example would be a serialisation method for
		686	the URI module. CBOR has a custom tag value for URIs, namely 32:
		687
		688	sub URI::TO_CBOR {
		689	my ($self) = @_;
		690	my $uri = "$self"; # stringify uri
		691	utf8::upgrade $uri; # make sure it will be encoded as UTF-8 string
		692	CBOR::XS::tag 32, $uri
		693	}
		694
		695	This will encode URIs as a UTF-8 string with tag 32, which indicates an
		696	URI.
		697
		698	Decoding such an URI will not (currently) give you an URI object, but
		699	instead a CBOR::XS::Tagged object with tag number 32 and the string -
		700	exactly what was returned by C<TO_CBOR>.
		701
		702	To serialise an object so it can automatically be deserialised, you need
		703	to use C<FREEZE> and C<THAW>. To take the URI module as example, this
		704	would be a possible implementation:
		705
		706	sub URI::FREEZE {
		707	my ($self, $serialiser) = @_;
		708	"$self" # encode url string
		709	}
		710
		711	sub URI::THAW {
		712	my ($class, $serialiser, $uri) = @_;
		713	$class->new ($uri)
		714	}
		715
		716	Unlike C<TO_CBOR>, multiple values can be returned by C<FREEZE>. For
		717	example, a C<FREEZE> method that returns "type", "id" and "variant" values
		718	would cause an invocation of C<THAW> with 5 arguments:
		719
		720	sub My::Object::FREEZE {
		721	my ($self, $serialiser) = @_;
		722
		723	($self->{type}, $self->{id}, $self->{variant})
		724	}
		725
		726	sub My::Object::THAW {
		727	my ($class, $serialiser, $type, $id, $variant) = @_;
		728
		729	$class->new (type => $type, id => $id, variant => $variant)
		730	}
		731
		732
311	=head2 MAGIC HEADER	733	=head1 MAGIC HEADER
312		734
313	There is no way to distinguish CBOR from other formats	735	There is no way to distinguish CBOR from other formats
314	programmatically. To make it easier to distinguish CBOR from other	736	programmatically. To make it easier to distinguish CBOR from other
315	formats, the CBOR specification has a special "magic string" that can be	737	formats, the CBOR specification has a special "magic string" that can be
316	prepended to any CBOR string without changing it's meaning.	738	prepended to any CBOR string without changing its meaning.
317		739
318	This string is available as C<$CBOR::XS::MAGIC>. This module does not	740	This string is available as C<$CBOR::XS::MAGIC>. This module does not
319	prepend this string tot he CBOR data it generates, but it will ignroe it	741	prepend this string to the CBOR data it generates, but it will ignore it
320	if present, so users can prepend this string as a "file type" indicator as	742	if present, so users can prepend this string as a "file type" indicator as
321	required.	743	required.
322		744
323		745
		746	=head1 THE CBOR::XS::Tagged CLASS
		747
		748	CBOR has the concept of tagged values - any CBOR value can be tagged with
		749	a numeric 64 bit tag number, and these tag numbers are centrally administered.
		750
		751	C<CBOR::XS> handles a few tags internally when en- or decoding. You can
		752	also create tags yourself by encoding C<CBOR::XS::Tagged> objects, and the
		753	decoder will create C<CBOR::XS::Tagged> objects itself when it hits an
		754	unknown tag.
		755
		756	These objects are simply blessed array references - the first member of
		757	the array being the numerical tag, the second being the value.
		758
		759	You can interact with C<CBOR::XS::Tagged> objects in the following ways:
		760
		761	=over 4
		762
		763	=item $tagged = CBOR::XS::tag $tag, $value
		764
		765	This function(!) creates a new C<CBOR::XS::Tagged> object using the given
		766	C<$tag> (0..2**64-1) to tag the given C<$value> (which can be any Perl
		767	value that can be encoded in CBOR, including serialisable Perl objects and
		768	C<CBOR::XS::Tagged> objects).
		769
		770	=item $tagged->[0]
		771
		772	=item $tagged->[0] = $new_tag
		773
		774	=item $tag = $tagged->tag
		775
		776	=item $new_tag = $tagged->tag ($new_tag)
		777
		778	Access/mutate the tag.
		779
		780	=item $tagged->[1]
		781
		782	=item $tagged->[1] = $new_value
		783
		784	=item $value = $tagged->value
		785
		786	=item $new_value = $tagged->value ($new_value)
		787
		788	Access/mutate the tagged value.
		789
		790	=back
		791
		792	=cut
		793
		794	sub tag($$) {
		795	bless [@_], CBOR::XS::Tagged::;
		796	}
		797
		798	sub CBOR::XS::Tagged::tag {
		799	$_[0][0] = $_[1] if $#_;
		800	$_[0][0]
		801	}
		802
		803	sub CBOR::XS::Tagged::value {
		804	$_[0][1] = $_[1] if $#_;
		805	$_[0][1]
		806	}
		807
		808	=head2 EXAMPLES
		809
		810	Here are some examples of C<CBOR::XS::Tagged> uses to tag objects.
		811
		812	You can look up CBOR tag values and meanings in the IANA registry at
		813	L<http://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>.
		814
		815	Prepend a magic header (C<$CBOR::XS::MAGIC>):
		816
		817	my $cbor = encode_cbor CBOR::XS::tag 55799, $value;
		818	# same as:
		819	my $cbor = $CBOR::XS::MAGIC . encode_cbor $value;
		820
		821	Serialise some URIs and a regex in an array:
		822
		823	my $cbor = encode_cbor [
		824	(CBOR::XS::tag 32, "http://www.nethype.de/"),
		825	(CBOR::XS::tag 32, "http://software.schmorp.de/"),
		826	(CBOR::XS::tag 35, "^[Pp][Ee][Rr][lL]\$"),
		827	];
		828
		829	Wrap CBOR data in CBOR:
		830
		831	my $cbor_cbor = encode_cbor
		832	CBOR::XS::tag 24,
		833	encode_cbor [1, 2, 3];
		834
		835	=head1 TAG HANDLING AND EXTENSIONS
		836
		837	This section describes how this module handles specific tagged values
		838	and extensions. If a tag is not mentioned here and no additional filters
		839	are provided for it, then the default handling applies (creating a
		840	CBOR::XS::Tagged object on decoding, and only encoding the tag when
		841	explicitly requested).
		842
		843	Tags not handled specifically are currently converted into a
		844	L<CBOR::XS::Tagged> object, which is simply a blessed array reference
		845	consisting of the numeric tag value followed by the (decoded) CBOR value.
		846
		847	Future versions of this module reserve the right to special case
		848	additional tags (such as base64url).
		849
		850	=head2 ENFORCED TAGS
		851
		852	These tags are always handled when decoding, and their handling cannot be
		853	overridden by the user.
		854
		855	=over 4
		856
		857	=item 26 (perl-object, L<http://cbor.schmorp.de/perl-object>)
		858
		859	These tags are automatically created (and decoded) for serialisable
		860	objects using the C<FREEZE/THAW> methods (the L<Types::Serialiser> object
		861	serialisation protocol). See L<OBJECT SERIALISATION> for details.
		862
		863	=item 28, 29 (shareable, sharedref, L<http://cbor.schmorp.de/value-sharing>)
		864
		865	These tags are automatically decoded when encountered (and they do not
		866	result in a cyclic data structure, see C<allow_cycles>), resulting in
		867	shared values in the decoded object. They are only encoded, however, when
		868	C<allow_sharing> is enabled.
		869
		870	Not all shared values can be successfully decoded: values that reference
		871	themselves will I<currently> decode as C<undef> (this is not the same
		872	as a reference pointing to itself, which will be represented as a value
		873	that contains an indirect reference to itself - these will be decoded
		874	properly).
		875
		876	Note that considerably more shared value data structures can be decoded
		877	than will be encoded - currently, only values pointed to by references
		878	will be shared, others will not. While non-reference shared values can be
		879	generated in Perl with some effort, they were considered too unimportant
		880	to be supported in the encoder. The decoder, however, will decode these
		881	values as shared values.
		882
		883	=item 256, 25 (stringref-namespace, stringref, L<http://cbor.schmorp.de/stringref>)
		884
		885	These tags are automatically decoded when encountered. They are only
		886	encoded, however, when C<pack_strings> is enabled.
		887
		888	=item 22098 (indirection, L<http://cbor.schmorp.de/indirection>)
		889
		890	This tag is automatically generated when a reference is encountered (with
		891	the exception of hash and array references). It is converted to a reference
		892	when decoding.
		893
		894	=item 55799 (self-describe CBOR, RFC 7049)
		895
		896	This value is not generated on encoding (unless explicitly requested by
		897	the user), and is simply ignored when decoding.
		898
		899	=back
		900
		901	=head2 NON-ENFORCED TAGS
		902
		903	These tags have default filters provided when decoding. Their handling can
		904	be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by
		905	providing a custom C<filter> callback when decoding.
		906
		907	When they result in decoding into a specific Perl class, the module
		908	usually provides a corresponding C<TO_CBOR> method as well.
		909
		910	When any of these need to load additional modules that are not part of the
		911	perl core distribution (e.g. L<URI>), it is (currently) up to the user to
		912	provide these modules. The decoding usually fails with an exception if the
		913	required module cannot be loaded.
		914
		915	=over 4
		916
		917	=item 0, 1 (date/time string, seconds since the epoch)
		918
		919	These tags are decoded into L<Time::Piece> objects. The corresponding
		920	C<Time::Piece::TO_CBOR> method always encodes into tag 1 values currently.
		921
		922	The L<Time::Piece> API is generally surprisingly bad, and fractional
		923	seconds are only accidentally kept intact, so watch out. On the plus side,
		924	the module comes with perl since 5.10, which has to count for something.
		925
		926	=item 2, 3 (positive/negative bignum)
		927
		928	These tags are decoded into L<Math::BigInt> objects. The corresponding
		929	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
		930	integers, and others into positive/negative CBOR bignums.
		931
		932	=item 4, 5 (decimal fraction/bigfloat)
		933
		934	Both decimal fractions and bigfloats are decoded into L<Math::BigFloat>
		935	objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always>
		936	encodes into a decimal fraction.
		937
		938	CBOR cannot represent bigfloats with I<very> large exponents - conversion
		939	of such big float objects is undefined.
		940
		941	Also, NaN and infinities are not encoded properly.
		942
		943	=item 21, 22, 23 (expected later JSON conversion)
		944
		945	CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore these
		946	tags.
		947
		948	=item 32 (URI)
		949
		950	These objects decode into L<URI> objects. The corresponding
		951	C<URI::TO_CBOR> method again results in a CBOR URI value.
		952
		953	=back
		954
		955	=cut
		956
324	=head2 CBOR and JSON	957	=head1 CBOR and JSON
325		958
326	CBOR is supposed to implement a superset of the JSON data model, and is,	959	CBOR is supposed to implement a superset of the JSON data model, and is,
327	with some coercion, able to represent all JSON texts (something that other	960	with some coercion, able to represent all JSON texts (something that other
328	"binary JSON" formats such as BSON generally do not support).	961	"binary JSON" formats such as BSON generally do not support).
329		962
…		…
388	properly. Half precision types are accepted, but not encoded.	1021	properly. Half precision types are accepted, but not encoded.
389		1022
390	Strict mode and canonical mode are not implemented.	1023	Strict mode and canonical mode are not implemented.
391		1024
392		1025
		1026	=head1 LIMITATIONS ON PERLS WITHOUT 64-BIT INTEGER SUPPORT
		1027
		1028	On perls that were built without 64 bit integer support (these are rare
		1029	nowadays, even on 32 bit architectures, as all major Perl distributions
		1030	are built with 64 bit integer support), support for any kind of 64 bit
		1031	integer in CBOR is very limited - most likely, these 64 bit values will
		1032	be truncated, corrupted, or otherwise not decoded correctly. This also
		1033	includes string, array and map sizes that are stored as 64 bit integers.
		1034
		1035
393	=head1 THREADS	1036	=head1 THREADS
394		1037
395	This module is I<not> guaranteed to be thread safe and there are no	1038	This module is I<not> guaranteed to be thread safe and there are no
396	plans to change this until Perl gets thread support (as opposed to the	1039	plans to change this until Perl gets thread support (as opposed to the
397	horribly slow so-called "threads" which are simply slow and bloated	1040	horribly slow so-called "threads" which are simply slow and bloated
…		…
409	Please refrain from using rt.cpan.org or any other bug reporting	1052	Please refrain from using rt.cpan.org or any other bug reporting
410	service. I put the contact address into my modules for a reason.	1053	service. I put the contact address into my modules for a reason.
411		1054
412	=cut	1055	=cut
413		1056
414	our $true = do { bless \(my $dummy = 1), "CBOR::XS::Boolean" };	1057	our %FILTER = (
415	our $false = do { bless \(my $dummy = 0), "CBOR::XS::Boolean" };	1058	0 => sub { # rfc4287 datetime, utf-8
		1059	require Time::Piece;
		1060	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"
		1061	# from FreeBSD, which can't parse ISO 8601, RFC3339, RFC4287 or much of anything
		1062	# else either. What's incredible over standard strptime totally escapes me.
		1063	# doesn't do fractional times, either. sigh.
		1064	# In fact, it's all a lie, it uses whatever strptime it wants, and of course,
		1065	# they are all incompatible. The openbsd one simply ignores %z (but according to the
		1066	# docs, it would be much more incredibly flexible indeed. If it worked, that is.).
		1067	scalar eval {
		1068	my $s = $_[1];
416		1069
417	sub true() { $true }	1070	$s =~ s/Z$/+00:00/;
418	sub false() { $false }	1071	$s =~ s/(\.[0-9]+)?([+-][0-9][0-9]):([0-9][0-9])$//
		1072	or die;
419		1073
420	sub is_bool($) {	1074	my $b = $1 - ($2 * 60 + $3) * 60; # fractional part + offset. hopefully
421	UNIVERSAL::isa $_[0], "CBOR::XS::Boolean"	1075	my $d = Time::Piece->strptime ($s, "%Y-%m-%dT%H:%M:%S");
422	# or UNIVERSAL::isa $_[0], "CBOR::Literal"	1076
		1077	Time::Piece::gmtime ($d->epoch + $b)
		1078	} || die "corrupted CBOR date/time string ($_[0])";
		1079	},
		1080
		1081	1 => sub { # seconds since the epoch, possibly fractional
		1082	require Time::Piece;
		1083	scalar Time::Piece::gmtime (pop)
		1084	},
		1085
		1086	2 => sub { # pos bigint
		1087	require Math::BigInt;
		1088	Math::BigInt->new ("0x" . unpack "H*", pop)
		1089	},
		1090
		1091	3 => sub { # neg bigint
		1092	require Math::BigInt;
		1093	-Math::BigInt->new ("0x" . unpack "H*", pop)
		1094	},
		1095
		1096	4 => sub { # decimal fraction, array
		1097	require Math::BigFloat;
		1098	Math::BigFloat->new ($_[1][1] . "E" . $_[1][0])
		1099	},
		1100
		1101	5 => sub { # bigfloat, array
		1102	require Math::BigFloat;
		1103	scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0])
		1104	},
		1105
		1106	21 => sub { pop }, # expected conversion to base64url encoding
		1107	22 => sub { pop }, # expected conversion to base64 encoding
		1108	23 => sub { pop }, # expected conversion to base16 encoding
		1109
		1110	# 24 # embedded cbor, byte string
		1111
		1112	32 => sub {
		1113	require URI;
		1114	URI->new (pop)
		1115	},
		1116
		1117	# 33 # base64url rfc4648, utf-8
		1118	# 34 # base64 rfc4648, utf-8
		1119	# 35 # regex pcre/ecma262, utf-8
		1120	# 36 # mime message rfc2045, utf-8
		1121	);
		1122
		1123	sub CBOR::XS::default_filter {
		1124	&{ $FILTER{$_[0]} or return }
423	}	1125	}
424		1126
		1127	sub URI::TO_CBOR {
		1128	my $uri = $_[0]->as_string;
		1129	utf8::upgrade $uri;
		1130	tag 32, $uri
		1131	}
		1132
		1133	sub Math::BigInt::TO_CBOR {
		1134	if ($_[0] >= -2147483648 && $_[0] <= 2147483647) {
		1135	$_[0]->numify
		1136	} else {
		1137	my $hex = substr $_[0]->as_hex, 2;
		1138	$hex = "0$hex" if 1 & length $hex; # sigh
		1139	tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex
		1140	}
		1141	}
		1142
		1143	sub Math::BigFloat::TO_CBOR {
		1144	my ($m, $e) = $_[0]->parts;
		1145	tag 4, [$e->numify, $m]
		1146	}
		1147
		1148	sub Time::Piece::TO_CBOR {
		1149	tag 1, 0 + $_[0]->epoch
		1150	}
		1151
425	XSLoader::load "CBOR::XS", $VERSION;	1152	XSLoader::load "CBOR::XS", $VERSION;
426
427	package CBOR::XS::Boolean;
428
429	use overload
430	"0+" => sub { ${$_[0]} },
431	"++" => sub { $_[0] = ${$_[0]} + 1 },
432	"--" => sub { $_[0] = ${$_[0]} - 1 },
433	fallback => 1;
434
435	1;
436		1153
437	=head1 SEE ALSO	1154	=head1 SEE ALSO
438		1155
439	The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,	1156	The L<JSON> and L<JSON::XS> modules that do similar, but human-readable,
440	serialisation.	1157	serialisation.
441		1158
		1159	The L<Types::Serialiser> module provides the data model for true, false
		1160	and error values.
		1161
442	=head1 AUTHOR	1162	=head1 AUTHOR
443		1163
444	Marc Lehmann <schmorp@schmorp.de>	1164	Marc Lehmann <schmorp@schmorp.de>
445	http://home.schmorp.de/	1165	http://home.schmorp.de/
446		1166
447	=cut	1167	=cut
448		1168
		1169	1
		1170

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing CBOR-XS/XS.pm (file contents): Revision 1.4 by root, Sat Oct 26 22:25:47 2013 UTC vs. Revision 1.54 by root, Mon Apr 25 18:24:44 2016 UTC

Diff Legend

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.4 by root, Sat Oct 26 22:25:47 2013 UTC vs.
Revision 1.54 by root, Mon Apr 25 18:24:44 2016 UTC