[ViewVC] Diff of: cvs/CBOR-XS/XS.pm

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.31 by root, Sat Nov 30 18:13:53 2013 UTC vs.
Revision 1.40 by root, Sun Jan 5 14:24:54 2014 UTC

…		…
64		64
65	package CBOR::XS;	65	package CBOR::XS;
66		66
67	use common::sense;	67	use common::sense;
68		68
69	our $VERSION = '1.0';	69	our $VERSION = 1.25;
70	our @ISA = qw(Exporter);	70	our @ISA = qw(Exporter);
71		71
72	our @EXPORT = qw(encode_cbor decode_cbor);	72	our @EXPORT = qw(encode_cbor decode_cbor);
73		73
74	use Exporter;	74	use Exporter;
…		…
242	the standard CBOR way.	242	the standard CBOR way.
243		243
244	This option does not affect C<decode> in any way - string references will	244	This option does not affect C<decode> in any way - string references will
245	always be decoded properly if present.	245	always be decoded properly if present.
246		246
		247	=item $cbor = $cbor->validate_utf8 ([$enable])
		248
		249	=item $enabled = $cbor->get_validate_utf8
		250
		251	If C<$enable> is true (or missing), then C<decode> will validate that
		252	elements (text strings) containing UTF-8 data in fact contain valid UTF-8
		253	data (instead of blindly accepting it). This validation obviously takes
		254	extra time during decoding.
		255
		256	The concept of "valid UTF-8" used is perl's concept, which is a superset
		257	of the official UTF-8.
		258
		259	If C<$enable> is false (the default), then C<decode> will blindly accept
		260	UTF-8 data, marking them as valid UTF-8 in the resulting data structure
		261	regardless of whether thats true or not.
		262
		263	Perl isn't too happy about corrupted UTF-8 in strings, but should
		264	generally not crash or do similarly evil things. Extensions might be not
		265	so forgiving, so it's recommended to turn on this setting if you receive
		266	untrusted CBOR.
		267
		268	This option does not affect C<encode> in any way - strings that are
		269	supposedly valid UTF-8 will simply be dumped into the resulting CBOR
		270	string without checking whether that is, in fact, true or not.
		271
247	=item $cbor = $cbor->filter ([$cb->($tag, $value)])	272	=item $cbor = $cbor->filter ([$cb->($tag, $value)])
248		273
249	=item $cb_or_undef = $cbor->get_filter	274	=item $cb_or_undef = $cbor->get_filter
250		275
251	Sets or replaces the tagged value decoding filter (when C<$cb> is	276	Sets or replaces the tagged value decoding filter (when C<$cb> is
…		…
305	and you need to know where the first CBOR string ends amd the next one	330	and you need to know where the first CBOR string ends amd the next one
306	starts.	331	starts.
307		332
308	CBOR::XS->new->decode_prefix ("......")	333	CBOR::XS->new->decode_prefix ("......")
309	=> ("...", 3)	334	=> ("...", 3)
		335
		336	=back
		337
		338	=head2 INCREMENTAL PARSING
		339
		340	In some cases, there is the need for incremental parsing of JSON
		341	texts. While this module always has to keep both CBOR text and resulting
		342	Perl data structure in memory at one time, it does allow you to parse a
		343	CBOR stream incrementally, using a similar to using "decode_prefix" to see
		344	if a full CBOR object is available, but is much more efficient.
		345
		346	It basically works by parsing as much of a CBOR string as possible - if
		347	the CBOR data is not complete yet, the pasrer will remember where it was,
		348	to be able to restart when more data has been accumulated. Once enough
		349	data is available to either decode a complete CBOR value or raise an
		350	error, a real decode will be attempted.
		351
		352	A typical use case would be a network protocol that consists of sending
		353	and receiving CBOR-encoded messages. The solution that works with CBOR and
		354	about anything else is by prepending a length to every CBOR value, so the
		355	receiver knows how many octets to read. More compact (and slightly slower)
		356	would be to just send CBOR values back-to-back, as C<CBOR::XS> knows where
		357	a CBOR value ends, and doesn't need an explicit length.
		358
		359	The following methods help with this:
		360
		361	=over 4
		362
		363	=item @decoded = $cbor->incr_parse ($buffer)
		364
		365	This method attempts to decode exactly one CBOR value from the beginning
		366	of the given C<$buffer>. The value is removed from the C<$buffer> on
		367	success. When C<$buffer> doesn't contain a complete value yet, it returns
		368	nothing. Finally, when the C<$buffer> doesn't start with something
		369	that could ever be a valid CBOR value, it raises an exception, just as
		370	C<decode> would. In the latter case the decoder state is undefined and
		371	must be reset before being able to parse further.
		372
		373	This method modifies the C<$buffer> in place. When no CBOR value can be
		374	decoded, the decoder stores the current string offset. On the next call,
		375	continues decoding at the place where it stopped before. For this to make
		376	sense, the C<$buffer> must begin with the same octets as on previous
		377	unsuccessful calls.
		378
		379	You can call this method in scalar context, in which case it either
		380	returns a decoded value or C<undef>. This makes it impossible to
		381	distinguish between CBOR null values (which decode to C<undef>) and an
		382	unsuccessful decode, which is often acceptable.
		383
		384	=item @decoded = $cbor->incr_parse_multiple ($buffer)
		385
		386	Same as C<incr_parse>, but attempts to decode as many CBOR values as
		387	possible in one go, instead of at most one. Calls to C<incr_parse> and
		388	C<incr_parse_multiple> can be interleaved.
		389
		390	=item $cbor->incr_reset
		391
		392	Resets the incremental decoder. This throws away any saved state, so that
		393	subsequent calls to C<incr_parse> or C<incr_parse_multiple> start to parse
		394	a new CBOR value from the beginning of the C<$buffer> again.
		395
		396	This method can be caled at any time, but it I<must> be called if you want
		397	to change your C<$buffer> or there was a decoding error and you want to
		398	reuse the C<$cbor> object for future incremental parsings.
310		399
311	=back	400	=back
312		401
313		402
314	=head1 MAPPING	403	=head1 MAPPING
…		…
773	perl core distribution (e.g. L<URI>), it is (currently) up to the user to	862	perl core distribution (e.g. L<URI>), it is (currently) up to the user to
774	provide these modules. The decoding usually fails with an exception if the	863	provide these modules. The decoding usually fails with an exception if the
775	required module cannot be loaded.	864	required module cannot be loaded.
776		865
777	=over 4	866	=over 4
		867
		868	=item 0, 1 (date/time string, seconds since the epoch)
		869
		870	These tags are decoded into L<Time::Piece> objects. The corresponding
		871	C<Time::Piece::TO_CBOR> method always encodes into tag 1 values currently.
		872
		873	The L<Time::Piece> API is generally surprisingly bad, and fractional
		874	seconds are only accidentally kept intact, so watch out. On the plus side,
		875	the module comes with perl since 5.10, which has to count for something.
778		876
779	=item 2, 3 (positive/negative bignum)	877	=item 2, 3 (positive/negative bignum)
780		878
781	These tags are decoded into L<Math::BigInt> objects. The corresponding	879	These tags are decoded into L<Math::BigInt> objects. The corresponding
782	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR	880	C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR
…		…
947	service. I put the contact address into my modules for a reason.	1045	service. I put the contact address into my modules for a reason.
948		1046
949	=cut	1047	=cut
950		1048
951	our %FILTER = (	1049	our %FILTER = (
952	# 0 # rfc4287 datetime, utf-8	1050	0 => sub { # rfc4287 datetime, utf-8
953	# 1 # unix timestamp, any	1051	require Time::Piece;
		1052	# Time::Piece::Strptime uses the "incredibly flexible date parsing routine"
		1053	# from FreeBSD, which can't parse ISO 8601, RFC3339, RFC4287 or much of anything
		1054	# else either. Whats incredibe over standard strptime totally escapes me.
		1055	# doesn't do fractional times, either. sigh.
		1056	# In fact, it's all a lie, it uses whatever strptime it wants, and of course,
		1057	# they are all incomptible. The openbsd one simply ignores %z (but according to the
		1058	# docs, it would be much more incredibly flexible indeed. If it worked, that is.).
		1059	scalar eval {
		1060	my $s = $_[1];
		1061
		1062	$s =~ s/Z$/+00:00/;
		1063	$s =~ s/(\.[0-9]+)?([+-][0-9][0-9]):([0-9][0-9])$//
		1064	or die;
		1065
		1066	my $b = $1 - ($2 * 60 + $3) * 60; # fractional part + offset. hopefully
		1067	my $d = Time::Piece->strptime ($s, "%Y-%m-%dT%H:%M:%S");
		1068
		1069	Time::Piece::gmtime ($d->epoch + $b)
		1070	} \|\| die "corrupted CBOR date/time string ($_[0])";
		1071	},
		1072
		1073	1 => sub { # seconds since the epoch, possibly fractional
		1074	require Time::Piece;
		1075	scalar Time::Piece::gmtime (pop)
		1076	},
954		1077
955	2 => sub { # pos bigint	1078	2 => sub { # pos bigint
956	require Math::BigInt;	1079	require Math::BigInt;
957	Math::BigInt->new ("0x" . unpack "H*", pop)	1080	Math::BigInt->new ("0x" . unpack "H*", pop)
958	},	1081	},
…		…
994	}	1117	}
995		1118
996	sub URI::TO_CBOR {	1119	sub URI::TO_CBOR {
997	my $uri = $_[0]->as_string;	1120	my $uri = $_[0]->as_string;
998	utf8::upgrade $uri;	1121	utf8::upgrade $uri;
999	CBOR::XS::tag 32, $uri	1122	tag 32, $uri
1000	}	1123	}
1001		1124
1002	sub Math::BigInt::TO_CBOR {	1125	sub Math::BigInt::TO_CBOR {
1003	if ($_[0] >= -2147483648 && $_[0] <= 2147483647) {	1126	if ($_[0] >= -2147483648 && $_[0] <= 2147483647) {
1004	$_[0]->numify	1127	$_[0]->numify
1005	} else {	1128	} else {
1006	my $hex = substr $_[0]->as_hex, 2;	1129	my $hex = substr $_[0]->as_hex, 2;
1007	$hex = "0$hex" if 1 & length $hex; # sigh	1130	$hex = "0$hex" if 1 & length $hex; # sigh
1008	CBOR::XS::tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex	1131	tag $_[0] >= 0 ? 2 : 3, pack "H*", $hex
1009	}	1132	}
1010	}	1133	}
1011		1134
1012	sub Math::BigFloat::TO_CBOR {	1135	sub Math::BigFloat::TO_CBOR {
1013	my ($m, $e) = $_[0]->parts;	1136	my ($m, $e) = $_[0]->parts;
1014	CBOR::XS::tag 4, [$e->numify, $m]	1137	tag 4, [$e->numify, $m]
		1138	}
		1139
		1140	sub Time::Piece::TO_CBOR {
		1141	tag 1, 0 + $_[0]->epoch
1015	}	1142	}
1016		1143
1017	XSLoader::load "CBOR::XS", $VERSION;	1144	XSLoader::load "CBOR::XS", $VERSION;
1018		1145
1019	=head1 SEE ALSO	1146	=head1 SEE ALSO

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing CBOR-XS/XS.pm (file contents): Revision 1.31 by root, Sat Nov 30 18:13:53 2013 UTC vs. Revision 1.40 by root, Sun Jan 5 14:24:54 2014 UTC

Diff Legend

Comparing CBOR-XS/XS.pm (file contents):
Revision 1.31 by root, Sat Nov 30 18:13:53 2013 UTC vs.
Revision 1.40 by root, Sun Jan 5 14:24:54 2014 UTC