| 1 |
root |
1.1 |
=head1 NAME |
| 2 |
|
|
|
| 3 |
|
|
Types::Serialiser - simple data types for common serialisation formats |
| 4 |
|
|
|
| 5 |
|
|
=encoding utf-8 |
| 6 |
|
|
|
| 7 |
|
|
=head1 SYNOPSIS |
| 8 |
|
|
|
| 9 |
|
|
=head1 DESCRIPTION |
| 10 |
|
|
|
| 11 |
|
|
This module provides some extra datatypes that are used by common |
| 12 |
|
|
serialisation formats such as JSON or CBOR. The idea is to have a |
| 13 |
|
|
repository of simple/small constants and containers that can be shared by |
| 14 |
|
|
different implementations so they become interoperable with each other. |
| 15 |
|
|
|
| 16 |
|
|
=cut |
| 17 |
|
|
|
| 18 |
|
|
package Types::Serialiser; |
| 19 |
|
|
|
| 20 |
root |
1.4 |
use common::sense; # required to suppress annoying warnings |
| 21 |
|
|
|
| 22 |
root |
1.9 |
our $VERSION = '1.0'; |
| 23 |
root |
1.1 |
|
| 24 |
|
|
=head1 SIMPLE SCALAR CONSTANTS |
| 25 |
|
|
|
| 26 |
|
|
Simple scalar constants are values that are overloaded to act like simple |
| 27 |
|
|
Perl values, but have (class) type to differentiate them from normal Perl |
| 28 |
|
|
scalars. This is necessary because these have different representations in |
| 29 |
|
|
the serialisation formats. |
| 30 |
|
|
|
| 31 |
|
|
=head2 BOOLEANS (Types::Serialiser::Boolean class) |
| 32 |
|
|
|
| 33 |
|
|
This type has only two instances, true and false. A natural representation |
| 34 |
|
|
for these in Perl is C<1> and C<0>, but serialisation formats need to be |
| 35 |
|
|
able to differentiate between them and mere numbers. |
| 36 |
|
|
|
| 37 |
|
|
=over 4 |
| 38 |
|
|
|
| 39 |
|
|
=item $Types::Serialiser::true, Types::Serialiser::true |
| 40 |
|
|
|
| 41 |
|
|
This value represents the "true" value. In most contexts it acts like |
| 42 |
|
|
the number C<1>. It is up to you whether you use the variable form |
| 43 |
|
|
(C<$Types::Serialiser::true>) or the constant form (C<Types::Serialiser::true>). |
| 44 |
|
|
|
| 45 |
|
|
The constant is represented as a reference to a scalar containing C<1> - |
| 46 |
|
|
implementations are allowed to directly test for this. |
| 47 |
|
|
|
| 48 |
|
|
=item $Types::Serialiser::false, Types::Serialiser::false |
| 49 |
|
|
|
| 50 |
|
|
This value represents the "false" value. In most contexts it acts like |
| 51 |
|
|
the number C<0>. It is up to you whether you use the variable form |
| 52 |
|
|
(C<$Types::Serialiser::false>) or the constant form (C<Types::Serialiser::false>). |
| 53 |
|
|
|
| 54 |
|
|
The constant is represented as a reference to a scalar containing C<0> - |
| 55 |
|
|
implementations are allowed to directly test for this. |
| 56 |
|
|
|
| 57 |
|
|
=item $is_bool = Types::Serialiser::is_bool $value |
| 58 |
|
|
|
| 59 |
|
|
Returns true iff the C<$value> is either C<$Types::Serialiser::true> or |
| 60 |
|
|
C<$Types::Serialiser::false>. |
| 61 |
|
|
|
| 62 |
|
|
For example, you could differentiate between a perl true value and a |
| 63 |
|
|
C<Types::Serialiser::true> by using this: |
| 64 |
|
|
|
| 65 |
|
|
$value && Types::Serialiser::is_bool $value |
| 66 |
|
|
|
| 67 |
|
|
=item $is_true = Types::Serialiser::is_true $value |
| 68 |
|
|
|
| 69 |
|
|
Returns true iff C<$value> is C<$Types::Serialiser::true>. |
| 70 |
|
|
|
| 71 |
|
|
=item $is_false = Types::Serialiser::is_false $value |
| 72 |
|
|
|
| 73 |
|
|
Returns true iff C<$value> is C<$Types::Serialiser::false>. |
| 74 |
|
|
|
| 75 |
|
|
=back |
| 76 |
|
|
|
| 77 |
|
|
=head2 ERROR (Types::Serialiser::Error class) |
| 78 |
|
|
|
| 79 |
|
|
This class has only a single instance, C<error>. It is used to signal |
| 80 |
|
|
an encoding or decoding error. In CBOR for example, an object that |
| 81 |
|
|
couldn't be encoded will be represented by a CBOR undefined value, which |
| 82 |
|
|
is represented by the error value in Perl. |
| 83 |
|
|
|
| 84 |
|
|
=over 4 |
| 85 |
|
|
|
| 86 |
|
|
=item $Types::Serialiser::error, Types::Serialiser::error |
| 87 |
|
|
|
| 88 |
|
|
This value represents the "error" value. Accessing values of this type |
| 89 |
|
|
will throw an exception. |
| 90 |
|
|
|
| 91 |
|
|
The constant is represented as a reference to a scalar containing C<undef> |
| 92 |
|
|
- implementations are allowed to directly test for this. |
| 93 |
|
|
|
| 94 |
|
|
=item $is_error = Types::Serialiser::is_error $value |
| 95 |
|
|
|
| 96 |
|
|
Returns true iff C<$value> is C<$Types::Serialiser::error>. |
| 97 |
|
|
|
| 98 |
|
|
=back |
| 99 |
|
|
|
| 100 |
|
|
=cut |
| 101 |
|
|
|
| 102 |
root |
1.4 |
BEGIN {
   # JSON::PP::Boolean is the historical name of the boolean class. Instead
   # of making JSON::PP depend on this module, the two package stashes are
   # aliased, so both names refer to one and the same class.
   package JSON::PP::Boolean;

   *Types::Serialiser::Boolean:: = *JSON::PP::Boolean::;
}
| 109 |
|
|
|
| 110 |
root |
1.6 |
{
   # The overloads must be installed before any object is blessed into the
   # class, to work around bugs in perl < 5.18 (it seems to be fixed in 5.18).
   package Types::Serialiser::BooleanBase;

   use overload
      # numeric conversion yields the scalar the object refers to
      "0+"     => sub { my ($bool) = @_; $$bool },
      # increment/decrement replace the caller's variable (aliased via
      # $_[0]) with a plain number instead of modifying the shared object
      "++"     => sub { $_[0] = 1 + ${ $_[0] } },
      "--"     => sub { $_[0] = ${ $_[0] } - 1 },
      fallback => 1;

   # the boolean class inherits its operator behaviour from this base class
   @Types::Serialiser::Boolean::ISA = ("Types::Serialiser::BooleanBase");
}
| 123 |
|
|
|
| 124 |
root |
1.1 |
# The three singleton values. Each one is a blessed reference to its own
# private scalar, which holds 1 (true), 0 (false) or undef (error).
our ($true, $false, $error);

{
   my ($one, $zero, $nothing) = (1, 0); # $nothing deliberately stays undef

   $true  = bless \$one,     "Types::Serialiser::Boolean";
   $false = bless \$zero,    "Types::Serialiser::Boolean";
   $error = bless \$nothing, "Types::Serialiser::Error";
}

# Constant-style accessors returning the singletons above.
sub true  () { return $true  }
sub false () { return $false }
sub error () { return $error }
| 131 |
|
|
|
| 132 |
|
|
# Type predicates. Each takes exactly one scalar (the ($) prototype forces
# scalar context on the argument) and returns a true or false value.
#
# The class test is always done first, and the truthiness of the argument is
# only evaluated once it is known to be a boolean object. Evaluating the
# truthiness first would invoke the overloaded conversion of arbitrary
# objects, which croaks for values such as $Types::Serialiser::error.

# true iff the argument is a Types::Serialiser::Boolean object
sub is_bool ($) {
   UNIVERSAL::isa ($_[0], "Types::Serialiser::Boolean")
}

# true iff the argument is a boolean object that evaluates to true
sub is_true ($) {
   UNIVERSAL::isa ($_[0], "Types::Serialiser::Boolean") && !!$_[0]
}

# true iff the argument is a boolean object that evaluates to false
sub is_false ($) {
   UNIVERSAL::isa ($_[0], "Types::Serialiser::Boolean") && !$_[0]
}

# true iff the argument is a Types::Serialiser::Error object
sub is_error ($) {
   UNIVERSAL::isa ($_[0], "Types::Serialiser::Error")
}
| 136 |
|
|
|
| 137 |
|
|
package Types::Serialiser::Error;

# Handler shared by all overloaded operations of the error value: any
# attempt to use the value croaks.
sub error {
   require Carp; # only loaded when the error value is actually used
   Carp::croak ("caught attempt to use the Types::Serialiser::error value");
}

# route numeric conversion, increment and decrement to the croaking handler
use overload
   fallback => 1,
   map { ($_, \&error) } "0+", "++", "--";
| 149 |
|
|
|
| 150 |
root |
1.2 |
=head1 NOTES FOR XS USERS |
| 151 |
|
|
|
| 152 |
|
|
The recommended way to detect whether a scalar is one of these objects |
| 153 |
|
|
is to check whether the stash is the C<Types::Serialiser::Boolean> or |
| 154 |
|
|
C<Types::Serialiser::Error> stash, and then follow the scalar reference to |
| 155 |
|
|
see if it's C<1> (true), C<0> (false) or C<undef> (error). |
| 156 |
|
|
|
| 157 |
|
|
While it is possible to use an isa test, directly comparing stash pointers |
| 158 |
|
|
is faster and guaranteed to work. |
| 159 |
|
|
|
| 160 |
root |
1.4 |
For historical reasons, the C<Types::Serialiser::Boolean> stash is |
| 161 |
|
|
just an alias for C<JSON::PP::Boolean>. When printed, the classname |
| 162 |
root |
1.8 |
will usually be C<JSON::PP::Boolean>, but isa tests and stash pointer |
| 163 |
root |
1.4 |
comparison will normally work correctly (i.e. Types::Serialiser::true ISA |
| 164 |
|
|
JSON::PP::Boolean, but also ISA Types::Serialiser::Boolean). |
| 165 |
|
|
|
| 166 |
root |
1.3 |
=head1 A GENERIC OBJECT SERIALISATION PROTOCOL |
| 167 |
|
|
|
| 168 |
|
|
This section explains the object serialisation protocol used by |
| 169 |
|
|
L<CBOR::XS>. It is meant to be generic enough to support any kind of |
| 170 |
|
|
generic object serialiser. |
| 171 |
|
|
|
| 172 |
|
|
This protocol is called "the Types::Serialiser object serialisation |
| 173 |
|
|
protocol". |
| 174 |
|
|
|
| 175 |
|
|
=head2 ENCODING |
| 176 |
|
|
|
| 177 |
|
|
When the encoder encounters an object that it cannot otherwise encode (for |
| 178 |
|
|
example, L<CBOR::XS> can encode a few special types itself, and will first |
| 179 |
|
|
attempt to use the special C<TO_CBOR> serialisation protocol), it will |
| 180 |
|
|
look up the C<FREEZE> method on the object. |
| 181 |
|
|
|
| 182 |
root |
1.9 |
Note that the C<FREEZE> method will normally be called I<during> encoding, |
| 183 |
|
|
and I<MUST NOT> change the data structure that is being encoded in any |
| 184 |
|
|
way, or it might cause memory corruption or worse. |
| 185 |
|
|
|
| 186 |
root |
1.8 |
If the C<FREEZE> method exists, the encoder will call it with two arguments: the object to serialise, |
| 187 |
root |
1.9 |
and a constant string that indicates the name of the data model. For |
| 188 |
|
|
example L<CBOR::XS> uses C<CBOR>, and the L<JSON> and L<JSON::XS> modules |
| 189 |
root |
1.8 |
(or any other JSON serialiser), would use C<JSON> as second argument. |
| 190 |
root |
1.3 |
|
| 191 |
|
|
The C<FREEZE> method can then return zero or more values to identify the |
| 192 |
|
|
object instance. The serialiser is then supposed to encode the class name |
| 193 |
|
|
and all of these return values (which must be encodable in the format) |
| 194 |
root |
1.9 |
using the relevant form for Perl objects. In CBOR for example, there is a |
| 195 |
root |
1.3 |
registered tag number for encoded perl objects. |
| 196 |
|
|
|
| 197 |
root |
1.5 |
The values that C<FREEZE> returns must be serialisable with the serialiser |
| 198 |
|
|
that calls it. Therefore, it is recommended to use simple types such as |
| 199 |
|
|
strings and numbers, and maybe array references and hashes (basically, the |
| 200 |
|
|
JSON data model). You can always use a more complex format for a specific |
| 201 |
root |
1.9 |
data model by checking the second argument, the data model. |
| 202 |
|
|
|
| 203 |
|
|
The "data model" is not the same as the "data format" - the data model |
| 204 |
|
|
indicates what types and kinds of return values can be returned from |
| 205 |
|
|
C<FREEZE>. For example, in C<CBOR> it is permissible to return tagged CBOR |
| 206 |
|
|
values, while JSON does not support these at all, so C<JSON> would be a |
| 207 |
|
|
valid (but too limited) data model name for C<CBOR::XS>. Similarly, a |
| 208 |
|
|
serialising format that supports more or less the same data model as JSON |
| 209 |
|
|
could use C<JSON> as data model without losing anything. |
| 210 |
root |
1.5 |
|
| 211 |
root |
1.3 |
=head2 DECODING |
| 212 |
|
|
|
| 213 |
|
|
When the decoder then encounters such an encoded perl object, it should |
| 214 |
|
|
look up the C<THAW> method on the stored classname, and invoke it with the |
| 215 |
root |
1.8 |
classname, the constant string to identify the data model/data format, and |
| 216 |
|
|
all the return values returned by C<FREEZE>. |
| 217 |
root |
1.3 |
|
| 218 |
|
|
=head2 EXAMPLES |
| 219 |
|
|
|
| 220 |
|
|
See the C<OBJECT SERIALISATION> section in the L<CBOR::XS> manpage for |
| 221 |
|
|
more details, an example implementation, and code examples. |
| 222 |
|
|
|
| 223 |
|
|
Here is an example C<FREEZE>/C<THAW> method pair: |
| 224 |
|
|
|
| 225 |
|
|
sub My::Object::FREEZE { |
| 226 |
root |
1.8 |
my ($self, $model) = @_; |
| 227 |
root |
1.3 |
|
| 228 |
|
|
($self->{type}, $self->{id}, $self->{variant}) |
| 229 |
|
|
} |
| 230 |
|
|
|
| 231 |
|
|
sub My::Object::THAW { |
| 232 |
root |
1.8 |
my ($class, $model, $type, $id, $variant) = @_; |
| 233 |
root |
1.3 |
|
| 234 |
root |
1.7 |
$class->new (type => $type, id => $id, variant => $variant) |
| 235 |
root |
1.3 |
} |
| 236 |
|
|
|
| 237 |
root |
1.1 |
=head1 BUGS |
| 238 |
|
|
|
| 239 |
|
|
The use of L<overload> makes this module much heavier than it should be |
| 240 |
|
|
(on my system, this module: 4kB RSS, overload: 260kB RSS). |
| 241 |
|
|
|
| 242 |
|
|
=head1 SEE ALSO |
| 243 |
|
|
|
| 244 |
|
|
Currently, L<JSON::XS> and L<CBOR::XS> use these types. |
| 245 |
|
|
|
| 246 |
|
|
=head1 AUTHOR |
| 247 |
|
|
|
| 248 |
|
|
Marc Lehmann <schmorp@schmorp.de> |
| 249 |
|
|
http://home.schmorp.de/ |
| 250 |
|
|
|
| 251 |
|
|
=cut |
| 252 |
|
|
|
| 253 |
|
|
1 |
| 254 |
|
|
|