1 |
root |
1.1 |
=head1 NAME |
2 |
|
|
|
3 |
|
|
Types::Serialiser - simple data types for common serialisation formats |
4 |
|
|
|
5 |
|
|
=encoding utf-8 |
6 |
|
|
|
7 |
|
|
=head1 SYNOPSIS |
8 |
|
|
|
9 |
|
|
=head1 DESCRIPTION |
10 |
|
|
|
11 |
|
|
This module provides some extra datatypes that are used by common |
12 |
|
|
serialisation formats such as JSON or CBOR. The idea is to have a |
13 |
|
|
repository of simple/small constants and containers that can be shared by |
14 |
|
|
different implementations so they become interoperable between each other. |
15 |
|
|
|
16 |
|
|
=cut |
17 |
|
|
|
18 |
|
|
package Types::Serialiser; |
19 |
|
|
|
20 |
root |
1.4 |
use common::sense; # required to suppress annoying warnings |
21 |
|
|
|
22 |
root |
1.10 |
our $VERSION = '1.01'; |
23 |
root |
1.1 |
|
24 |
|
|
=head1 SIMPLE SCALAR CONSTANTS |
25 |
|
|
|
26 |
|
|
Simple scalar constants are values that are overloaded to act like simple |
27 |
|
|
Perl values, but have a (class) type to differentiate them from normal Perl |
28 |
|
|
scalars. This is necessary because these have different representations in |
29 |
|
|
the serialisation formats. |
30 |
|
|
|
31 |
root |
1.10 |
In the following, functions with zero or one arguments have a prototype of |
32 |
|
|
C<()> and C<($)>, respectively, so act as constants and unary operators. |
33 |
|
|
|
34 |
root |
1.1 |
=head2 BOOLEANS (Types::Serialiser::Boolean class) |
35 |
|
|
|
36 |
|
|
This type has only two instances, true and false. A natural representation |
37 |
|
|
for these in Perl is C<1> and C<0>, but serialisation formats need to be |
38 |
|
|
able to differentiate between them and mere numbers. |
39 |
|
|
|
40 |
|
|
=over 4 |
41 |
|
|
|
42 |
|
|
=item $Types::Serialiser::true, Types::Serialiser::true |
43 |
|
|
|
44 |
|
|
This value represents the "true" value. In most contexts it acts like |
45 |
|
|
the number C<1>. It is up to you whether you use the variable form |
46 |
|
|
(C<$Types::Serialiser::true>) or the constant form (C<Types::Serialiser::true>). |
47 |
|
|
|
48 |
|
|
The constant is represented as a reference to a scalar containing C<1> - |
49 |
|
|
implementations are allowed to directly test for this. |
50 |
|
|
|
51 |
|
|
=item $Types::Serialiser::false, Types::Serialiser::false |
52 |
|
|
|
53 |
|
|
This value represents the "false" value. In most contexts it acts like |
54 |
|
|
the number C<0>. It is up to you whether you use the variable form |
55 |
|
|
(C<$Types::Serialiser::false>) or the constant form (C<Types::Serialiser::false>). |
56 |
|
|
|
57 |
|
|
The constant is represented as a reference to a scalar containing C<0> - |
58 |
|
|
implementations are allowed to directly test for this. |
59 |
|
|
|
60 |
root |
1.10 |
=item Types::Serialiser::as_bool $value |
61 |
|
|
|
62 |
|
|
Converts a Perl scalar into a boolean, which is useful syntactic |
63 |
|
|
sugar. Strictly equivalent to: |
64 |
|
|
|
65 |
|
|
$value ? $Types::Serialiser::true : $Types::Serialiser::false |
66 |
|
|
|
67 |
root |
1.1 |
=item $is_bool = Types::Serialiser::is_bool $value |
68 |
|
|
|
69 |
|
|
Returns true iff the C<$value> is either C<$Types::Serialiser::true> or |
70 |
|
|
C<$Types::Serialiser::false>. |
71 |
|
|
|
72 |
|
|
For example, you could differentiate between a perl true value and a |
73 |
|
|
C<Types::Serialiser::true> by using this: |
74 |
|
|
|
75 |
|
|
$value && Types::Serialiser::is_bool $value |
76 |
|
|
|
77 |
|
|
=item $is_true = Types::Serialiser::is_true $value |
78 |
|
|
|
79 |
|
|
Returns true iff C<$value> is C<$Types::Serialiser::true>. |
80 |
|
|
|
81 |
|
|
=item $is_false = Types::Serialiser::is_false $value |
82 |
|
|
|
83 |
|
|
Returns true iff C<$value> is C<$Types::Serialiser::false>. |
84 |
|
|
|
85 |
|
|
=back |
86 |
|
|
|
87 |
|
|
=head2 ERROR (Types::Serialiser::Error class) |
88 |
|
|
|
89 |
|
|
This class has only a single instance, C<error>. It is used to signal |
90 |
|
|
an encoding or decoding error. In CBOR for example, an object that |
91 |
|
|
couldn't be encoded will be represented by a CBOR undefined value, which |
92 |
|
|
is represented by the error value in Perl. |
93 |
|
|
|
94 |
|
|
=over 4 |
95 |
|
|
|
96 |
|
|
=item $Types::Serialiser::error, Types::Serialiser::error |
97 |
|
|
|
98 |
|
|
This value represents the "error" value. Accessing values of this type |
99 |
|
|
will throw an exception. |
100 |
|
|
|
101 |
|
|
The constant is represented as a reference to a scalar containing C<undef> |
102 |
|
|
- implementations are allowed to directly test for this. |
103 |
|
|
|
104 |
|
|
=item $is_error = Types::Serialiser::is_error $value |
105 |
|
|
|
106 |
|
|
Returns true iff C<$value> is C<$Types::Serialiser::error>. |
107 |
|
|
|
108 |
|
|
=back |
109 |
|
|
|
110 |
|
|
=cut |
111 |
|
|
|
112 |
root |
1.4 |
BEGIN { |
113 |
|
|
# For historical reasons, and to avoid extra dependencies in JSON::PP, |
114 |
|
|
# the Types::Serialiser::Boolean symbol table (stash) is made an alias of the JSON::PP::Boolean one, so both class names refer to the same package. |
115 |
|
|
package JSON::PP::Boolean; |
116 |
root |
1.6 |
|
117 |
root |
1.4 |
*Types::Serialiser::Boolean:: = *JSON::PP::Boolean::; |
118 |
|
|
} |
119 |
|
|
|
120 |
root |
1.6 |
{ |
121 |
|
|
# This must be done before blessing to work around bugs |
122 |
|
|
# in perl < 5.18 (it seems to be fixed in 5.18). |
# The overloads below make a boolean object numify to the scalar it |
# references ("0+"); "++" and "--" assign a plain number to $_[0] |
# rather than modifying the referenced scalar. The last statement makes |
# Types::Serialiser::Boolean inherit these overloads via @ISA. |
123 |
|
|
package Types::Serialiser::BooleanBase; |
124 |
|
|
|
125 |
|
|
use overload |
126 |
|
|
"0+" => sub { ${$_[0]} }, |
127 |
|
|
"++" => sub { $_[0] = ${$_[0]} + 1 }, |
128 |
|
|
"--" => sub { $_[0] = ${$_[0]} - 1 }, |
129 |
|
|
fallback => 1; |
130 |
|
|
|
131 |
|
|
@Types::Serialiser::Boolean::ISA = Types::Serialiser::BooleanBase::; |
132 |
|
|
} |
133 |
|
|
|
134 |
root |
1.1 |
# The shared "true" object: a reference to a scalar holding 1, blessed
# into Types::Serialiser::Boolean.
our $true = do {
    my $one = 1;
    bless \$one, Types::Serialiser::Boolean::
};
135 |
|
|
# The shared "false" object: a reference to a scalar holding 0, blessed
# into Types::Serialiser::Boolean.
our $false = do {
    my $zero = 0;
    bless \$zero, Types::Serialiser::Boolean::
};
136 |
|
|
# The shared "error" object: a reference to a scalar left undefined,
# blessed into Types::Serialiser::Error.
our $error = do {
    my $undefined;
    bless \$undefined, Types::Serialiser::Error::
};
137 |
|
|
|
138 |
|
|
# Constant-style accessor (empty prototype) returning the shared object
# stored in $true.
sub true () {
    return $true;
}
139 |
|
|
# Constant-style accessor (empty prototype) returning the shared object
# stored in $false.
sub false () {
    return $false;
}
140 |
|
|
# Constant-style accessor (empty prototype) returning the shared object
# stored in $error.
sub error () {
    return $error;
}
141 |
|
|
|
142 |
root |
1.10 |
# Maps any Perl scalar to one of the two boolean objects: $true when the
# argument is true in boolean context, $false otherwise.
sub as_bool($) {
    return $true if $_[0];
    return $false;
}
143 |
|
|
|
144 |
root |
1.1 |
# Returns true iff the argument is a boolean object, i.e. a reference
# blessed into (a subclass of) Types::Serialiser::Boolean.
#
# The ref() guard is needed because UNIVERSAL::isa, called as a function,
# also returns true for a plain string that names the class, which is not
# a boolean object.
sub is_bool ($) {
    ref($_[0]) && UNIVERSAL::isa($_[0], Types::Serialiser::Boolean::)
}
145 |
|
|
# Returns true iff the argument is a boolean object that evaluates to true.
#
# The class check runs before the truth test so that values of other
# classes (notably the error value, whose overloads throw) are rejected
# without being evaluated. The ref() guard rejects plain strings that
# merely name the class, which UNIVERSAL::isa alone would accept.
sub is_true ($) {
    ref($_[0])
        && UNIVERSAL::isa($_[0], Types::Serialiser::Boolean::)
        && !!$_[0]
}
146 |
|
|
# Returns true iff the argument is a boolean object that evaluates to false.
#
# The class check runs before the truth test so that values of other
# classes (notably the error value, whose overloads throw) are rejected
# without being evaluated. The ref() guard rejects non-references such as
# 0, "" or undef, which are false but are not boolean objects.
sub is_false ($) {
    ref($_[0])
        && UNIVERSAL::isa($_[0], Types::Serialiser::Boolean::)
        && !$_[0]
}
147 |
|
|
# Returns true iff the argument is an error object, i.e. a reference
# blessed into (a subclass of) Types::Serialiser::Error.
#
# The ref() guard is needed because UNIVERSAL::isa, called as a function,
# also returns true for a plain string that names the class. Only the
# class is inspected; the value itself is never evaluated, so the check
# does not trigger the error value's throwing overloads.
sub is_error ($) {
    ref($_[0]) && UNIVERSAL::isa($_[0], Types::Serialiser::Error::)
}
148 |
|
|
|
149 |
|
|
package Types::Serialiser::Error;

# Handler shared by all overloaded operations of the error class: every
# attempt to use an error value ends up here and throws via Carp::croak.
sub error {
    my $message = "caught attempt to use the Types::Serialiser::error value";

    require Carp; # loaded on demand, only when an error value is used
    Carp::croak ($message);
}

# Route numification, increment and decrement to error(); fallback => 1
# is passed through unchanged.
use overload
    (map { ($_ => \&error) } "0+", "++", "--"),
    fallback => 1;
161 |
|
|
|
162 |
root |
1.2 |
=head1 NOTES FOR XS USERS |
163 |
|
|
|
164 |
|
|
The recommended way to detect whether a scalar is one of these objects |
165 |
|
|
is to check whether the stash is the C<Types::Serialiser::Boolean> or |
166 |
|
|
C<Types::Serialiser::Error> stash, and then follow the scalar reference to |
167 |
|
|
see if it's C<1> (true), C<0> (false) or C<undef> (error). |
168 |
|
|
|
169 |
|
|
While it is possible to use an isa test, directly comparing stash pointers |
170 |
|
|
is faster and guaranteed to work. |
171 |
|
|
|
172 |
root |
1.4 |
For historical reasons, the C<Types::Serialiser::Boolean> stash is |
173 |
|
|
just an alias for C<JSON::PP::Boolean>. When printed, the classname |
174 |
root |
1.8 |
will usually be C<JSON::PP::Boolean>, but isa tests and stash pointer |
175 |
root |
1.4 |
comparison will normally work correctly (i.e. Types::Serialiser::true ISA |
176 |
|
|
JSON::PP::Boolean, but also ISA Types::Serialiser::Boolean). |
177 |
|
|
|
178 |
root |
1.3 |
=head1 A GENERIC OBJECT SERIALISATION PROTOCOL |
179 |
|
|
|
180 |
|
|
This section explains the object serialisation protocol used by |
181 |
|
|
L<CBOR::XS>. It is meant to be generic enough to support any kind of |
182 |
|
|
generic object serialiser. |
183 |
|
|
|
184 |
|
|
This protocol is called "the Types::Serialiser object serialisation |
185 |
|
|
protocol". |
186 |
|
|
|
187 |
|
|
=head2 ENCODING |
188 |
|
|
|
189 |
|
|
When the encoder encounters an object that it cannot otherwise encode (for |
190 |
|
|
example, L<CBOR::XS> can encode a few special types itself, and will first |
191 |
|
|
attempt to use the special C<TO_CBOR> serialisation protocol), it will |
192 |
|
|
look up the C<FREEZE> method on the object. |
193 |
|
|
|
194 |
root |
1.9 |
Note that the C<FREEZE> method will normally be called I<during> encoding, |
195 |
|
|
and I<MUST NOT> change the data structure that is being encoded in any |
196 |
|
|
way, or it might cause memory corruption or worse. |
197 |
|
|
|
198 |
root |
1.8 |
If the C<FREEZE> method exists, the encoder will call it with two arguments: the object to serialise, |
199 |
root |
1.9 |
and a constant string that indicates the name of the data model. For |
200 |
|
|
example L<CBOR::XS> uses C<CBOR>, and the L<JSON> and L<JSON::XS> modules |
201 |
root |
1.8 |
(or any other JSON serialiser), would use C<JSON> as second argument. |
202 |
root |
1.3 |
|
203 |
|
|
The C<FREEZE> method can then return zero or more values to identify the |
204 |
|
|
object instance. The serialiser is then supposed to encode the class name |
205 |
|
|
and all of these return values (which must be encodable in the format) |
206 |
root |
1.9 |
using the relevant form for Perl objects. In CBOR for example, there is a |
207 |
root |
1.3 |
registered tag number for encoded perl objects. |
208 |
|
|
|
209 |
root |
1.5 |
The values that C<FREEZE> returns must be serialisable with the serialiser |
210 |
|
|
that calls it. Therefore, it is recommended to use simple types such as |
211 |
|
|
strings and numbers, and maybe array references and hashes (basically, the |
212 |
|
|
JSON data model). You can always use a more complex format for a specific |
213 |
root |
1.9 |
data model by checking the second argument, the data model. |
214 |
|
|
|
215 |
|
|
The "data model" is not the same as the "data format" - the data model |
216 |
|
|
indicates what types and kinds of return values can be returned from |
217 |
|
|
C<FREEZE>. For example, in C<CBOR> it is permissible to return tagged CBOR |
218 |
|
|
values, while JSON does not support these at all, so C<JSON> would be a |
219 |
|
|
valid (but too limited) data model name for C<CBOR::XS>. Similarly, a |
220 |
|
|
serialising format that supports more or less the same data model as JSON |
221 |
|
|
could use C<JSON> as data model without losing anything. |
222 |
root |
1.5 |
|
223 |
root |
1.3 |
=head2 DECODING |
224 |
|
|
|
225 |
|
|
When the decoder then encounters such an encoded perl object, it should |
226 |
|
|
look up the C<THAW> method on the stored classname, and invoke it with the |
227 |
root |
1.8 |
classname, the constant string to identify the data model/data format, and |
228 |
|
|
all the return values returned by C<FREEZE>. |
229 |
root |
1.3 |
|
230 |
|
|
=head2 EXAMPLES |
231 |
|
|
|
232 |
|
|
See the C<OBJECT SERIALISATION> section in the L<CBOR::XS> manpage for |
233 |
|
|
more details, an example implementation, and code examples. |
234 |
|
|
|
235 |
|
|
Here is an example C<FREEZE>/C<THAW> method pair: |
236 |
|
|
|
237 |
|
|
sub My::Object::FREEZE { |
238 |
root |
1.8 |
my ($self, $model) = @_; |
239 |
root |
1.3 |
|
240 |
|
|
($self->{type}, $self->{id}, $self->{variant}) |
241 |
|
|
} |
242 |
|
|
|
243 |
|
|
sub My::Object::THAW { |
244 |
root |
1.8 |
my ($class, $model, $type, $id, $variant) = @_; |
245 |
root |
1.3 |
|
246 |
root |
1.7 |
$class->new (type => $type, id => $id, variant => $variant) |
247 |
root |
1.3 |
} |
248 |
|
|
|
249 |
root |
1.1 |
=head1 BUGS |
250 |
|
|
|
251 |
|
|
The use of L<overload> makes this module much heavier than it should be |
252 |
|
|
(on my system, this module: 4kB RSS, overload: 260kB RSS). |
253 |
|
|
|
254 |
|
|
=head1 SEE ALSO |
255 |
|
|
|
256 |
|
|
Currently, L<JSON::XS> and L<CBOR::XS> use these types. |
257 |
|
|
|
258 |
|
|
=head1 AUTHOR |
259 |
|
|
|
260 |
|
|
Marc Lehmann <schmorp@schmorp.de> |
261 |
|
|
http://home.schmorp.de/ |
262 |
|
|
|
263 |
|
|
=cut |
264 |
|
|
|
265 |
|
|
1 |
266 |
|
|
|