… | |
… | |
61 | =cut |
61 | =cut |
62 | |
62 | |
63 | sub decode_sgf($) { |
63 | sub decode_sgf($) { |
64 | my ($sgf_data) = @_; |
64 | my ($sgf_data) = @_; |
65 | |
65 | |
66 | Games::Go::SGF::Grove::Parser::->new->parse ($sgf_data) |
66 | Games::Go::SGF::Grove::Parser::->new->decode_sgf ($sgf_data) |
67 | } |
67 | } |
68 | |
68 | |
69 | sub encode_sgf($) { |
69 | sub encode_sgf($) { |
70 | my ($game) = @_; |
70 | my ($game) = @_; |
|
|
71 | |
|
|
72 | Games::Go::SGF::Grove::Parser::->new->encode_sgf ($game) |
71 | } |
73 | } |
72 | |
74 | |
73 | sub load_sgf($) { |
75 | sub load_sgf($) { |
74 | my ($path) = @_; |
76 | my ($path) = @_; |
75 | |
77 | |
… | |
… | |
81 | } |
83 | } |
82 | |
84 | |
83 | sub save_sgf($$) { |
85 | sub save_sgf($$) { |
84 | my ($path, $game) = @_; |
86 | my ($path, $game) = @_; |
85 | |
87 | |
86 | open my $fh, "<:perlio", $path |
88 | open my $fh, ">:perlio", $path |
87 | or die "$path: $!"; |
89 | or die "$path: $!"; |
88 | |
90 | |
89 | print $fh encode_sgf $game; |
91 | print $fh encode_sgf $game; |
90 | } |
92 | } |
91 | |
93 | |
… | |
… | |
94 | =head2 The Game Data structure |
96 | =head2 The Game Data structure |
95 | |
97 | |
96 | The SGF game is represented by a linked Perl data structure consisting of |
98 | The SGF game is represented by a linked Perl data structure consisting of |
97 | unblessed hashes and arrays. |
99 | unblessed hashes and arrays. |
98 | |
100 | |
99 | The game itself is a hash reference #TODO# |
101 | SGF files are a forest of trees, called a collection (i.e. you can have |
|
|
102 | multiple games stored in a file). The C<load_sgf> and C<decode_sgf> |
|
|
103 | functions returns this collection as a reference to an array containing |
|
|
104 | the individual game trees (usually there is only one, though). |
|
|
105 | |
|
|
106 | Each individual tree is again an array of nodes representing the main line |
|
|
107 | of play. |
|
|
108 | |
|
|
109 | Each node is simply a hash reference. Each SGF property is stored with the |
|
|
110 | (uppercase) property name as the key, and a property-dependent value for |
|
|
111 | the contents (e.g., a black move is stored as C<< B => [3, 5] >>. |
|
|
112 | |
|
|
113 | If there are any variations/branches/alternate lines of play, then these |
|
|
114 | are stored in the array reference in the C<variations> key (those again |
|
|
115 | are game trees, so array references themselves). |
|
|
116 | |
|
|
117 | This module reserves all uppercase key names for SGF properties, the key |
|
|
118 | C<variations> and all keys starting with an underscore (C<_xxx>) as it's |
|
|
119 | own. Users of this module may store additional attributes that don't |
|
|
120 | conflict with these names in any node. |
|
|
121 | |
|
|
122 | Unknown properties will be stored as scalars with the (binary) property |
|
|
123 | contents. Text nodes will always be decoded into Unicode text and encoded |
|
|
124 | into whatever the CA property of the root node says (default: C<UTF-8>). |
|
|
125 | |
|
|
126 | When saving, all uppercase keys will be saved, lowercase keys will be |
|
|
127 | ignored. |
|
|
128 | |
|
|
129 | For the actual encoding of other types, best decode some example game that |
|
|
130 | contains them and use Data::Dumper. Here is such an example: |
|
|
131 | |
|
|
132 | [ # list of game-trees, only one here |
|
|
133 | [ # the main node sequence |
|
|
134 | { # the root node, contains some variations |
|
|
135 | DI => '7k', |
|
|
136 | AP => undef, |
|
|
137 | CO => '5', |
|
|
138 | DP => '40', |
|
|
139 | GE => 'tesuji', |
|
|
140 | AW => [ |
|
|
141 | [ 2, 16 ], [ 3, 15 ], [ 15, 9 ], [ 14, 13 ], ... |
|
|
142 | ], |
|
|
143 | C => 'White just played a ladder block at h12.', |
|
|
144 | variations => [ # list of variations, only one |
|
|
145 | [ # sequence of variation moves |
|
|
146 | { B => [ 7, 5 ] }, # a black move |
|
|
147 | { W => [ 12, 12 ] }, # a white move |
|
|
148 | ... and so on |
|
|
149 | ] |
|
|
150 | ], |
|
|
151 | } |
|
|
152 | ] |
|
|
153 | } |
100 | |
154 | |
101 | =cut |
155 | =cut |
102 | |
156 | |
103 | |
|
|
104 | package Games::Go::SGF::Grove::Parser; |
157 | package Games::Go::SGF::Grove::Parser; |
105 | |
158 | |
106 | no warnings; |
159 | no warnings; |
107 | use strict; |
160 | use strict 'vars'; |
|
|
161 | |
|
|
162 | use Encode (); |
|
|
163 | use Carp qw(croak); |
108 | |
164 | |
109 | my $ws = qr{[\x00-\x20]*}s; |
165 | my $ws = qr{[\x00-\x20]*}s; |
110 | my $property; # property => propertyinfo |
166 | my $property; # property => propertyinfo |
111 | |
167 | |
112 | sub new { |
168 | sub new { |
… | |
… | |
115 | } |
171 | } |
116 | |
172 | |
117 | sub error { |
173 | sub error { |
118 | my ($self, $error) = @_; |
174 | my ($self, $error) = @_; |
119 | |
175 | |
|
|
176 | my $pos = pos $self->{sgf}; |
|
|
177 | |
120 | my $tail = substr $self->{sgf}, pos $self->{sgf}, 32; |
178 | my $tail = substr $self->{sgf}, $pos, 32; |
121 | $tail =~ s/[\x00-\x1f]+/ /g; |
179 | $tail =~ s/[\x00-\x1f]+/ /g; |
122 | |
180 | |
123 | die "$error (at '$tail')"; |
181 | croak "$error (at octet $pos, '$tail')"; |
124 | } |
182 | } |
125 | |
183 | |
126 | sub parse { |
184 | sub decode_sgf { |
127 | my ($self, $sgf) = @_; |
185 | my ($self, $sgf) = @_; |
128 | |
186 | |
129 | # correct lines |
187 | # correct lines |
130 | if ($sgf =~ /[^\015\012]\015/) { |
188 | if ($sgf =~ /[^\015\012]\015/) { |
131 | $sgf =~ s/\015\012?/\n/g; |
189 | $sgf =~ s/\015\012?/\n/g; |
… | |
… | |
134 | } |
192 | } |
135 | |
193 | |
136 | $self->{sgf} = $sgf; |
194 | $self->{sgf} = $sgf; |
137 | |
195 | |
138 | $self->{FF} = 1; |
196 | $self->{FF} = 1; |
139 | $self->{CA} = 'ISO-8859-1'; |
197 | $self->{CA} = 'WINDOWS-1252'; # too many files are |
140 | $self->{GM} = 1; |
198 | $self->{GM} = 1; |
141 | |
199 | |
142 | my @trees; |
200 | my @trees; |
143 | |
201 | |
144 | eval { |
202 | eval { |
145 | while ($self->{sgf} =~ /\G$ws(?=\()/sgoc) { |
203 | while ($self->{sgf} =~ /\G$ws(?=\()/sgoc) { |
146 | push @trees, $self->GameTree; |
204 | push @trees, $self->decode_GameTree; |
147 | } |
205 | } |
148 | }; |
206 | }; |
149 | |
207 | |
150 | die $@ if $@; |
208 | croak $@ if $@; |
151 | |
209 | |
152 | \@trees |
210 | \@trees |
153 | } |
211 | } |
154 | |
212 | |
155 | sub GameTree { |
213 | sub decode_GameTree { |
156 | my ($self) = @_; |
214 | my ($self) = @_; |
157 | |
215 | |
158 | $self->{sgf} =~ /\G$ws\(/sgoc |
216 | $self->{sgf} =~ /\G$ws\(/sgoc |
159 | or $self->error ("GameTree does not start with '('"); |
217 | or $self->error ("GameTree does not start with '('"); |
160 | |
218 | |
161 | my $nodes = $self->Sequence; |
219 | my $nodes = $self->decode_Sequence; |
162 | |
220 | |
163 | while ($self->{sgf} =~ /\G$ws(?=\()/sgoc) { |
221 | while ($self->{sgf} =~ /\G$ws(?=\()/sgoc) { |
164 | push @{$nodes->[-1]{children}}, $self->GameTree; |
222 | push @{$nodes->[-1]{variations}}, $self->decode_GameTree; |
165 | } |
223 | } |
166 | $self->{sgf} =~ /\G$ws\)/sgoc |
224 | $self->{sgf} =~ /\G$ws\)/sgoc |
167 | or $self->error ("GameTree does not end with ')'"); |
225 | or $self->error ("GameTree does not end with ')'"); |
168 | |
226 | |
169 | $nodes |
227 | $nodes |
170 | } |
228 | } |
171 | |
229 | |
|
|
230 | sub postprocess { |
|
|
231 | my $self = shift; |
|
|
232 | |
|
|
233 | for (@_) { |
|
|
234 | if ("ARRAY" eq ref) { |
|
|
235 | $self->postprocess (@$_); |
|
|
236 | } elsif ("HASH" eq ref) { |
|
|
237 | if (my $value = $_->{_text}) { |
|
|
238 | $_ = eval { Encode::decode $self->{CA}, $value } || $value |
|
|
239 | } else { |
|
|
240 | $self->postprocess (values %$_); |
|
|
241 | } |
|
|
242 | } |
|
|
243 | } |
|
|
244 | } |
|
|
245 | |
172 | sub Sequence { |
246 | sub decode_Sequence { |
173 | my ($self) = @_; |
247 | my ($self) = @_; |
174 | |
248 | |
175 | my (@nodes, $node, $name, $value); |
249 | my (@nodes, $node, $name, $value, $prop, @val); |
176 | |
250 | |
177 | while ($self->{sgf} =~ /\G$ws;/goc) { |
251 | while ($self->{sgf} =~ /\G$ws;/goc) { |
178 | push @nodes, $node = {}; |
252 | push @nodes, $node = {}; |
179 | # Node |
253 | # Node |
180 | while ($self->{sgf} =~ /\G$ws([A-Z]+)/goc) { |
254 | while ($self->{sgf} =~ /\G$ws([A-Z]+)/goc) { |
181 | # Property |
255 | # Property |
182 | $name = $1; |
256 | $name = $1; |
|
|
257 | $prop = $property->{$name}; |
183 | |
258 | |
184 | while ($self->{sgf} =~ /\G$ws\[((?:[^\\\]]*|\\.)*)\]/sgoc) { |
259 | while ($self->{sgf} =~ /\G$ws\[((?:[^\\\]]*|\\.)*)\]/sgoc) { |
185 | # PropValue |
260 | # PropValue |
186 | $value = $1; |
261 | $value = $1; |
187 | if (defined (my $prop = $property->{$name})) { |
262 | if ($prop) { |
188 | my (@val) = $prop->{in}->($self, $value, $prop); |
263 | @val = $prop->{in}->($self, $value, $prop); |
189 | |
264 | |
190 | if ($prop->{is_list}) { |
265 | if ($prop->{is_list}) { |
191 | push @{$node->{$name}}, @val |
266 | push @{$node->{$name}}, @val |
192 | } else { |
267 | } else { |
193 | $node->{$name} = $val[0]; |
268 | $node->{$name} = $val[0]; |
|
|
269 | |
|
|
270 | $self->{CA} = $val[0] if $name eq "CA"; |
194 | } |
271 | } |
195 | } else { |
272 | } else { |
196 | #warn "unknown property '$name', will be saved unchanged.";#d# |
273 | #warn "unknown property '$name', will be saved unchanged.";#d# |
197 | $node->{$name} = $value; |
274 | push @{$node->{$name}}, $value; |
198 | } |
275 | } |
199 | } |
276 | } |
200 | } |
277 | } |
|
|
278 | |
|
|
279 | # postprocess nodes, currently only to decode text and simpletext |
|
|
280 | $self->postprocess ($node); |
201 | } |
281 | } |
202 | |
282 | |
203 | \@nodes |
283 | \@nodes |
|
|
284 | } |
|
|
285 | |
|
|
286 | sub encode_sgf($) { |
|
|
287 | my ($self, $game) = @_; |
|
|
288 | |
|
|
289 | $self->{sgf} = ""; |
|
|
290 | |
|
|
291 | $self->{FF} = 4; |
|
|
292 | $self->{CA} = 'UTF-8'; |
|
|
293 | $self->{GM} = 1; |
|
|
294 | $self->{AP} = ["Games::Go::SGF::Grove", $VERSION]; |
|
|
295 | |
|
|
296 | $self->encode_GameTree ($_, 1) for @$game; |
|
|
297 | |
|
|
298 | $self->{sgf} |
|
|
299 | } |
|
|
300 | |
|
|
301 | sub encode_GameTree { |
|
|
302 | my ($self, $sequence, $is_root) = @_; |
|
|
303 | |
|
|
304 | if ($is_root) { |
|
|
305 | my $root = $sequence->[0]; |
|
|
306 | |
|
|
307 | $root->{CA} ||= $self->{CA}; |
|
|
308 | $root->{FF} ||= $self->{FF}; |
|
|
309 | $root->{GM} ||= $self->{GM}; |
|
|
310 | $root->{AP} ||= $self->{AP}; |
|
|
311 | |
|
|
312 | $self->{CA} = $root->{CA}; |
|
|
313 | } |
|
|
314 | |
|
|
315 | $self->{sgf} .= "("; |
|
|
316 | $self->encode_Sequence ($sequence); |
|
|
317 | $self->{sgf} .= ")"; |
|
|
318 | } |
|
|
319 | |
|
|
320 | sub encode_Sequence { |
|
|
321 | my ($self, $sequence) = @_; |
|
|
322 | |
|
|
323 | for my $node (@$sequence) { |
|
|
324 | $self->{sgf} .= ";"; |
|
|
325 | |
|
|
326 | while (my ($name, $value) = each %$node) { |
|
|
327 | next unless $name eq uc $name; |
|
|
328 | |
|
|
329 | $self->{sgf} .= "$name\["; |
|
|
330 | |
|
|
331 | if (my $prop = $property->{$name}) { |
|
|
332 | if ($prop->{is_list}) { |
|
|
333 | $self->{sgf} .= join "][", map $prop->{out}->($self, $_), @$value; |
|
|
334 | } else { |
|
|
335 | $self->{sgf} .= $prop->{out}->($self, $value); |
|
|
336 | } |
|
|
337 | } else { |
|
|
338 | $self->{sgf} .= join "][", @$value; |
|
|
339 | } |
|
|
340 | |
|
|
341 | $self->{sgf} .= "]"; |
|
|
342 | } |
|
|
343 | |
|
|
344 | $self->encode_GameTree ($_) for @{ $node->{variations} }; |
|
|
345 | } |
204 | } |
346 | } |
205 | |
347 | |
206 | ############################################################################# |
348 | ############################################################################# |
207 | |
349 | |
208 | =head2 Property Type Structure |
350 | =head2 Property Type Structure |
… | |
… | |
224 | |
366 | |
225 | =cut |
367 | =cut |
226 | |
368 | |
227 | |
369 | |
228 | { |
370 | { |
229 | use Encode (); |
|
|
230 | |
|
|
231 | my ($group, $name, $value, $prop); |
371 | my ($group, $name, $value, $prop); |
232 | |
372 | |
233 | my (%char2coord, %coord2char); |
373 | my (%char2coord, %coord2char); |
234 | |
374 | |
235 | { |
375 | { |
236 | my @coord = ("a" .. "z", "A" .. "Z"); |
376 | my @coord = ("a" .. "z", "A" .. "Z"); |
|
|
377 | |
237 | for (0.. $#coord) { |
378 | for (0.. $#coord) { |
238 | $char2coord{ $coord[$_] } = $_; |
379 | $char2coord{ $coord[$_] } = $_; |
239 | $coord2char{ $_ } = $coord[$_]; |
380 | $coord2char{ $_ } = $coord[$_]; |
240 | } |
381 | } |
241 | } |
382 | } |
… | |
… | |
244 | sub _parsetype { |
385 | sub _parsetype { |
245 | for (shift) { |
386 | for (shift) { |
246 | if (s/e?list of //) { |
387 | if (s/e?list of //) { |
247 | $prop->{is_list} = 1; |
388 | $prop->{is_list} = 1; |
248 | return _parsetype $_; |
389 | return _parsetype $_; |
|
|
390 | |
249 | } elsif (s/composed (\S+)\s+(?:':'\s+)?(\S+)//) { |
391 | } elsif (s/composed (\S+)\s+(?:':'\s+)?(\S+)//) { |
|
|
392 | $prop->{composed} = 1; |
250 | my ($i, $o) = ($1, $2); |
393 | my ($i, $o) = ($1, $2); |
251 | my ($i1, $o1, $i2, $o2) = (_parsetype $i, _parsetype $o); |
394 | my ($i1, $o1, $i2, $o2) = (_parsetype $i, _parsetype $o); |
252 | return ( |
395 | return ( |
253 | sub { |
396 | sub { |
254 | if ($_[1] =~ /^((?:[^\\:]+|\\.)*)(?::(.*))$/s) { |
397 | if ($_[1] =~ /^((?:[^\\:]+|\\.)*)(?::(.*))$/s) { |
255 | # or $_[0]->error ("'Compose' ($i:$o) expected, got '$_[1]'"); |
398 | # or $_[0]->error ("'Compose' ($i:$o) expected, got '$_[1]'"); |
256 | my ($l, $r) = ($1, $2); |
399 | my ($l, $r) = ($1, $2); |
257 | |
400 | |
258 | warn "$i1,$o1,$i2,$o2 ($i,$o)\n";#d# |
|
|
259 | [ |
401 | [ |
260 | $i1->($_[0], $l), |
402 | $i1->($_[0], $l), |
261 | defined $r ? $i2->($_[0], $r) : undef, |
403 | defined $r ? $i2->($_[0], $r) : undef, |
262 | ], |
404 | ] |
263 | } else { |
405 | } else { |
264 | # yes, this is not according to standard, but let's handle it somehow anyway |
406 | # yes, this is not according to standard, but let's handle it somehow anyway |
265 | #[ $i1->($_[0], $l) ] #d# #TODO# |
407 | #[ $i1->($_[0], $l) ] #d# #TODO# |
266 | } |
408 | } |
267 | }, |
409 | }, |
268 | sub { |
410 | sub { |
269 | $o1->($_[0], $_[1][0]) |
411 | $o1->($_[0], $_[1][0]) |
270 | . (defined $_[1][1] |
412 | . ":" |
271 | ? ":" . $o1->($_[0], $_[1][0]) |
413 | . $o2->($_[0], $_[1][1]) |
272 | : ""), |
|
|
273 | }, |
414 | }, |
274 | ); |
415 | ); |
|
|
416 | |
275 | } elsif (s/double//) { |
417 | } elsif (s/double//) { |
276 | return ( |
418 | return ( |
277 | sub { |
419 | sub { |
278 | $_[1] =~ /^[12]$/ |
420 | $_[1] =~ /^[12]$/ |
279 | or $_[0]->error ("'Double' (1|2) expected, got '$_[1]'"); |
421 | or $_[0]->error ("'Double' (1|2) expected, got '$_[1]'"); |
… | |
… | |
284 | }, |
426 | }, |
285 | ); |
427 | ); |
286 | } elsif (s/color//) { |
428 | } elsif (s/color//) { |
287 | return ( |
429 | return ( |
288 | sub { |
430 | sub { |
|
|
431 | # too many broken programs write this wrong |
|
|
432 | return "B" if $_[1] eq "1"; |
|
|
433 | return "W" if $_[1] eq "2"; |
|
|
434 | |
289 | $_[1] =~ /^[BW]$/i |
435 | $_[1] =~ /^[BW]$/i |
290 | or $_[0]->error ("'Color' (B|W) expected, got '$_[1]'"); |
436 | or $_[0]->error ("'Color' (B|W) expected, got '$_[1]'"); |
291 | lc $_[1] |
437 | lc $_[1] |
292 | }, |
438 | }, |
293 | sub { |
439 | sub { |
… | |
… | |
325 | $_[1] =~ /^(.)(.)$/ |
471 | $_[1] =~ /^(.)(.)$/ |
326 | ? [ $char2coord{$1}, $char2coord{$2} ] |
472 | ? [ $char2coord{$1}, $char2coord{$2} ] |
327 | : [] |
473 | : [] |
328 | }, |
474 | }, |
329 | sub { |
475 | sub { |
330 | die; |
476 | $coord2char{$_[1][0]} . $coord2char{$_[1][1]} |
331 | }, |
477 | }, |
332 | ); |
478 | ); |
333 | } elsif (s/real//) { |
479 | } elsif (s/real//) { |
334 | return ( |
480 | return ( |
335 | sub { |
481 | sub { |
336 | $_[1] =~ /^[+-]?[0-9]*\.?[0-9]*$/i |
482 | $_[1] =~ /^[+-]?[0-9]*\.?[0-9]*$/i |
337 | or $_[0]->error ("'Real' expected, got '$_[1]'"); |
483 | or $_[0]->error ("'Real' expected, got '$_[1]'"); |
338 | $_[1]+0, |
484 | $_[1]+0 |
339 | }, |
485 | }, |
340 | sub { |
486 | sub { |
341 | $_[1]+0, |
487 | $_[1]+0 |
342 | }, |
488 | }, |
343 | ); |
489 | ); |
344 | } elsif (s/number//) { |
490 | } elsif (s/number//) { |
345 | return ( |
491 | return ( |
346 | sub { |
492 | sub { |
347 | $_[1] =~ /^[+-]?[0-9]*$/i |
493 | $_[1] =~ /^[+-]?[0-9]*$/i |
348 | or $_[0]->error ("'Number' expected, got '$_[1]'"); |
494 | or $_[0]->error ("'Number' expected, got '$_[1]'"); |
349 | $_[1]+0, |
495 | $_[1]+0 |
350 | }, |
496 | }, |
351 | sub { |
497 | sub { |
352 | int $_[1], |
498 | int $_[1] |
353 | }, |
499 | }, |
354 | ); |
500 | ); |
355 | } elsif (s/text//) { |
501 | } elsif (s/text// || s/simpletext//i) { |
356 | return ( |
502 | return ( |
357 | sub { |
503 | sub { |
358 | warn "$_[0]{CA}<-CA\n";#d# |
504 | { _text => $_[1] } |
359 | eval { Encode::decode $_[0]{CA}, $_[1] } || $_[1], |
|
|
360 | }, |
505 | }, |
361 | sub { |
506 | sub { |
362 | die; |
507 | my $str = Encode::encode $_[0]{CA}, $_[1]; |
363 | }, |
508 | $str =~ s/([\:\]\\])/\\$1/g; |
364 | ); |
|
|
365 | } elsif (s/simpletext//i) { |
|
|
366 | return ( |
|
|
367 | sub { |
|
|
368 | $_[1], |
509 | $str |
369 | }, |
|
|
370 | sub { |
|
|
371 | die; |
|
|
372 | }, |
510 | }, |
373 | ); |
511 | ); |
374 | } else { |
512 | } else { |
375 | die "FATAL: garbled DATA section, unknown type '$_'"; |
513 | die "FATAL: garbled DATA section, unknown type '$_'"; |
376 | |
|
|
377 | } |
514 | } |
378 | } |
515 | } |
379 | } |
516 | } |
380 | |
517 | |
381 | while (<DATA>) { |
518 | while (<DATA>) { |
… | |
… | |
801 | 1 = board markup/variations of current node |
938 | 1 = board markup/variations of current node |
802 | Default value: 0 |
939 | Default value: 0 |
803 | Related: C, FF, GM, SZ, AP, CA |
940 | Related: C, FF, GM, SZ, AP, CA |
804 | |
941 | |
805 | Property: SZ |
942 | Property: SZ |
806 | Propvalue: composed number number |
943 | Propvalue: number |
807 | Propertytype: root |
944 | Propertytype: root |
808 | Function: Defines the size of the board. If only a single value |
945 | Function: Defines the size of the board. If only a single value |
809 | is given, the board is a square; with two numbers given, |
946 | is given, the board is a square; with two numbers given, |
810 | rectangular boards are possible. |
947 | rectangular boards are possible. |
811 | If a rectangular board is specified, the first number specifies |
948 | If a rectangular board is specified, the first number specifies |