ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Games-Go-SGF-Grove/Grove.pm
(Generate patch)

Comparing Games-Go-SGF-Grove/Grove.pm (file contents):
Revision 1.3 by root, Sun Jul 20 19:54:00 2008 UTC vs.
Revision 1.4 by root, Sun Jul 20 22:16:14 2008 UTC

61=cut 61=cut
62 62
63sub decode_sgf($) { 63sub decode_sgf($) {
64 my ($sgf_data) = @_; 64 my ($sgf_data) = @_;
65 65
66 Games::Go::SGF::Grove::Parser::->new->parse ($sgf_data) 66 Games::Go::SGF::Grove::Parser::->new->decode_sgf ($sgf_data)
67} 67}
68 68
69sub encode_sgf($) { 69sub encode_sgf($) {
70 my ($game) = @_; 70 my ($game) = @_;
71
72 Games::Go::SGF::Grove::Parser::->new->encode_sgf ($game)
71} 73}
72 74
73sub load_sgf($) { 75sub load_sgf($) {
74 my ($path) = @_; 76 my ($path) = @_;
75 77
81} 83}
82 84
83sub save_sgf($$) { 85sub save_sgf($$) {
84 my ($path, $game) = @_; 86 my ($path, $game) = @_;
85 87
86 open my $fh, "<:perlio", $path 88 open my $fh, ">:perlio", $path
87 or die "$path: $!"; 89 or die "$path: $!";
88 90
89 print $fh encode_sgf $game; 91 print $fh encode_sgf $game;
90} 92}
91 93
94=head2 The Game Data structure 96=head2 The Game Data structure
95 97
96The SGF game is represented by a linked Perl data structure consisting of 98The SGF game is represented by a linked Perl data structure consisting of
97unblessed hashes and arrays. 99unblessed hashes and arrays.
98 100
99The game itself is a hash reference #TODO# 101SGF files are a forest of trees, called a collection (i.e. you can have
102multiple games stored in a file). The C<load_sgf> and C<decode_sgf>
103functions returns this collection as a reference to an array containing
104the individual game trees (usually there is only one, though).
105
106Each individual tree is again an array of nodes representing the main line
107of play.
108
109Each node is simply a hash reference. Each SGF property is stored with the
110(uppercase) property name as the key, and a property-dependent value for
111the contents (e.g., a black move is stored as C<< B => [3, 5] >>.
112
113If there are any variations/branches/alternate lines of play, then these
114are stored in the array reference in the C<variations> key (those again
115are game trees, so array references themselves).
116
117This module reserves all uppercase key names for SGF properties, the key
118C<variations> and all keys starting with an underscore (C<_xxx>) as it's
119own. Users of this module may store additional attributes that don't
120conflict with these names in any node.
121
122Unknown properties will be stored as scalars with the (binary) property
123contents. Text nodes will always be decoded into Unicode text and encoded
124into whatever the CA property of the root node says (default: C<UTF-8>).
125
126When saving, all uppercase keys will be saved, lowercase keys will be
127ignored.
128
129For the actual encoding of other types, best decode some example game that
130contains them and use Data::Dumper. Here is such an example:
131
132 [ # list of game-trees, only one here
133 [ # the main node sequence
134 { # the root node, contains some variations
135 DI => '7k',
136 AP => undef,
137 CO => '5',
138 DP => '40',
139 GE => 'tesuji',
140 AW => [
141 [ 2, 16 ], [ 3, 15 ], [ 15, 9 ], [ 14, 13 ], ...
142 ],
143 C => 'White just played a ladder block at h12.',
144 variations => [ # list of variations, only one
145 [ # sequence of variation moves
146 { B => [ 7, 5 ] }, # a black move
147 { W => [ 12, 12 ] }, # a white move
148 ... and so on
149 ]
150 ],
151 }
152 ]
153 }
100 154
101=cut 155=cut
102 156
103
104package Games::Go::SGF::Grove::Parser; 157package Games::Go::SGF::Grove::Parser;
105 158
106no warnings; 159no warnings;
107use strict; 160use strict 'vars';
161
162use Encode ();
163use Carp qw(croak);
108 164
109my $ws = qr{[\x00-\x20]*}s; 165my $ws = qr{[\x00-\x20]*}s;
110my $property; # property => propertyinfo 166my $property; # property => propertyinfo
111 167
112sub new { 168sub new {
115} 171}
116 172
117sub error { 173sub error {
118 my ($self, $error) = @_; 174 my ($self, $error) = @_;
119 175
176 my $pos = pos $self->{sgf};
177
120 my $tail = substr $self->{sgf}, pos $self->{sgf}, 32; 178 my $tail = substr $self->{sgf}, $pos, 32;
121 $tail =~ s/[\x00-\x1f]+/ /g; 179 $tail =~ s/[\x00-\x1f]+/ /g;
122 180
123 die "$error (at '$tail')"; 181 croak "$error (at octet $pos, '$tail')";
124} 182}
125 183
126sub parse { 184sub decode_sgf {
127 my ($self, $sgf) = @_; 185 my ($self, $sgf) = @_;
128 186
129 # correct lines 187 # correct lines
130 if ($sgf =~ /[^\015\012]\015/) { 188 if ($sgf =~ /[^\015\012]\015/) {
131 $sgf =~ s/\015\012?/\n/g; 189 $sgf =~ s/\015\012?/\n/g;
134 } 192 }
135 193
136 $self->{sgf} = $sgf; 194 $self->{sgf} = $sgf;
137 195
138 $self->{FF} = 1; 196 $self->{FF} = 1;
139 $self->{CA} = 'ISO-8859-1'; 197 $self->{CA} = 'WINDOWS-1252'; # too many files are
140 $self->{GM} = 1; 198 $self->{GM} = 1;
141 199
142 my @trees; 200 my @trees;
143 201
144 eval { 202 eval {
145 while ($self->{sgf} =~ /\G$ws(?=\()/sgoc) { 203 while ($self->{sgf} =~ /\G$ws(?=\()/sgoc) {
146 push @trees, $self->GameTree; 204 push @trees, $self->decode_GameTree;
147 } 205 }
148 }; 206 };
149 207
150 die $@ if $@; 208 croak $@ if $@;
151 209
152 \@trees 210 \@trees
153} 211}
154 212
155sub GameTree { 213sub decode_GameTree {
156 my ($self) = @_; 214 my ($self) = @_;
157 215
158 $self->{sgf} =~ /\G$ws\(/sgoc 216 $self->{sgf} =~ /\G$ws\(/sgoc
159 or $self->error ("GameTree does not start with '('"); 217 or $self->error ("GameTree does not start with '('");
160 218
161 my $nodes = $self->Sequence; 219 my $nodes = $self->decode_Sequence;
162 220
163 while ($self->{sgf} =~ /\G$ws(?=\()/sgoc) { 221 while ($self->{sgf} =~ /\G$ws(?=\()/sgoc) {
164 push @{$nodes->[-1]{children}}, $self->GameTree; 222 push @{$nodes->[-1]{variations}}, $self->decode_GameTree;
165 } 223 }
166 $self->{sgf} =~ /\G$ws\)/sgoc 224 $self->{sgf} =~ /\G$ws\)/sgoc
167 or $self->error ("GameTree does not end with ')'"); 225 or $self->error ("GameTree does not end with ')'");
168 226
169 $nodes 227 $nodes
170} 228}
171 229
230sub postprocess {
231 my $self = shift;
232
233 for (@_) {
234 if ("ARRAY" eq ref) {
235 $self->postprocess (@$_);
236 } elsif ("HASH" eq ref) {
237 if (my $value = $_->{_text}) {
238 $_ = eval { Encode::decode $self->{CA}, $value } || $value
239 } else {
240 $self->postprocess (values %$_);
241 }
242 }
243 }
244}
245
172sub Sequence { 246sub decode_Sequence {
173 my ($self) = @_; 247 my ($self) = @_;
174 248
175 my (@nodes, $node, $name, $value); 249 my (@nodes, $node, $name, $value, $prop, @val);
176 250
177 while ($self->{sgf} =~ /\G$ws;/goc) { 251 while ($self->{sgf} =~ /\G$ws;/goc) {
178 push @nodes, $node = {}; 252 push @nodes, $node = {};
179 # Node 253 # Node
180 while ($self->{sgf} =~ /\G$ws([A-Z]+)/goc) { 254 while ($self->{sgf} =~ /\G$ws([A-Z]+)/goc) {
181 # Property 255 # Property
182 $name = $1; 256 $name = $1;
257 $prop = $property->{$name};
183 258
184 while ($self->{sgf} =~ /\G$ws\[((?:[^\\\]]*|\\.)*)\]/sgoc) { 259 while ($self->{sgf} =~ /\G$ws\[((?:[^\\\]]*|\\.)*)\]/sgoc) {
185 # PropValue 260 # PropValue
186 $value = $1; 261 $value = $1;
187 if (defined (my $prop = $property->{$name})) { 262 if ($prop) {
188 my (@val) = $prop->{in}->($self, $value, $prop); 263 @val = $prop->{in}->($self, $value, $prop);
189 264
190 if ($prop->{is_list}) { 265 if ($prop->{is_list}) {
191 push @{$node->{$name}}, @val 266 push @{$node->{$name}}, @val
192 } else { 267 } else {
193 $node->{$name} = $val[0]; 268 $node->{$name} = $val[0];
269
270 $self->{CA} = $val[0] if $name eq "CA";
194 } 271 }
195 } else { 272 } else {
196 #warn "unknown property '$name', will be saved unchanged.";#d# 273 #warn "unknown property '$name', will be saved unchanged.";#d#
197 $node->{$name} = $value; 274 push @{$node->{$name}}, $value;
198 } 275 }
199 } 276 }
200 } 277 }
278
279 # postprocess nodes, currently only to decode text and simpletext
280 $self->postprocess ($node);
201 } 281 }
202 282
203 \@nodes 283 \@nodes
284}
285
286sub encode_sgf($) {
287 my ($self, $game) = @_;
288
289 $self->{sgf} = "";
290
291 $self->{FF} = 4;
292 $self->{CA} = 'UTF-8';
293 $self->{GM} = 1;
294 $self->{AP} = ["Games::Go::SGF::Grove", $VERSION];
295
296 $self->encode_GameTree ($_, 1) for @$game;
297
298 $self->{sgf}
299}
300
301sub encode_GameTree {
302 my ($self, $sequence, $is_root) = @_;
303
304 if ($is_root) {
305 my $root = $sequence->[0];
306
307 $root->{CA} ||= $self->{CA};
308 $root->{FF} ||= $self->{FF};
309 $root->{GM} ||= $self->{GM};
310 $root->{AP} ||= $self->{AP};
311
312 $self->{CA} = $root->{CA};
313 }
314
315 $self->{sgf} .= "(";
316 $self->encode_Sequence ($sequence);
317 $self->{sgf} .= ")";
318}
319
320sub encode_Sequence {
321 my ($self, $sequence) = @_;
322
323 for my $node (@$sequence) {
324 $self->{sgf} .= ";";
325
326 while (my ($name, $value) = each %$node) {
327 next unless $name eq uc $name;
328
329 $self->{sgf} .= "$name\[";
330
331 if (my $prop = $property->{$name}) {
332 if ($prop->{is_list}) {
333 $self->{sgf} .= join "][", map $prop->{out}->($self, $_), @$value;
334 } else {
335 $self->{sgf} .= $prop->{out}->($self, $value);
336 }
337 } else {
338 $self->{sgf} .= join "][", @$value;
339 }
340
341 $self->{sgf} .= "]";
342 }
343
344 $self->encode_GameTree ($_) for @{ $node->{variations} };
345 }
204} 346}
205 347
206############################################################################# 348#############################################################################
207 349
208=head2 Property Type Structure 350=head2 Property Type Structure
224 366
225=cut 367=cut
226 368
227 369
228{ 370{
229 use Encode ();
230
231 my ($group, $name, $value, $prop); 371 my ($group, $name, $value, $prop);
232 372
233 my (%char2coord, %coord2char); 373 my (%char2coord, %coord2char);
234 374
235 { 375 {
236 my @coord = ("a" .. "z", "A" .. "Z"); 376 my @coord = ("a" .. "z", "A" .. "Z");
377
237 for (0.. $#coord) { 378 for (0.. $#coord) {
238 $char2coord{ $coord[$_] } = $_; 379 $char2coord{ $coord[$_] } = $_;
239 $coord2char{ $_ } = $coord[$_]; 380 $coord2char{ $_ } = $coord[$_];
240 } 381 }
241 } 382 }
244 sub _parsetype { 385 sub _parsetype {
245 for (shift) { 386 for (shift) {
246 if (s/e?list of //) { 387 if (s/e?list of //) {
247 $prop->{is_list} = 1; 388 $prop->{is_list} = 1;
248 return _parsetype $_; 389 return _parsetype $_;
390
249 } elsif (s/composed (\S+)\s+(?:':'\s+)?(\S+)//) { 391 } elsif (s/composed (\S+)\s+(?:':'\s+)?(\S+)//) {
392 $prop->{composed} = 1;
250 my ($i, $o) = ($1, $2); 393 my ($i, $o) = ($1, $2);
251 my ($i1, $o1, $i2, $o2) = (_parsetype $i, _parsetype $o); 394 my ($i1, $o1, $i2, $o2) = (_parsetype $i, _parsetype $o);
252 return ( 395 return (
253 sub { 396 sub {
254 if ($_[1] =~ /^((?:[^\\:]+|\\.)*)(?::(.*))$/s) { 397 if ($_[1] =~ /^((?:[^\\:]+|\\.)*)(?::(.*))$/s) {
255 # or $_[0]->error ("'Compose' ($i:$o) expected, got '$_[1]'"); 398 # or $_[0]->error ("'Compose' ($i:$o) expected, got '$_[1]'");
256 my ($l, $r) = ($1, $2); 399 my ($l, $r) = ($1, $2);
257 400
258 warn "$i1,$o1,$i2,$o2 ($i,$o)\n";#d#
259 [ 401 [
260 $i1->($_[0], $l), 402 $i1->($_[0], $l),
261 defined $r ? $i2->($_[0], $r) : undef, 403 defined $r ? $i2->($_[0], $r) : undef,
262 ], 404 ]
263 } else { 405 } else {
264 # yes, this is not according to standard, but let's handle it somehow anyway 406 # yes, this is not according to standard, but let's handle it somehow anyway
265 #[ $i1->($_[0], $l) ] #d# #TODO# 407 #[ $i1->($_[0], $l) ] #d# #TODO#
266 } 408 }
267 }, 409 },
268 sub { 410 sub {
269 $o1->($_[0], $_[1][0]) 411 $o1->($_[0], $_[1][0])
270 . (defined $_[1][1] 412 . ":"
271 ? ":" . $o1->($_[0], $_[1][0]) 413 . $o2->($_[0], $_[1][1])
272 : ""),
273 }, 414 },
274 ); 415 );
416
275 } elsif (s/double//) { 417 } elsif (s/double//) {
276 return ( 418 return (
277 sub { 419 sub {
278 $_[1] =~ /^[12]$/ 420 $_[1] =~ /^[12]$/
279 or $_[0]->error ("'Double' (1|2) expected, got '$_[1]'"); 421 or $_[0]->error ("'Double' (1|2) expected, got '$_[1]'");
284 }, 426 },
285 ); 427 );
286 } elsif (s/color//) { 428 } elsif (s/color//) {
287 return ( 429 return (
288 sub { 430 sub {
431 # too many broken programs write this wrong
432 return "B" if $_[1] eq "1";
433 return "W" if $_[1] eq "2";
434
289 $_[1] =~ /^[BW]$/i 435 $_[1] =~ /^[BW]$/i
290 or $_[0]->error ("'Color' (B|W) expected, got '$_[1]'"); 436 or $_[0]->error ("'Color' (B|W) expected, got '$_[1]'");
291 lc $_[1] 437 lc $_[1]
292 }, 438 },
293 sub { 439 sub {
325 $_[1] =~ /^(.)(.)$/ 471 $_[1] =~ /^(.)(.)$/
326 ? [ $char2coord{$1}, $char2coord{$2} ] 472 ? [ $char2coord{$1}, $char2coord{$2} ]
327 : [] 473 : []
328 }, 474 },
329 sub { 475 sub {
330 die; 476 $coord2char{$_[1][0]} . $coord2char{$_[1][1]}
331 }, 477 },
332 ); 478 );
333 } elsif (s/real//) { 479 } elsif (s/real//) {
334 return ( 480 return (
335 sub { 481 sub {
336 $_[1] =~ /^[+-]?[0-9]*\.?[0-9]*$/i 482 $_[1] =~ /^[+-]?[0-9]*\.?[0-9]*$/i
337 or $_[0]->error ("'Real' expected, got '$_[1]'"); 483 or $_[0]->error ("'Real' expected, got '$_[1]'");
338 $_[1]+0, 484 $_[1]+0
339 }, 485 },
340 sub { 486 sub {
341 $_[1]+0, 487 $_[1]+0
342 }, 488 },
343 ); 489 );
344 } elsif (s/number//) { 490 } elsif (s/number//) {
345 return ( 491 return (
346 sub { 492 sub {
347 $_[1] =~ /^[+-]?[0-9]*$/i 493 $_[1] =~ /^[+-]?[0-9]*$/i
348 or $_[0]->error ("'Number' expected, got '$_[1]'"); 494 or $_[0]->error ("'Number' expected, got '$_[1]'");
349 $_[1]+0, 495 $_[1]+0
350 }, 496 },
351 sub { 497 sub {
352 int $_[1], 498 int $_[1]
353 }, 499 },
354 ); 500 );
355 } elsif (s/text//) { 501 } elsif (s/text// || s/simpletext//i) {
356 return ( 502 return (
357 sub { 503 sub {
358 warn "$_[0]{CA}<-CA\n";#d# 504 { _text => $_[1] }
359 eval { Encode::decode $_[0]{CA}, $_[1] } || $_[1],
360 }, 505 },
361 sub { 506 sub {
362 die; 507 my $str = Encode::encode $_[0]{CA}, $_[1];
363 }, 508 $str =~ s/([\:\]\\])/\\$1/g;
364 );
365 } elsif (s/simpletext//i) {
366 return (
367 sub {
368 $_[1], 509 $str
369 },
370 sub {
371 die;
372 }, 510 },
373 ); 511 );
374 } else { 512 } else {
375 die "FATAL: garbled DATA section, unknown type '$_'"; 513 die "FATAL: garbled DATA section, unknown type '$_'";
376
377 } 514 }
378 } 515 }
379 } 516 }
380 517
381 while (<DATA>) { 518 while (<DATA>) {
801 1 = board markup/variations of current node 938 1 = board markup/variations of current node
802 Default value: 0 939 Default value: 0
803Related: C, FF, GM, SZ, AP, CA 940Related: C, FF, GM, SZ, AP, CA
804 941
805Property: SZ 942Property: SZ
806Propvalue: composed number number 943Propvalue: number
807Propertytype: root 944Propertytype: root
808Function: Defines the size of the board. If only a single value 945Function: Defines the size of the board. If only a single value
809 is given, the board is a square; with two numbers given, 946 is given, the board is a square; with two numbers given,
810 rectangular boards are possible. 947 rectangular boards are possible.
811 If a rectangular board is specified, the first number specifies 948 If a rectangular board is specified, the first number specifies

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines