ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cfmaps/cfmapidx
(Generate patch)

Comparing cfmaps/cfmapidx (file contents):
Revision 1.11 by root, Thu Oct 22 05:31:04 2009 UTC vs.
Revision 1.14 by root, Sat Oct 31 17:26:44 2009 UTC

1#!/opt/bin/perl 1#!/opt/bin/perl
2 2
3# cfmap2idx - inverted index for deliantra maps 3# cfmapidx - inverted index for deliantra maps
4# Copyright (C) 2005,2007,2008 Marc Lehmann <cfmaps@schmorp.de> 4# Copyright (C) 2005,2007,2008,2009 Marc Lehmann <cfmaps@schmorp.de>
5# 5#
6# CFMAPIDX is free software; you can redistribute it and/or modify 6# CFMAPIDX is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by 7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or 8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version. 9# (at your option) any later version.
15# 15#
16# You should have received a copy of the GNU General Public License 16# You should have received a copy of the GNU General Public License
17# along with cfmaps; if not, write to the Free Software 17# along with cfmaps; if not, write to the Free Software
18# Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 18# Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
20our $VERSION = '0.913'; 20our $VERSION = '2.0';
21
22use common::sense;
21 23
22use Deliantra; 24use Deliantra;
23use Deliantra::Map; 25use Deliantra::Map;
26
27use Fcntl;
24use SDBM_File; 28use GDBM_File;
29
30sub FAST () { 1 } # skip stopwords, skip duplicate-management
31
32our $RESULTS = 200;
33our %idx; # the database
25 34
26sub escape_html($) { 35sub escape_html($) {
27 local $_ = shift; 36 local $_ = shift;
28 s/([<>&])/sprintf "&#%d;", ord $1/ge; 37 s/([<>&])/sprintf "&#%d;", ord $1/ge;
29 $_ 38 $_
30} 39}
31 40
32sub search { 41sub find {
33 my (@kw) = @_; 42 my @kw = @_;
34 43
35 tie %idx, SDBM_File, ".index.dat", O_RDONLY, 0, $DB_HASH 44 tie %idx, "GDBM_File", ".index.dat", O_RDONLY, 0
36 or die ".index.dat: $!"; 45 or die ".index.dat: $!";
37 46
38 my $cnt; 47 my $cnt;
39 my %res; 48 my %res;
40 my @found; 49 my @found;
41 50
42 for (map lc, @kw) { 51 for (@kw) {
43 if (exists $idx{$_}) { 52 if (exists $idx{$_}) {
44 $cnt++; 53 $cnt++;
45 $res{$_}++ for unpack "n*", $idx{$_}; 54 $res{$_}++ for unpack "(A4)*", $idx{$_};
46 push @found, $_; 55 push @found, $_;
47 #warn "$_ found\n";#d# 56 #warn "$_ found\n";#d#
48 } else { 57 } else {
49 #warn "$_ not found\n";#d# 58 #warn "$_ not found\n";#d#
50 } 59 }
51 } 60 }
52 61
53 my @paths; 62 my @res;
54 63
55 while (my ($k, $v) = each %res) { 64 while (my ($k, $v) = each %res) {
56 next unless $v == $cnt; 65 next unless $v == $cnt;
57 push @paths, $idx{"D" . pack "n", $k}; 66
67 my ($docnum, $x, $y) = unpack "nCC", $k;
68 my ($path, $mtime) = split /\x00/, $idx{"D" . pack "n", $docnum};
69
70 push @res, [$path, $x, $y];
58 } 71 }
72
73 (\@res, \@found)
74}
75
76sub dotag {
77 my ($tag) = @_;
78
79 my ($res) = find "T$tag";
80
81 use Data::Dumper; warn Dumper $res;
82 print <<EOF;
83Content-Type: application/xhtml+xml
84
85EOF
86
87 print "<?xml version='1.0' encoding='utf-8'?>",
88 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
89 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
90 "<head>",
91 "<title>Deliantra Tag Search</title>",
92 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
93 "</head>",
94 "<body>";
95
96 print "<h1>Search results for tag '", (escape_html $tag), "'</h1>",
97 "<p class='searchresult'><ul>";
98
99 for (sort { $a->[0] cmp $b->[0] } @$res) {
100 print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
101 }
102
103 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
104 "</body></html>";
105}
106
107sub search {
108 my (@kw) = @_;
109
110 my ($res, $found) = find map lc, @kw;
59 111
60 binmode STDOUT, ":utf8"; 112 binmode STDOUT, ":utf8";
113
114 print <<EOF;
115Content-Type: application/xhtml+xml
116
117EOF
61 118
62 print "<?xml version='1.0' encoding='utf-8'?>", 119 print "<?xml version='1.0' encoding='utf-8'?>",
63 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', 120 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
64 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>", 121 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
65 "<head>", 122 "<head>",
66 "<title>Deliantra Keyword Search</title>", 123 "<title>Deliantra Keyword Search</title>",
67 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n", 124 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
68 "</head>", 125 "</head>",
69 "<body>"; 126 "<body>";
70 127
71 print "<h1>Search results for '", (join "' &amp; '", map escape_html $_, @found), "'</h1>", 128 print "<h1>Search results for '", (join "' &amp; '", map escape_html $_, @$found), "'</h1>",
72 "<p class='searchcount'>", (scalar @paths), " results found, up to 200 results shown.</p>", 129 "<p class='searchcount'>", (scalar @$res), " results found, up to $RESULTS results shown.</p>",
73 "<p class='searchresult'><ul>"; 130 "<p class='searchresult'><ul>";
74 131
75 pop @paths while @paths > 200; 132 pop @$res while @$res > $RESULTS;
76 133
77 for (sort @paths) { 134 for (sort { $a->[0] cmp $b->[0] } @$res) {
78 print "<li><a href='$_.xhtml'>$_</a></li>"; 135 print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
79 } 136 }
80 137
81 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>", 138 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
82 "</body></html>"; 139 "</body></html>";
83} 140}
84 141
85if ($ARGV[0] eq "-a") { 142if ($ARGV[0] eq "-a") {
86 shift; 143 shift;
87 144
88 tie %idx, SDBM_File, ".index.dat~", O_RDWR|O_CREAT, 0644, $DB_HASH 145 tie %idx, "GDBM_File", ".index.dat~", O_RDWR | O_CREAT, 0644
89 or die ".index.dat~: $!"; 146 or die ".index.dat~: $!";
90 147
91 my %idx2; 148 if ($ARGV[0] eq "-r") {
149 (tied %idx)->reorganize;
150 untie %idx;
151 exit 0;
152 }
153
154 my %stop = map +($_ => undef), split /\x00/, $idx{Vstopwords};
92 155
93 for my $path (@ARGV) { 156 for my $path (@ARGV) {
157 next if $path =~ m%^world-precomposed/%;
158
159 my %idx2;
160
94 (my $base = $path) =~ s/\.map$//; 161 (my $base = $path) =~ s/\.map$//;
95 my $docnum = pack "n", ++$idx{Vdocnum}; 162 my $docnum = ++$idx{Vdocnum};
96 $idx{"D$docnum"} = $base;
97 my $meta = Deliantra::Map->new_from_file ("$base.map"); 163 my $meta = eval { Deliantra::Map->new_from_file ("$base.map") }
164 or next;
98 165
99 for my $x (0 .. $meta->{width} - 1) { 166 $idx{"D" . pack "n", $docnum} = join "\x00", $base, (stat "$base.map")[9];
100 for my $y (0 .. $meta->{height} - 1) { 167
101 for my $a (@{ $meta->{map}[$x][$y] }) { 168 my ($x, $y, $pos);
169
170 my $add; $add = sub {
171 for my $a (@{ $_[0] }) {
172 # delete "obvious" crap elements
173 delete @$a{qw(elevation x y move_block move_slow move_allow)};
174
175 $add->(delete $a->{inventory})
176 if $a->{inventory};
177
178 $idx2{"T$a->{tag}"} ||= $pos
179 if exists $a->{tag};
180
102 for my $v (values %$a) { 181 for my $v (values %$a) {
103 $v = lc $v; 182 $v = lc $v;
104 $v =~ y/a-zA-Z0-9_\-.\// /c; 183 $v =~ y/a-zA-Z0-9_\-.\// /c; #/ vim
184
105 for (split /\s+/, $v) { 185 for (split /\s+/, $v) {
186 next if !FAST && exists $stop{$_};
187
106 $idx2{$_} .= $docnum; 188 $idx2{$_} ||= $pos;
107 189
108 if (/[_\-\.\/]/) { 190 if (/[_\-\.\/]/) {
109 $idx2{$_} .= $docnum for (split /[_\-\.\/]/) 191 $idx2{$_} ||= $pos for split /[_\-\.\/]/;
110 }
111 } 192 }
112 } 193 }
113 } 194 }
114 } 195 }
196 };
197
198 for $x (0 .. $meta->{width} - 1) {
199 my $col = $meta->{map}[$x];
200 for $y (0 .. $meta->{height} - 1) {
201 $pos = pack "nCC", $docnum, $x, $y;
202
203 $add->($col->[$y]);
204 }
115 } 205 }
116 }
117 206
207 if (FAST) {
118 while (my ($k, $v) = each %idx2) { 208 while (my ($k, $v) = each %idx2) {
119 $idx{$k} = pack "n*", keys %{ { map +($_ => 1), unpack "n*", $idx{$k}.$v } }; 209 $idx{$k} .= $v;
210 }
211 } else {
212 while (my ($k, $v) = each %idx2) {
213 my @val = keys %{ { map +($_ => undef), unpack "(A4)*", $idx{$k}.$v } };;
214
215 if ($RESULTS >= @val) {
216 $idx{$k} = join "", @val;
217 } else {
218 delete $idx{$k};
219 undef $stop{$k};
220 }
221 }
222 }
120 } 223 }
224
225 $idx{Vstopwords} = join "\x00", keys %stop;
226
227 untie %idx;
121 228
122} elsif ($ARGV[0] eq "-s") { 229} elsif ($ARGV[0] eq "-s") {
123 shift; 230 shift;
124 231
125 search @ARGV; 232 search @ARGV;
126} else { 233} else {
127 # assume CGI 234 # assume CGI
128 chdir "/var/www/maps.deliantra.net" or exit 69; 235# chdir "/var/www/maps.deliantra.net" or exit 69;
129 236
130 print <<EOF;
131Content-Type: application/xhtml+xml
132
133EOF
134 $ENV{QUERY_STRING} =~ s/^k=//; 237 $ENV{QUERY_STRING} =~ s/^([kt])=//;
238
239 if ($1 eq "t") {
240 dotag $ENV{QUERY_STRING};
241 } else {
135 search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING}; 242 search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING};
243 }
136} 244}
137 245

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines