ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cfmaps/cfmapidx
Revision: 1.14
Committed: Sat Oct 31 17:26:44 2009 UTC (14 years, 6 months ago) by root
Branch: MAIN
CVS Tags: HEAD
Changes since 1.13: +142 -40 lines
Log Message:
- support map tags in cfmap2html and cfmapidx
- reorganise index database, reduces db size considerably
- support stop words and "stop attributes"
- look into object inventories for cfmapidx
- speed up cfmapidx
- skip world-precomposed in cfmapidx

File Contents

# Content
1 #!/opt/bin/perl
2
3 # cfmapidx - inverted index for deliantra maps
4 # Copyright (C) 2005,2007,2008,2009 Marc Lehmann <cfmaps@schmorp.de>
5 #
6 # CFMAPIDX is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with cfmaps; if not, write to the Free Software
18 # Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
20 our $VERSION = '2.0';
21
22 use common::sense;
23
24 use Deliantra;
25 use Deliantra::Map;
26
27 use Fcntl;
28 use GDBM_File;
29
30 sub FAST () { 1 } # skip stopwords, skip duplicate-management
31
32 our $RESULTS = 200;
33 our %idx; # the database
34
35 sub escape_html($) {
36 local $_ = shift;
37 s/([<>&])/sprintf "&#%d;", ord $1/ge;
38 $_
39 }
40
41 sub find {
42 my @kw = @_;
43
44 tie %idx, "GDBM_File", ".index.dat", O_RDONLY, 0
45 or die ".index.dat: $!";
46
47 my $cnt;
48 my %res;
49 my @found;
50
51 for (@kw) {
52 if (exists $idx{$_}) {
53 $cnt++;
54 $res{$_}++ for unpack "(A4)*", $idx{$_};
55 push @found, $_;
56 #warn "$_ found\n";#d#
57 } else {
58 #warn "$_ not found\n";#d#
59 }
60 }
61
62 my @res;
63
64 while (my ($k, $v) = each %res) {
65 next unless $v == $cnt;
66
67 my ($docnum, $x, $y) = unpack "nCC", $k;
68 my ($path, $mtime) = split /\x00/, $idx{"D" . pack "n", $docnum};
69
70 push @res, [$path, $x, $y];
71 }
72
73 (\@res, \@found)
74 }
75
76 sub dotag {
77 my ($tag) = @_;
78
79 my ($res) = find "T$tag";
80
81 use Data::Dumper; warn Dumper $res;
82 print <<EOF;
83 Content-Type: application/xhtml+xml
84
85 EOF
86
87 print "<?xml version='1.0' encoding='utf-8'?>",
88 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
89 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
90 "<head>",
91 "<title>Deliantra Tag Search</title>",
92 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
93 "</head>",
94 "<body>";
95
96 print "<h1>Search results for tag '", (escape_html $tag), "'</h1>",
97 "<p class='searchresult'><ul>";
98
99 for (sort { $a->[0] cmp $b->[0] } @$res) {
100 print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
101 }
102
103 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
104 "</body></html>";
105 }
106
107 sub search {
108 my (@kw) = @_;
109
110 my ($res, $found) = find map lc, @kw;
111
112 binmode STDOUT, ":utf8";
113
114 print <<EOF;
115 Content-Type: application/xhtml+xml
116
117 EOF
118
119 print "<?xml version='1.0' encoding='utf-8'?>",
120 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
121 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
122 "<head>",
123 "<title>Deliantra Keyword Search</title>",
124 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
125 "</head>",
126 "<body>";
127
128 print "<h1>Search results for '", (join "' &amp; '", map escape_html $_, @$found), "'</h1>",
129 "<p class='searchcount'>", (scalar @$res), " results found, up to $RESULTS results shown.</p>",
130 "<p class='searchresult'><ul>";
131
132 pop @$res while @$res > $RESULTS;
133
134 for (sort { $a->[0] cmp $b->[0] } @$res) {
135 print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
136 }
137
138 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
139 "</body></html>";
140 }
141
142 if ($ARGV[0] eq "-a") {
143 shift;
144
145 tie %idx, "GDBM_File", ".index.dat~", O_RDWR | O_CREAT, 0644
146 or die ".index.dat~: $!";
147
148 if ($ARGV[0] eq "-r") {
149 (tied %idx)->reorganize;
150 untie %idx;
151 exit 0;
152 }
153
154 my %stop = map +($_ => undef), split /\x00/, $idx{Vstopwords};
155
156 for my $path (@ARGV) {
157 next if $path =~ m%^world-precomposed/%;
158
159 my %idx2;
160
161 (my $base = $path) =~ s/\.map$//;
162 my $docnum = ++$idx{Vdocnum};
163 my $meta = eval { Deliantra::Map->new_from_file ("$base.map") }
164 or next;
165
166 $idx{"D" . pack "n", $docnum} = join "\x00", $base, (stat "$base.map")[9];
167
168 my ($x, $y, $pos);
169
170 my $add; $add = sub {
171 for my $a (@{ $_[0] }) {
172 # delete "obvious" crap elements
173 delete @$a{qw(elevation x y move_block move_slow move_allow)};
174
175 $add->(delete $a->{inventory})
176 if $a->{inventory};
177
178 $idx2{"T$a->{tag}"} ||= $pos
179 if exists $a->{tag};
180
181 for my $v (values %$a) {
182 $v = lc $v;
183 $v =~ y/a-zA-Z0-9_\-.\// /c; #/ vim
184
185 for (split /\s+/, $v) {
186 next if !FAST && exists $stop{$_};
187
188 $idx2{$_} ||= $pos;
189
190 if (/[_\-\.\/]/) {
191 $idx2{$_} ||= $pos for split /[_\-\.\/]/;
192 }
193 }
194 }
195 }
196 };
197
198 for $x (0 .. $meta->{width} - 1) {
199 my $col = $meta->{map}[$x];
200 for $y (0 .. $meta->{height} - 1) {
201 $pos = pack "nCC", $docnum, $x, $y;
202
203 $add->($col->[$y]);
204 }
205 }
206
207 if (FAST) {
208 while (my ($k, $v) = each %idx2) {
209 $idx{$k} .= $v;
210 }
211 } else {
212 while (my ($k, $v) = each %idx2) {
213 my @val = keys %{ { map +($_ => undef), unpack "(A4)*", $idx{$k}.$v } };;
214
215 if ($RESULTS >= @val) {
216 $idx{$k} = join "", @val;
217 } else {
218 delete $idx{$k};
219 undef $stop{$k};
220 }
221 }
222 }
223 }
224
225 $idx{Vstopwords} = join "\x00", keys %stop;
226
227 untie %idx;
228
229 } elsif ($ARGV[0] eq "-s") {
230 shift;
231
232 search @ARGV;
233 } else {
234 # assume CGI
235 # chdir "/var/www/maps.deliantra.net" or exit 69;
236
237 $ENV{QUERY_STRING} =~ s/^([kt])=//;
238
239 if ($1 eq "t") {
240 dotag $ENV{QUERY_STRING};
241 } else {
242 search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING};
243 }
244 }
245