1 | #!/opt/bin/perl |
1 | #!/opt/bin/perl |
2 | |
2 | |
3 | # cfmap2idx - inverted index for deliantra maps |
3 | # cfmapidx - inverted index for deliantra maps |
4 | # Copyright (C) 2005,2007,2008 Marc Lehmann <cfmaps@schmorp.de> |
4 | # Copyright (C) 2005,2007,2008,2009 Marc Lehmann <cfmaps@schmorp.de> |
5 | # |
5 | # |
6 | # CFMAPIDX is free software; you can redistribute it and/or modify |
6 | # CFMAPIDX is free software; you can redistribute it and/or modify |
7 | # it under the terms of the GNU General Public License as published by |
7 | # it under the terms of the GNU General Public License as published by |
8 | # the Free Software Foundation; either version 2 of the License, or |
8 | # the Free Software Foundation; either version 2 of the License, or |
9 | # (at your option) any later version. |
9 | # (at your option) any later version. |
… | |
… | |
15 | # |
15 | # |
16 | # You should have received a copy of the GNU General Public License |
16 | # You should have received a copy of the GNU General Public License |
17 | # along with cfmaps; if not, write to the Free Software |
17 | # along with cfmaps; if not, write to the Free Software |
18 | # Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | # Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | |
19 | |
20 | our $VERSION = '0.913'; |
20 | our $VERSION = '2.0'; |
|
|
21 | |
|
|
22 | use common::sense; |
21 | |
23 | |
22 | use Deliantra; |
24 | use Deliantra; |
23 | use Deliantra::Map; |
25 | use Deliantra::Map; |
|
|
26 | |
|
|
27 | use Fcntl; |
24 | use DB_File; |
28 | use GDBM_File; |
|
|
29 | |
|
|
30 | sub FAST () { 1 } # skip stopwords, skip duplicate-management |
|
|
31 | |
|
|
32 | our $RESULTS = 200; |
|
|
33 | our %idx; # the database |
25 | |
34 | |
26 | sub escape_html($) { |
35 | sub escape_html($) { |
27 | local $_ = shift; |
36 | local $_ = shift; |
28 | s/([<>&])/sprintf "&#%d;", ord $1/ge; |
37 | s/([<>&])/sprintf "&#%d;", ord $1/ge; |
29 | $_ |
38 | $_ |
30 | } |
39 | } |
31 | |
40 | |
32 | sub search { |
41 | sub find { |
33 | my (@kw) = @_; |
42 | my @kw = @_; |
34 | |
43 | |
35 | tie %idx, DB_File, ".index.dat", O_RDONLY, 0, $DB_HASH |
44 | tie %idx, "GDBM_File", ".index.dat", O_RDONLY, 0 |
36 | or die ".index.dat: $!"; |
45 | or die ".index.dat: $!"; |
37 | |
46 | |
38 | my $cnt; |
47 | my $cnt; |
39 | my %res; |
48 | my %res; |
40 | my @found; |
49 | my @found; |
41 | |
50 | |
42 | for (map lc, @kw) { |
51 | for (@kw) { |
43 | if (exists $idx{$_}) { |
52 | if (exists $idx{$_}) { |
44 | $cnt++; |
53 | $cnt++; |
45 | $res{$_}++ for unpack "n*", $idx{$_}; |
54 | $res{$_}++ for unpack "(A4)*", $idx{$_}; |
46 | push @found, $_; |
55 | push @found, $_; |
47 | #warn "$_ found\n";#d# |
56 | #warn "$_ found\n";#d# |
48 | } else { |
57 | } else { |
49 | #warn "$_ not found\n";#d# |
58 | #warn "$_ not found\n";#d# |
50 | } |
59 | } |
51 | } |
60 | } |
52 | |
61 | |
53 | my @paths; |
62 | my @res; |
54 | |
63 | |
55 | while (my ($k, $v) = each %res) { |
64 | while (my ($k, $v) = each %res) { |
56 | next unless $v == $cnt; |
65 | next unless $v == $cnt; |
57 | push @paths, $idx{"D" . pack "n", $k}; |
66 | |
|
|
67 | my ($docnum, $x, $y) = unpack "nCC", $k; |
|
|
68 | my ($path, $mtime) = split /\x00/, $idx{"D" . pack "n", $docnum}; |
|
|
69 | |
|
|
70 | push @res, [$path, $x, $y]; |
58 | } |
71 | } |
|
|
72 | |
|
|
73 | (\@res, \@found) |
|
|
74 | } |
|
|
75 | |
|
|
76 | sub dotag { |
|
|
77 | my ($tag) = @_; |
|
|
78 | |
|
|
79 | my ($res) = find "T$tag"; |
|
|
80 | |
|
|
81 | use Data::Dumper; warn Dumper $res; |
|
|
82 | print <<EOF; |
|
|
83 | Content-Type: application/xhtml+xml |
|
|
84 | |
|
|
85 | EOF |
|
|
86 | |
|
|
87 | print "<?xml version='1.0' encoding='utf-8'?>", |
|
|
88 | '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', |
|
|
89 | "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>", |
|
|
90 | "<head>", |
|
|
91 | "<title>Deliantra Tag Search</title>", |
|
|
92 | "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n", |
|
|
93 | "</head>", |
|
|
94 | "<body>"; |
|
|
95 | |
|
|
96 | print "<h1>Search results for tag '", (escape_html $tag), "'</h1>", |
|
|
97 | "<p class='searchresult'><ul>"; |
|
|
98 | |
|
|
99 | for (sort { $a->[0] cmp $b->[0] } @$res) { |
|
|
100 | print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>"; |
|
|
101 | } |
|
|
102 | |
|
|
103 | print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>", |
|
|
104 | "</body></html>"; |
|
|
105 | } |
|
|
106 | |
|
|
107 | sub search { |
|
|
108 | my (@kw) = @_; |
|
|
109 | |
|
|
110 | my ($res, $found) = find map lc, @kw; |
59 | |
111 | |
60 | binmode STDOUT, ":utf8"; |
112 | binmode STDOUT, ":utf8"; |
|
|
113 | |
|
|
114 | print <<EOF; |
|
|
115 | Content-Type: application/xhtml+xml |
|
|
116 | |
|
|
117 | EOF |
61 | |
118 | |
62 | print "<?xml version='1.0' encoding='utf-8'?>", |
119 | print "<?xml version='1.0' encoding='utf-8'?>", |
63 | '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', |
120 | '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', |
64 | "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>", |
121 | "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>", |
65 | "<head>", |
122 | "<head>", |
66 | "<title>Deliantra Keyword Search</title>", |
123 | "<title>Deliantra Keyword Search</title>", |
67 | "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n", |
124 | "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n", |
68 | "</head>", |
125 | "</head>", |
69 | "<body>"; |
126 | "<body>"; |
70 | |
127 | |
71 | print "<h1>Search results for '", (join "' & '", map escape_html $_, @found), "'</h1>", |
128 | print "<h1>Search results for '", (join "' & '", map escape_html $_, @$found), "'</h1>", |
72 | "<p class='searchcount'>", (scalar @paths), " results found, up to 200 results shown.</p>", |
129 | "<p class='searchcount'>", (scalar @$res), " results found, up to $RESULTS results shown.</p>", |
73 | "<p class='searchresult'><ul>"; |
130 | "<p class='searchresult'><ul>"; |
74 | |
131 | |
75 | pop @paths while @paths > 200; |
132 | pop @$res while @$res > $RESULTS; |
76 | |
133 | |
77 | for (sort @paths) { |
134 | for (sort { $a->[0] cmp $b->[0] } @$res) { |
78 | print "<li><a href='$_.xhtml'>$_</a></li>"; |
135 | print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>"; |
79 | } |
136 | } |
80 | |
137 | |
81 | print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>", |
138 | print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>", |
82 | "</body></html>"; |
139 | "</body></html>"; |
83 | } |
140 | } |
84 | |
141 | |
85 | if ($ARGV[0] eq "-a") { |
142 | if ($ARGV[0] eq "-a") { |
86 | shift; |
143 | shift; |
87 | |
144 | |
88 | tie %idx, DB_File, ".index.dat~", O_RDWR|O_CREAT, 0644, $DB_HASH |
145 | tie %idx, "GDBM_File", ".index.dat~", O_RDWR | O_CREAT, 0644 |
89 | or die ".index.dat~: $!"; |
146 | or die ".index.dat~: $!"; |
90 | |
147 | |
91 | my %idx2; |
148 | if ($ARGV[0] eq "-r") { |
|
|
149 | (tied %idx)->reorganize; |
|
|
150 | untie %idx; |
|
|
151 | exit 0; |
|
|
152 | } |
|
|
153 | |
|
|
154 | my %stop = map +($_ => undef), split /\x00/, $idx{Vstopwords}; |
92 | |
155 | |
93 | for my $path (@ARGV) { |
156 | for my $path (@ARGV) { |
|
|
157 | next if $path =~ m%^world-precomposed/%; |
|
|
158 | |
|
|
159 | my %idx2; |
|
|
160 | |
94 | (my $base = $path) =~ s/\.map$//; |
161 | (my $base = $path) =~ s/\.map$//; |
95 | my $docnum = pack "n", ++$idx{Vdocnum}; |
162 | my $docnum = ++$idx{Vdocnum}; |
96 | $idx{"D$docnum"} = $base; |
|
|
97 | my $meta = Deliantra::Map->new_from_file ("$base.map"); |
163 | my $meta = eval { Deliantra::Map->new_from_file ("$base.map") } |
|
|
164 | or next; |
98 | |
165 | |
99 | for my $x (0 .. $meta->{width} - 1) { |
166 | $idx{"D" . pack "n", $docnum} = join "\x00", $base, (stat "$base.map")[9]; |
100 | for my $y (0 .. $meta->{height} - 1) { |
167 | |
101 | for my $a (@{ $meta->{map}[$x][$y] }) { |
168 | my ($x, $y, $pos); |
|
|
169 | |
|
|
170 | my $add; $add = sub { |
|
|
171 | for my $a (@{ $_[0] }) { |
|
|
172 | # delete "obvious" crap elements |
|
|
173 | delete @$a{qw(elevation x y move_block move_slow move_allow)}; |
|
|
174 | |
|
|
175 | $add->(delete $a->{inventory}) |
|
|
176 | if $a->{inventory}; |
|
|
177 | |
|
|
178 | $idx2{"T$a->{tag}"} ||= $pos |
|
|
179 | if exists $a->{tag}; |
|
|
180 | |
102 | for my $v (values %$a) { |
181 | for my $v (values %$a) { |
103 | $v = lc $v; |
182 | $v = lc $v; |
104 | $v =~ y/a-zA-Z0-9_\-.\// /c; |
183 | $v =~ y/a-zA-Z0-9_\-.\// /c; #/ vim |
|
|
184 | |
105 | for (split /\s+/, $v) { |
185 | for (split /\s+/, $v) { |
|
|
186 | next if !FAST && exists $stop{$_}; |
|
|
187 | |
106 | $idx2{$_} .= $docnum; |
188 | $idx2{$_} ||= $pos; |
107 | |
189 | |
108 | if (/[_\-\.\/]/) { |
190 | if (/[_\-\.\/]/) { |
109 | $idx2{$_} .= $docnum for (split /[_\-\.\/]/) |
191 | $idx2{$_} ||= $pos for split /[_\-\.\/]/; |
110 | } |
|
|
111 | } |
192 | } |
112 | } |
193 | } |
113 | } |
194 | } |
114 | } |
195 | } |
|
|
196 | }; |
|
|
197 | |
|
|
198 | for $x (0 .. $meta->{width} - 1) { |
|
|
199 | my $col = $meta->{map}[$x]; |
|
|
200 | for $y (0 .. $meta->{height} - 1) { |
|
|
201 | $pos = pack "nCC", $docnum, $x, $y; |
|
|
202 | |
|
|
203 | $add->($col->[$y]); |
|
|
204 | } |
115 | } |
205 | } |
116 | } |
|
|
117 | |
206 | |
|
|
207 | if (FAST) { |
118 | while (my ($k, $v) = each %idx2) { |
208 | while (my ($k, $v) = each %idx2) { |
119 | $idx{$k} = pack "n*", keys %{ { map +($_ => 1), unpack "n*", $idx{$k}.$v } }; |
209 | $idx{$k} .= $v; |
|
|
210 | } |
|
|
211 | } else { |
|
|
212 | while (my ($k, $v) = each %idx2) { |
|
|
213 | my @val = keys %{ { map +($_ => undef), unpack "(A4)*", $idx{$k}.$v } };; |
|
|
214 | |
|
|
215 | if ($RESULTS >= @val) { |
|
|
216 | $idx{$k} = join "", @val; |
|
|
217 | } else { |
|
|
218 | delete $idx{$k}; |
|
|
219 | undef $stop{$k}; |
|
|
220 | } |
|
|
221 | } |
|
|
222 | } |
120 | } |
223 | } |
|
|
224 | |
|
|
225 | $idx{Vstopwords} = join "\x00", keys %stop; |
|
|
226 | |
|
|
227 | untie %idx; |
121 | |
228 | |
122 | } elsif ($ARGV[0] eq "-s") { |
229 | } elsif ($ARGV[0] eq "-s") { |
123 | shift; |
230 | shift; |
124 | |
231 | |
125 | search @ARGV; |
232 | search @ARGV; |
126 | } else { |
233 | } else { |
127 | # assume CGI |
234 | # assume CGI |
128 | chdir "/var/www/maps.deliantra.net" or exit 69; |
235 | # chdir "/var/www/maps.deliantra.net" or exit 69; |
129 | |
236 | |
130 | print <<EOF; |
|
|
131 | Content-Type: application/xhtml+xml |
|
|
132 | |
|
|
133 | EOF |
|
|
134 | $ENV{QUERY_STRING} =~ s/^k=//; |
237 | $ENV{QUERY_STRING} =~ s/^([kt])=//; |
|
|
238 | |
|
|
239 | if ($1 eq "t") { |
|
|
240 | dotag $ENV{QUERY_STRING}; |
|
|
241 | } else { |
135 | search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING}; |
242 | search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING}; |
|
|
243 | } |
136 | } |
244 | } |
137 | |
245 | |