ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cfmaps/cfmapidx
(Generate patch)

Comparing cfmaps/cfmapidx (file contents):
Revision 1.9 by root, Sun Jan 6 21:12:00 2008 UTC vs.
Revision 1.14 by root, Sat Oct 31 17:26:44 2009 UTC

1#!/opt/bin/perl 1#!/opt/bin/perl
2 2
3# cfmap2idx - inverted index for deliantra maps 3# cfmapidx - inverted index for deliantra maps
4# Copyright (C) 2005,2007,2008 Marc Lehmann <cfmaps@schmorp.de> 4# Copyright (C) 2005,2007,2008,2009 Marc Lehmann <cfmaps@schmorp.de>
5# 5#
6# CFMAPIDX is free software; you can redistribute it and/or modify 6# CFMAPIDX is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by 7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or 8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version. 9# (at your option) any later version.
15# 15#
16# You should have received a copy of the GNU General Public License 16# You should have received a copy of the GNU General Public License
17# along with cfmaps; if not, write to the Free Software 17# along with cfmaps; if not, write to the Free Software
18# Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 18# Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
20our $VERSION = '0.913'; 20our $VERSION = '2.0';
21
22use common::sense;
21 23
22use Deliantra; 24use Deliantra;
25use Deliantra::Map;
26
27use Fcntl;
23use DB_File; 28use GDBM_File;
29
30sub FAST () { 1 } # skip stopwords, skip duplicate-management
31
32our $RESULTS = 200;
33our %idx; # the database
24 34
25sub escape_html($) { 35sub escape_html($) {
26 local $_ = shift; 36 local $_ = shift;
27 s/([<>&])/sprintf "&#%d;", ord $1/ge; 37 s/([<>&])/sprintf "&#%d;", ord $1/ge;
28 $_ 38 $_
29} 39}
30 40
31sub search { 41sub find {
32 my (@kw) = @_; 42 my @kw = @_;
33 43
34 tie %idx, DB_File, ".index.dat", O_RDONLY, 0, $DB_HASH 44 tie %idx, "GDBM_File", ".index.dat", O_RDONLY, 0
35 or die ".index.dat: $!"; 45 or die ".index.dat: $!";
36 46
37 my $cnt; 47 my $cnt;
38 my %res; 48 my %res;
39 my @found; 49 my @found;
40 50
41 for (map lc, @kw) { 51 for (@kw) {
42 if (exists $idx{$_}) { 52 if (exists $idx{$_}) {
43 $cnt++; 53 $cnt++;
44 $res{$_}++ for unpack "n*", $idx{$_}; 54 $res{$_}++ for unpack "(A4)*", $idx{$_};
45 push @found, $_; 55 push @found, $_;
46 #warn "$_ found\n";#d# 56 #warn "$_ found\n";#d#
47 } else { 57 } else {
48 #warn "$_ not found\n";#d# 58 #warn "$_ not found\n";#d#
49 } 59 }
50 } 60 }
51 61
52 my @paths; 62 my @res;
53 63
54 while (my ($k, $v) = each %res) { 64 while (my ($k, $v) = each %res) {
55 next unless $v == $cnt; 65 next unless $v == $cnt;
56 push @paths, $idx{"D" . pack "n", $k}; 66
67 my ($docnum, $x, $y) = unpack "nCC", $k;
68 my ($path, $mtime) = split /\x00/, $idx{"D" . pack "n", $docnum};
69
70 push @res, [$path, $x, $y];
57 } 71 }
72
73 (\@res, \@found)
74}
75
76sub dotag {
77 my ($tag) = @_;
78
79 my ($res) = find "T$tag";
80
81 use Data::Dumper; warn Dumper $res;
82 print <<EOF;
83Content-Type: application/xhtml+xml
84
85EOF
86
87 print "<?xml version='1.0' encoding='utf-8'?>",
88 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
89 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
90 "<head>",
91 "<title>Deliantra Tag Search</title>",
92 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
93 "</head>",
94 "<body>";
95
96 print "<h1>Search results for tag '", (escape_html $tag), "'</h1>",
97 "<p class='searchresult'><ul>";
98
99 for (sort { $a->[0] cmp $b->[0] } @$res) {
100 print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
101 }
102
103 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
104 "</body></html>";
105}
106
107sub search {
108 my (@kw) = @_;
109
110 my ($res, $found) = find map lc, @kw;
58 111
59 binmode STDOUT, ":utf8"; 112 binmode STDOUT, ":utf8";
113
114 print <<EOF;
115Content-Type: application/xhtml+xml
116
117EOF
60 118
61 print "<?xml version='1.0' encoding='utf-8'?>", 119 print "<?xml version='1.0' encoding='utf-8'?>",
62 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', 120 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
63 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>", 121 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
64 "<head>", 122 "<head>",
65 "<title>Deliantra Keyword Search</title>", 123 "<title>Deliantra Keyword Search</title>",
66 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n", 124 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
67 "</head>", 125 "</head>",
68 "<body>"; 126 "<body>";
69 127
70 print "<h1>Search results for '", (join "' &amp; '", map escape_html $_, @found), "'</h1>", 128 print "<h1>Search results for '", (join "' &amp; '", map escape_html $_, @$found), "'</h1>",
71 "<p class='searchcount'>", (scalar @paths), " results found, up to 200 results shown.</p>", 129 "<p class='searchcount'>", (scalar @$res), " results found, up to $RESULTS results shown.</p>",
72 "<p class='searchresult'><ul>"; 130 "<p class='searchresult'><ul>";
73 131
74 pop @paths while @paths > 200; 132 pop @$res while @$res > $RESULTS;
75 133
76 for (sort @paths) { 134 for (sort { $a->[0] cmp $b->[0] } @$res) {
77 print "<li><a href='$_.xhtml'>$_</a></li>"; 135 print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
78 } 136 }
79 137
80 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>", 138 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
81 "</body></html>"; 139 "</body></html>";
82} 140}
83 141
84if ($ARGV[0] eq "-a") { 142if ($ARGV[0] eq "-a") {
85 shift; 143 shift;
86 144
87 tie %idx, DB_File, ".index.dat~", O_RDWR|O_CREAT, 0644, $DB_HASH 145 tie %idx, "GDBM_File", ".index.dat~", O_RDWR | O_CREAT, 0644
88 or die ".index.dat~: $!"; 146 or die ".index.dat~: $!";
89 147
90 my %idx2; 148 if ($ARGV[0] eq "-r") {
149 (tied %idx)->reorganize;
150 untie %idx;
151 exit 0;
152 }
153
154 my %stop = map +($_ => undef), split /\x00/, $idx{Vstopwords};
91 155
92 for my $path (@ARGV) { 156 for my $path (@ARGV) {
157 next if $path =~ m%^world-precomposed/%;
158
159 my %idx2;
160
93 (my $base = $path) =~ s/\.map$//; 161 (my $base = $path) =~ s/\.map$//;
94 my $docnum = pack "n", ++$idx{Vdocnum}; 162 my $docnum = ++$idx{Vdocnum};
95 $idx{"D$docnum"} = $base; 163 my $meta = eval { Deliantra::Map->new_from_file ("$base.map") }
96 my $meta = read_arch "$base.map"; 164 or next;
97 165
98 for my $x (0 .. $meta->{width} - 1) { 166 $idx{"D" . pack "n", $docnum} = join "\x00", $base, (stat "$base.map")[9];
99 for my $y (0 .. $meta->{height} - 1) { 167
100 for my $a (@{ $meta->{map}[$x][$y] }) { 168 my ($x, $y, $pos);
169
170 my $add; $add = sub {
171 for my $a (@{ $_[0] }) {
172 # delete "obvious" crap elements
173 delete @$a{qw(elevation x y move_block move_slow move_allow)};
174
175 $add->(delete $a->{inventory})
176 if $a->{inventory};
177
178 $idx2{"T$a->{tag}"} ||= $pos
179 if exists $a->{tag};
180
101 for my $v (values %$a) { 181 for my $v (values %$a) {
102 $v = lc $v; 182 $v = lc $v;
103 $v =~ y/a-zA-Z0-9_\-.\// /c; 183 $v =~ y/a-zA-Z0-9_\-.\// /c; #/ vim
184
104 for (split /\s+/, $v) { 185 for (split /\s+/, $v) {
186 next if !FAST && exists $stop{$_};
187
105 $idx2{$_} .= $docnum; 188 $idx2{$_} ||= $pos;
106 189
107 if (/[_\-\.\/]/) { 190 if (/[_\-\.\/]/) {
108 $idx2{$_} .= $docnum for (split /[_\-\.\/]/) 191 $idx2{$_} ||= $pos for split /[_\-\.\/]/;
109 }
110 } 192 }
111 } 193 }
112 } 194 }
113 } 195 }
196 };
197
198 for $x (0 .. $meta->{width} - 1) {
199 my $col = $meta->{map}[$x];
200 for $y (0 .. $meta->{height} - 1) {
201 $pos = pack "nCC", $docnum, $x, $y;
202
203 $add->($col->[$y]);
204 }
114 } 205 }
115 }
116 206
207 if (FAST) {
117 while (my ($k, $v) = each %idx2) { 208 while (my ($k, $v) = each %idx2) {
118 $idx{$k} = pack "n*", keys %{ { map +($_ => 1), unpack "n*", $idx{$k}.$v } }; 209 $idx{$k} .= $v;
210 }
211 } else {
212 while (my ($k, $v) = each %idx2) {
213 my @val = keys %{ { map +($_ => undef), unpack "(A4)*", $idx{$k}.$v } };;
214
215 if ($RESULTS >= @val) {
216 $idx{$k} = join "", @val;
217 } else {
218 delete $idx{$k};
219 undef $stop{$k};
220 }
221 }
222 }
119 } 223 }
224
225 $idx{Vstopwords} = join "\x00", keys %stop;
226
227 untie %idx;
120 228
121} elsif ($ARGV[0] eq "-s") { 229} elsif ($ARGV[0] eq "-s") {
122 shift; 230 shift;
123 231
124 search @ARGV; 232 search @ARGV;
125} else { 233} else {
126 # assume CGI 234 # assume CGI
127 chdir "/var/www/maps.deliantra.net" or exit 69; 235# chdir "/var/www/maps.deliantra.net" or exit 69;
128 236
129 print <<EOF;
130Content-Type: application/xhtml+xml
131
132EOF
133 $ENV{QUERY_STRING} =~ s/^k=//; 237 $ENV{QUERY_STRING} =~ s/^([kt])=//;
238
239 if ($1 eq "t") {
240 dotag $ENV{QUERY_STRING};
241 } else {
134 search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING}; 242 search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING};
243 }
135} 244}
136 245

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines