ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cfmaps/cfmapidx
(Generate patch)

Comparing cfmaps/cfmapidx (file contents):
Revision 1.12 by root, Thu Oct 22 08:19:16 2009 UTC vs.
Revision 1.14 by root, Sat Oct 31 17:26:44 2009 UTC

1#!/opt/bin/perl 1#!/opt/bin/perl
2 2
3# cfmap2idx - inverted index for deliantra maps 3# cfmapidx - inverted index for deliantra maps
4# Copyright (C) 2005,2007,2008 Marc Lehmann <cfmaps@schmorp.de> 4# Copyright (C) 2005,2007,2008,2009 Marc Lehmann <cfmaps@schmorp.de>
5# 5#
6# CFMAPIDX is free software; you can redistribute it and/or modify 6# CFMAPIDX is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by 7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or 8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version. 9# (at your option) any later version.
15# 15#
16# You should have received a copy of the GNU General Public License 16# You should have received a copy of the GNU General Public License
17# along with cfmaps; if not, write to the Free Software 17# along with cfmaps; if not, write to the Free Software
18# Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 18# Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
20our $VERSION = '0.923'; 20our $VERSION = '2.0';
21
22use common::sense;
21 23
22use Deliantra; 24use Deliantra;
23use Deliantra::Map; 25use Deliantra::Map;
24 26
25use Fcntl; 27use Fcntl;
26use SDBM_File; 28use GDBM_File;
29
30sub FAST () { 1 } # skip stopwords, skip duplicate-management
31
32our $RESULTS = 200;
33our %idx; # the database
27 34
28sub escape_html($) { 35sub escape_html($) {
29 local $_ = shift; 36 local $_ = shift;
30 s/([<>&])/sprintf "&#%d;", ord $1/ge; 37 s/([<>&])/sprintf "&#%d;", ord $1/ge;
31 $_ 38 $_
32} 39}
33 40
34sub search { 41sub find {
35 my (@kw) = @_; 42 my @kw = @_;
36 43
37 tie %idx, SDBM_File, ".index.dat", O_RDONLY, 0 44 tie %idx, "GDBM_File", ".index.dat", O_RDONLY, 0
38 or die ".index.dat: $!"; 45 or die ".index.dat: $!";
39 46
40 my $cnt; 47 my $cnt;
41 my %res; 48 my %res;
42 my @found; 49 my @found;
43 50
44 for (map lc, @kw) { 51 for (@kw) {
45 if (exists $idx{$_}) { 52 if (exists $idx{$_}) {
46 $cnt++; 53 $cnt++;
47 $res{$_}++ for unpack "n*", $idx{$_}; 54 $res{$_}++ for unpack "(A4)*", $idx{$_};
48 push @found, $_; 55 push @found, $_;
49 #warn "$_ found\n";#d# 56 #warn "$_ found\n";#d#
50 } else { 57 } else {
51 #warn "$_ not found\n";#d# 58 #warn "$_ not found\n";#d#
52 } 59 }
53 } 60 }
54 61
55 my @paths; 62 my @res;
56 63
57 while (my ($k, $v) = each %res) { 64 while (my ($k, $v) = each %res) {
58 next unless $v == $cnt; 65 next unless $v == $cnt;
59 push @paths, $idx{"D" . pack "n", $k}; 66
67 my ($docnum, $x, $y) = unpack "nCC", $k;
68 my ($path, $mtime) = split /\x00/, $idx{"D" . pack "n", $docnum};
69
70 push @res, [$path, $x, $y];
60 } 71 }
72
73 (\@res, \@found)
74}
75
76sub dotag {
77 my ($tag) = @_;
78
79 my ($res) = find "T$tag";
80
81 use Data::Dumper; warn Dumper $res;
82 print <<EOF;
83Content-Type: application/xhtml+xml
84
85EOF
86
87 print "<?xml version='1.0' encoding='utf-8'?>",
88 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
89 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
90 "<head>",
91 "<title>Deliantra Tag Search</title>",
92 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
93 "</head>",
94 "<body>";
95
96 print "<h1>Search results for tag '", (escape_html $tag), "'</h1>",
97 "<p class='searchresult'><ul>";
98
99 for (sort { $a->[0] cmp $b->[0] } @$res) {
100 print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
101 }
102
103 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
104 "</body></html>";
105}
106
107sub search {
108 my (@kw) = @_;
109
110 my ($res, $found) = find map lc, @kw;
61 111
62 binmode STDOUT, ":utf8"; 112 binmode STDOUT, ":utf8";
113
114 print <<EOF;
115Content-Type: application/xhtml+xml
116
117EOF
63 118
64 print "<?xml version='1.0' encoding='utf-8'?>", 119 print "<?xml version='1.0' encoding='utf-8'?>",
65 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', 120 '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
66 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>", 121 "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
67 "<head>", 122 "<head>",
68 "<title>Deliantra Keyword Search</title>", 123 "<title>Deliantra Keyword Search</title>",
69 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n", 124 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
70 "</head>", 125 "</head>",
71 "<body>"; 126 "<body>";
72 127
73 print "<h1>Search results for '", (join "' &amp; '", map escape_html $_, @found), "'</h1>", 128 print "<h1>Search results for '", (join "' &amp; '", map escape_html $_, @$found), "'</h1>",
74 "<p class='searchcount'>", (scalar @paths), " results found, up to 200 results shown.</p>", 129 "<p class='searchcount'>", (scalar @$res), " results found, up to $RESULTS results shown.</p>",
75 "<p class='searchresult'><ul>"; 130 "<p class='searchresult'><ul>";
76 131
77 pop @paths while @paths > 200; 132 pop @$res while @$res > $RESULTS;
78 133
79 for (sort @paths) { 134 for (sort { $a->[0] cmp $b->[0] } @$res) {
80 print "<li><a href='$_.xhtml'>$_</a></li>"; 135 print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
81 } 136 }
82 137
83 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>", 138 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
84 "</body></html>"; 139 "</body></html>";
85} 140}
86 141
87if ($ARGV[0] eq "-a") { 142if ($ARGV[0] eq "-a") {
88 shift; 143 shift;
89 144
90 tie %idx, SDBM_File, ".index.dat~", O_RDWR|O_CREAT, 0644 145 tie %idx, "GDBM_File", ".index.dat~", O_RDWR | O_CREAT, 0644
91 or die ".index.dat~: $!"; 146 or die ".index.dat~: $!";
92 147
93 my %idx2; 148 if ($ARGV[0] eq "-r") {
149 (tied %idx)->reorganize;
150 untie %idx;
151 exit 0;
152 }
153
154 my %stop = map +($_ => undef), split /\x00/, $idx{Vstopwords};
94 155
95 for my $path (@ARGV) { 156 for my $path (@ARGV) {
157 next if $path =~ m%^world-precomposed/%;
158
159 my %idx2;
160
96 (my $base = $path) =~ s/\.map$//; 161 (my $base = $path) =~ s/\.map$//;
97 my $docnum = pack "n", ++$idx{Vdocnum}; 162 my $docnum = ++$idx{Vdocnum};
98 $idx{"D$docnum"} = $base;
99 my $meta = Deliantra::Map->new_from_file ("$base.map"); 163 my $meta = eval { Deliantra::Map->new_from_file ("$base.map") }
164 or next;
100 165
101 for my $x (0 .. $meta->{width} - 1) { 166 $idx{"D" . pack "n", $docnum} = join "\x00", $base, (stat "$base.map")[9];
102 for my $y (0 .. $meta->{height} - 1) { 167
103 for my $a (@{ $meta->{map}[$x][$y] }) { 168 my ($x, $y, $pos);
169
170 my $add; $add = sub {
171 for my $a (@{ $_[0] }) {
172 # delete "obvious" crap elements
173 delete @$a{qw(elevation x y move_block move_slow move_allow)};
174
175 $add->(delete $a->{inventory})
176 if $a->{inventory};
177
178 $idx2{"T$a->{tag}"} ||= $pos
179 if exists $a->{tag};
180
104 for my $v (values %$a) { 181 for my $v (values %$a) {
105 $v = lc $v; 182 $v = lc $v;
106 $v =~ y/a-zA-Z0-9_\-.\// /c; 183 $v =~ y/a-zA-Z0-9_\-.\// /c; #/ vim
184
107 for (split /\s+/, $v) { 185 for (split /\s+/, $v) {
186 next if !FAST && exists $stop{$_};
187
108 $idx2{$_} .= $docnum; 188 $idx2{$_} ||= $pos;
109 189
110 if (/[_\-\.\/]/) { 190 if (/[_\-\.\/]/) {
111 $idx2{$_} .= $docnum for (split /[_\-\.\/]/) 191 $idx2{$_} ||= $pos for split /[_\-\.\/]/;
112 }
113 } 192 }
114 } 193 }
115 } 194 }
116 } 195 }
196 };
197
198 for $x (0 .. $meta->{width} - 1) {
199 my $col = $meta->{map}[$x];
200 for $y (0 .. $meta->{height} - 1) {
201 $pos = pack "nCC", $docnum, $x, $y;
202
203 $add->($col->[$y]);
204 }
117 } 205 }
118 }
119 206
207 if (FAST) {
120 while (my ($k, $v) = each %idx2) { 208 while (my ($k, $v) = each %idx2) {
121 $idx{$k} = pack "n*", keys %{ { map +($_ => 1), unpack "n*", $idx{$k}.$v } }; 209 $idx{$k} .= $v;
210 }
211 } else {
212 while (my ($k, $v) = each %idx2) {
213 my @val = keys %{ { map +($_ => undef), unpack "(A4)*", $idx{$k}.$v } };;
214
215 if ($RESULTS >= @val) {
216 $idx{$k} = join "", @val;
217 } else {
218 delete $idx{$k};
219 undef $stop{$k};
220 }
221 }
222 }
122 } 223 }
224
225 $idx{Vstopwords} = join "\x00", keys %stop;
123 226
124 untie %idx; 227 untie %idx;
125 228
126} elsif ($ARGV[0] eq "-s") { 229} elsif ($ARGV[0] eq "-s") {
127 shift; 230 shift;
129 search @ARGV; 232 search @ARGV;
130} else { 233} else {
131 # assume CGI 234 # assume CGI
132# chdir "/var/www/maps.deliantra.net" or exit 69; 235# chdir "/var/www/maps.deliantra.net" or exit 69;
133 236
134 print <<EOF;
135Content-Type: application/xhtml+xml
136
137EOF
138 $ENV{QUERY_STRING} =~ s/^k=//; 237 $ENV{QUERY_STRING} =~ s/^([kt])=//;
238
239 if ($1 eq "t") {
240 dotag $ENV{QUERY_STRING};
241 } else {
139 search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING}; 242 search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING};
243 }
140} 244}
141 245

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines