ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cfmaps/cfmapidx
Revision: 1.14
Committed: Sat Oct 31 17:26:44 2009 UTC (14 years, 6 months ago) by root
Branch: MAIN
CVS Tags: HEAD
Changes since 1.13: +142 -40 lines
Log Message:
- support map tags in cfmap2html and cfmapidx
- reorganise index database, reduces db size considerably
- support stop words and "stop attributes"
- look into object inventories for cfmapidx
- speed up cfmapidx
- skip world-precomposed in cfmapidx

File Contents

# User Rev Content
1 root 1.1 #!/opt/bin/perl
2    
3 root 1.14 # cfmapidx - inverted index for deliantra maps
4     # Copyright (C) 2005,2007,2008,2009 Marc Lehmann <cfmaps@schmorp.de>
5 root 1.1 #
6     # CFMAPIDX is free software; you can redistribute it and/or modify
7     # it under the terms of the GNU General Public License as published by
8     # the Free Software Foundation; either version 2 of the License, or
9     # (at your option) any later version.
10     #
11     # This program is distributed in the hope that it will be useful,
12     # but WITHOUT ANY WARRANTY; without even the implied warranty of
13     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     # GNU General Public License for more details.
15     #
16     # You should have received a copy of the GNU General Public License
17 root 1.6 # along with cfmaps; if not, write to the Free Software
18 root 1.1 # Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19    
20 root 1.14 our $VERSION = '2.0';
21    
22     use common::sense;
23 root 1.1
24 root 1.9 use Deliantra;
25 elmex 1.10 use Deliantra::Map;
26 root 1.12
27     use Fcntl;
28 root 1.13 use GDBM_File;
29 root 1.1
30 root 1.14 sub FAST () { 1 } # skip stopwords, skip duplicate-management
31    
32     our $RESULTS = 200;
33     our %idx; # the database
34    
35 root 1.1 sub escape_html($) {
36     local $_ = shift;
37     s/([<>&])/sprintf "&#%d;", ord $1/ge;
38     $_
39     }
40    
41 root 1.14 sub find {
42     my @kw = @_;
43 root 1.3
44 root 1.14 tie %idx, "GDBM_File", ".index.dat", O_RDONLY, 0
45 root 1.3 or die ".index.dat: $!";
46    
47     my $cnt;
48     my %res;
49     my @found;
50    
51 root 1.14 for (@kw) {
52 root 1.3 if (exists $idx{$_}) {
53     $cnt++;
54 root 1.14 $res{$_}++ for unpack "(A4)*", $idx{$_};
55 root 1.3 push @found, $_;
56     #warn "$_ found\n";#d#
57     } else {
58     #warn "$_ not found\n";#d#
59     }
60     }
61    
62 root 1.14 my @res;
63 root 1.3
64     while (my ($k, $v) = each %res) {
65     next unless $v == $cnt;
66 root 1.14
67     my ($docnum, $x, $y) = unpack "nCC", $k;
68     my ($path, $mtime) = split /\x00/, $idx{"D" . pack "n", $docnum};
69    
70     push @res, [$path, $x, $y];
71     }
72    
73     (\@res, \@found)
74     }
75    
76     sub dotag {
77     my ($tag) = @_;
78    
79     my ($res) = find "T$tag";
80    
81     use Data::Dumper; warn Dumper $res;
82     print <<EOF;
83     Content-Type: application/xhtml+xml
84    
85     EOF
86    
87     print "<?xml version='1.0' encoding='utf-8'?>",
88     '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
89     "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
90     "<head>",
91     "<title>Deliantra Tag Search</title>",
92     "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
93     "</head>",
94     "<body>";
95    
96     print "<h1>Search results for tag '", (escape_html $tag), "'</h1>",
97     "<p class='searchresult'><ul>";
98    
99     for (sort { $a->[0] cmp $b->[0] } @$res) {
100     print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
101 root 1.3 }
102    
103 root 1.14 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
104     "</body></html>";
105     }
106    
107     sub search {
108     my (@kw) = @_;
109    
110     my ($res, $found) = find map lc, @kw;
111    
112 root 1.3 binmode STDOUT, ":utf8";
113    
114 root 1.14 print <<EOF;
115     Content-Type: application/xhtml+xml
116    
117     EOF
118    
119 root 1.3 print "<?xml version='1.0' encoding='utf-8'?>",
120     '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
121     "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>",
122     "<head>",
123 root 1.9 "<title>Deliantra Keyword Search</title>",
124 root 1.3 "<link rel='stylesheet' type='text/css' media='all' href='/common.css'/>\n",
125     "</head>",
126     "<body>";
127    
128 root 1.14 print "<h1>Search results for '", (join "' &amp; '", map escape_html $_, @$found), "'</h1>",
129     "<p class='searchcount'>", (scalar @$res), " results found, up to $RESULTS results shown.</p>",
130 root 1.3 "<p class='searchresult'><ul>";
131    
132 root 1.14 pop @$res while @$res > $RESULTS;
133 root 1.3
134 root 1.14 for (sort { $a->[0] cmp $b->[0] } @$res) {
135     print "<li><a href='$_->[0].xhtml'>$_->[0] ($_->[1]|$_->[2])</a></li>";
136 root 1.3 }
137    
138 root 1.5 print "</ul></p><p class='footer'>created by <a href='http://software.schmorp.de/pkg/cfmaps'>cfmapidx</a> version $VERSION</p>",
139 root 1.3 "</body></html>";
140     }
141    
142 root 1.1 if ($ARGV[0] eq "-a") {
143     shift;
144    
145 root 1.14 tie %idx, "GDBM_File", ".index.dat~", O_RDWR | O_CREAT, 0644
146 root 1.1 or die ".index.dat~: $!";
147    
148 root 1.14 if ($ARGV[0] eq "-r") {
149     (tied %idx)->reorganize;
150     untie %idx;
151     exit 0;
152     }
153    
154     my %stop = map +($_ => undef), split /\x00/, $idx{Vstopwords};
155 root 1.1
156     for my $path (@ARGV) {
157 root 1.14 next if $path =~ m%^world-precomposed/%;
158    
159     my %idx2;
160    
161 root 1.7 (my $base = $path) =~ s/\.map$//;
162 root 1.14 my $docnum = ++$idx{Vdocnum};
163 root 1.13 my $meta = eval { Deliantra::Map->new_from_file ("$base.map") }
164     or next;
165 root 1.1
166 root 1.14 $idx{"D" . pack "n", $docnum} = join "\x00", $base, (stat "$base.map")[9];
167    
168     my ($x, $y, $pos);
169    
170     my $add; $add = sub {
171     for my $a (@{ $_[0] }) {
172     # delete "obvious" crap elements
173     delete @$a{qw(elevation x y move_block move_slow move_allow)};
174    
175     $add->(delete $a->{inventory})
176     if $a->{inventory};
177    
178     $idx2{"T$a->{tag}"} ||= $pos
179     if exists $a->{tag};
180    
181     for my $v (values %$a) {
182     $v = lc $v;
183     $v =~ y/a-zA-Z0-9_\-.\// /c; #/ vim
184    
185     for (split /\s+/, $v) {
186     next if !FAST && exists $stop{$_};
187    
188     $idx2{$_} ||= $pos;
189    
190     if (/[_\-\.\/]/) {
191     $idx2{$_} ||= $pos for split /[_\-\.\/]/;
192 root 1.1 }
193     }
194     }
195     }
196 root 1.14 };
197    
198     for $x (0 .. $meta->{width} - 1) {
199     my $col = $meta->{map}[$x];
200     for $y (0 .. $meta->{height} - 1) {
201     $pos = pack "nCC", $docnum, $x, $y;
202    
203     $add->($col->[$y]);
204     }
205     }
206    
207     if (FAST) {
208     while (my ($k, $v) = each %idx2) {
209     $idx{$k} .= $v;
210     }
211     } else {
212     while (my ($k, $v) = each %idx2) {
213     my @val = keys %{ { map +($_ => undef), unpack "(A4)*", $idx{$k}.$v } };;
214    
215     if ($RESULTS >= @val) {
216     $idx{$k} = join "", @val;
217     } else {
218     delete $idx{$k};
219     undef $stop{$k};
220     }
221     }
222 root 1.1 }
223     }
224    
225 root 1.14 $idx{Vstopwords} = join "\x00", keys %stop;
226 root 1.1
227 root 1.12 untie %idx;
228    
229 root 1.1 } elsif ($ARGV[0] eq "-s") {
230     shift;
231    
232 root 1.3 search @ARGV;
233     } else {
234     # assume CGI
235 root 1.12 # chdir "/var/www/maps.deliantra.net" or exit 69;
236 root 1.3
237 root 1.14 $ENV{QUERY_STRING} =~ s/^([kt])=//;
238 root 1.3
239 root 1.14 if ($1 eq "t") {
240     dotag $ENV{QUERY_STRING};
241     } else {
242     search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING};
243     }
244 root 1.1 }
245