#!/opt/bin/perl # cfmapidx - inverted index for deliantra maps # Copyright (C) 2005,2007,2008,2009 Marc Lehmann # # CFMAPIDX is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with cfmaps; if not, write to the Free Software # Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA our $VERSION = '2.0'; use common::sense; use Deliantra; use Deliantra::Map; use Fcntl; use GDBM_File; sub FAST () { 1 } # skip stopwords, skip duplicate-management our $RESULTS = 200; our %idx; # the database sub escape_html($) { local $_ = shift; s/([<>&])/sprintf "&#%d;", ord $1/ge; $_ } sub find { my @kw = @_; tie %idx, "GDBM_File", ".index.dat", O_RDONLY, 0 or die ".index.dat: $!"; my $cnt; my %res; my @found; for (@kw) { if (exists $idx{$_}) { $cnt++; $res{$_}++ for unpack "(A4)*", $idx{$_}; push @found, $_; #warn "$_ found\n";#d# } else { #warn "$_ not found\n";#d# } } my @res; while (my ($k, $v) = each %res) { next unless $v == $cnt; my ($docnum, $x, $y) = unpack "nCC", $k; my ($path, $mtime) = split /\x00/, $idx{"D" . pack "n", $docnum}; push @res, [$path, $x, $y]; } (\@res, \@found) } sub dotag { my ($tag) = @_; my ($res) = find "T$tag"; use Data::Dumper; warn Dumper $res; print <", '', "", "", "Deliantra Tag Search", "\n", "", ""; print "

Search results for tag '", (escape_html $tag), "'

", "

", ""; } sub search { my (@kw) = @_; my ($res, $found) = find map lc, @kw; binmode STDOUT, ":utf8"; print <", '', "", "", "Deliantra Keyword Search", "\n", "", ""; print "

Search results for '", (join "' & '", map escape_html $_, @$found), "'

", "

", (scalar @$res), " results found, up to $RESULTS results shown.

", "

", ""; } if ($ARGV[0] eq "-a") { shift; tie %idx, "GDBM_File", ".index.dat~", O_RDWR | O_CREAT, 0644 or die ".index.dat~: $!"; if ($ARGV[0] eq "-r") { (tied %idx)->reorganize; untie %idx; exit 0; } my %stop = map +($_ => undef), split /\x00/, $idx{Vstopwords}; for my $path (@ARGV) { next if $path =~ m%^world-precomposed/%; my %idx2; (my $base = $path) =~ s/\.map$//; my $docnum = ++$idx{Vdocnum}; my $meta = eval { Deliantra::Map->new_from_file ("$base.map") } or next; $idx{"D" . pack "n", $docnum} = join "\x00", $base, (stat "$base.map")[9]; my ($x, $y, $pos); my $add; $add = sub { for my $a (@{ $_[0] }) { # delete "obvious" crap elements delete @$a{qw(elevation x y move_block move_slow move_allow)}; $add->(delete $a->{inventory}) if $a->{inventory}; $idx2{"T$a->{tag}"} ||= $pos if exists $a->{tag}; for my $v (values %$a) { $v = lc $v; $v =~ y/a-zA-Z0-9_\-.\// /c; #/ vim for (split /\s+/, $v) { next if !FAST && exists $stop{$_}; $idx2{$_} ||= $pos; if (/[_\-\.\/]/) { $idx2{$_} ||= $pos for split /[_\-\.\/]/; } } } } }; for $x (0 .. $meta->{width} - 1) { my $col = $meta->{map}[$x]; for $y (0 .. $meta->{height} - 1) { $pos = pack "nCC", $docnum, $x, $y; $add->($col->[$y]); } } if (FAST) { while (my ($k, $v) = each %idx2) { $idx{$k} .= $v; } } else { while (my ($k, $v) = each %idx2) { my @val = keys %{ { map +($_ => undef), unpack "(A4)*", $idx{$k}.$v } };; if ($RESULTS >= @val) { $idx{$k} = join "", @val; } else { delete $idx{$k}; undef $stop{$k}; } } } } $idx{Vstopwords} = join "\x00", keys %stop; untie %idx; } elsif ($ARGV[0] eq "-s") { shift; search @ARGV; } else { # assume CGI # chdir "/var/www/maps.deliantra.net" or exit 69; $ENV{QUERY_STRING} =~ s/^([kt])=//; if ($1 eq "t") { dotag $ENV{QUERY_STRING}; } else { search split /\s+|\++|(?:%20)+/, $ENV{QUERY_STRING}; } }