--- deliantra/server/lib/cf.pm 2007/01/24 22:42:49 1.194 +++ deliantra/server/lib/cf.pm 2007/03/02 13:32:52 1.221 @@ -17,12 +17,16 @@ use Coro::Semaphore; use Coro::AIO; +use BDB (); use Data::Dumper; use Digest::MD5; use Fcntl; use IO::AIO 2.32 (); use YAML::Syck (); use Time::HiRes; +use Compress::LZF; + +Compress::LZF::sfreeze_cr { }; # prime Compress::LZF so it does not use require later use Event; $Event::Eval = 1; # no idea why this is required, but it is @@ -47,9 +51,13 @@ our $TICK = MAX_TIME * 1e-6; our $TICK_WATCHER; our $AIO_POLL_WATCHER; -our $WRITE_RUNTIME_WATCHER; +our $NEXT_RUNTIME_WRITE; # when should the runtime file be written our $NEXT_TICK; our $NOW; +our $USE_FSYNC = 1; # use fsync to write maps - default off + +our $BDB_POLL_WATCHER; +our $DB_ENV; our %CFG; @@ -60,6 +68,7 @@ our %MAP; # all maps our $LINK_MAP; # the special {link} map, which is always available our $RANDOM_MAPS = cf::localdir . "/random"; +our $BDB_ENV_DIR = cf::localdir . "/db"; our $WAIT_FOR_TICK; $WAIT_FOR_TICK ||= new Coro::Signal; our $WAIT_FOR_TICK_ONE; $WAIT_FOR_TICK_ONE ||= new Coro::Signal; @@ -82,9 +91,12 @@ mkdir cf::localdir . "/" . cf::tmpdir; mkdir cf::localdir . "/" . cf::uniquedir; mkdir $RANDOM_MAPS; +mkdir $BDB_ENV_DIR; our $EMERGENCY_POSITION; +sub cf::map::normalise; + ############################################################################# =head2 GLOBAL VARIABLES @@ -263,9 +275,7 @@ my $guard = Coro::guard { $TICK_WATCHER->start; - $WRITE_RUNTIME_WATCHER->start; }; - $WRITE_RUNTIME_WATCHER->stop; $TICK_WATCHER->stop; $guard } @@ -346,6 +356,8 @@ } sub write_runtime { + my $guard = cf::lock_acquire "write_runtime"; + my $runtime = cf::localdir . "/runtime"; my $fh = aio_open "$runtime~", O_WRONLY | O_CREAT, 0644 @@ -358,6 +370,7 @@ (aio_write $fh, 0, (length $value), $value, 0) <= 0 and return; + # always fsync - this file is important aio_fsync $fh and return; @@ -752,7 +765,7 @@ if (my $fh = aio_open "$filename~", O_WRONLY | O_CREAT, 0600) { chmod SAVE_MODE, $fh; aio_write $fh, 0, (length $$rdata), $$rdata, 0; - aio_fsync $fh; + aio_fsync $fh if $cf::USE_FSYNC; close $fh; if (@$objs) { @@ -760,7 +773,7 @@ chmod SAVE_MODE, $fh; my $data = Storable::nfreeze { version => 1, objs => $objs }; aio_write $fh, 0, (length $data), $data, 0; - aio_fsync $fh; + aio_fsync $fh if $cf::USE_FSYNC; close $fh; aio_rename "$filename.pst~", "$filename.pst"; } @@ -1019,6 +1032,8 @@ sub quit_character { my ($pl) = @_; + my $name = $pl->ob->name; + $pl->{deny_save} = 1; $pl->password ("*"); # this should lock out the player until we nuked the dir @@ -1032,9 +1047,29 @@ aio_rename $path, $temp; delete $cf::PLAYER{$pl->ob->name}; $pl->destroy; + + my $prefix = qr<^~\Q$name\E/>; + + # nuke player maps + $cf::MAP{$_}->nuke for grep /$prefix/, keys %cf::MAP; + IO::AIO::aio_rmtree $temp; } +=item $pl->kick + +Kicks a player out of the game. This destroys the connection. + +=cut + +sub kick { + my ($pl, $kicker) = @_; + + $pl->invoke (cf::EVENT_PLAYER_KICK, $kicker); + $pl->killer ("kicked"); + $pl->ns->destroy; +} + =item cf::player::list_logins Returns am arrayref of all valid playernames in the system, can take a @@ -1070,6 +1105,8 @@ sub maps($) { my ($pl) = @_; + $pl = ref $pl ? $pl->ob->name : $pl; + my $files = aio_readdir playerdir $pl or return; @@ -1080,8 +1117,7 @@ next if /\.(?:pl|pst)$/; next unless /^$PATH_SEP/o; - s/\.map$//; - push @paths, "~" . $pl->ob->name . "/" . $_; + push @paths, cf::map::normalise "~$pl/$_"; } \@paths @@ -1180,6 +1216,8 @@ $path = "$path"; # make sure its a string + $path =~ s/\.map$//; + # map plan: # # /! non-realised random map exit (special hack!) @@ -1250,7 +1288,7 @@ sub load_path { my ($self) = @_; - sprintf "%s/%s/%s", cf::datadir, cf::mapdir, $self->{path} + sprintf "%s/%s/%s.map", cf::datadir, cf::mapdir, $self->{path} } # the temporary/swap location @@ -1258,7 +1296,7 @@ my ($self) = @_; (my $path = $_[0]{path}) =~ s/\//$PATH_SEP/g; - sprintf "%s/%s/%s", cf::localdir, cf::tmpdir, $path + sprintf "%s/%s/%s.map", cf::localdir, cf::tmpdir, $path } # the unique path, undef == no special unique path @@ -1290,8 +1328,8 @@ my ($self, $path) = @_; utf8::encode $path; - aio_open $path, O_RDONLY, 0 - or return; + #aio_open $path, O_RDONLY, 0 + # or return; $self->_load_header ($path) or return; @@ -1364,7 +1402,7 @@ $map->load_header or return; - if ($map->should_reset) { + if ($map->should_reset && 0) {#d#TODO# disabled, crashy (locking issue?) # doing this can freeze the server in a sync job, obviously #$cf::WAIT_FOR_TICK->wait; $map->reset; @@ -1382,6 +1420,8 @@ sub load { my ($self) = @_; + local $self->{deny_reset} = 1; # loading can take a long time + my $path = $self->{path}; my $guard = cf::lock_acquire "map_load:$path"; @@ -1579,9 +1619,18 @@ warn "resetting map ", $self->path;#d# + $self->in_memory (cf::MAP_SWAPPED); + + # need to save uniques path + unless ($self->{deny_save}) { + my $uniq = $self->uniq_path; utf8::encode $uniq; + + $self->_save_objects ($uniq, cf::IO_UNIQUES) + if $uniq; + } + delete $cf::MAP{$self->path}; - $self->in_memory (cf::MAP_SWAPPED); $self->clear; $_->clear_links_to ($self) for values %cf::MAP; @@ -1628,8 +1677,7 @@ next if /\.pst$/; next unless /^$PATH_SEP/o; - s/\.map$//; - push @paths, $_; + push @paths, cf::map::normalise $_; } \@paths @@ -1733,7 +1781,7 @@ sub link_map { unless ($LINK_MAP) { $LINK_MAP = cf::map::find "{link}" - or do { warn "FATAL: unable to provide {link} map, exiting."; exit 1 }; + or cf::cleanup "FATAL: unable to provide {link} map, exiting."; $LINK_MAP->load; } @@ -1782,6 +1830,8 @@ return unless $self->contr->active; $self->activate_recursive; + + local $self->{_prev_pos} = $link_pos; # ugly hack for rent.ext $self->enter_map ($map, $x, $y); } @@ -1804,12 +1854,12 @@ if ($pl->ob->{_link_pos}) { $pl->ob->enter_link; (async { - # we need this sleep as the login has a concurrent enter_exit running - # and this sleep increases chances of the player not ending up in scorn $pl->ob->reply (undef, "There was an internal problem at your last logout, " . "the server will try to bring you to your intended destination in a second.", cf::NDI_RED); + # we need this sleep as the login has a concurrent enter_exit running + # and this sleep increases chances of the player not ending up in scorn Coro::Timer::sleep 1; $pl->ob->leave_link; })->prio (2); @@ -1827,13 +1877,12 @@ $self->enter_link; (async { - my $map = cf::map::find $path; - $map = $map->customise_for ($self) if $map; - -# warn "entering ", $map->path, " at ($x, $y)\n" -# if $map; - - $map or $self->message ("The exit to '" . ($path->visible_name) . "' is closed", cf::NDI_UNIQUE | cf::NDI_RED); + my $map = eval { + my $map = cf::map::find $path; + $map = $map->customise_for ($self) if $map; + $map + } or + $self->message ("The exit to '$path' is closed", cf::NDI_UNIQUE | cf::NDI_RED); $self->leave_link ($map, $x, $y); })->prio (1); @@ -1874,7 +1923,7 @@ my $rmp = parse_random_map_params $exit->msg; if ($exit->map) { - $rmp->{region} = $exit->map->region_name; + $rmp->{region} = $exit->region->name; $rmp->{origin_map} = $exit->map->path; $rmp->{origin_x} = $exit->x; $rmp->{origin_y} = $exit->y; @@ -1901,16 +1950,21 @@ return unless $self->type == cf::PLAYER; + if ($exit->slaying eq "/!") { + #TODO: this should de-fi-ni-te-ly not be a sync-job + cf::sync_job { prepare_random_map $exit }; + } + + my $slaying = cf::map::normalise $exit->slaying, $exit->map && $exit->map->path; + my $hp = $exit->stats->hp; + my $sp = $exit->stats->sp; + $self->enter_link; (async { $self->deactivate_recursive; # just to be sure unless (eval { - prepare_random_map $exit - if $exit->slaying eq "/!"; - - my $path = new_from_path cf::map $exit->slaying, $exit->map && $exit->map->path; - $self->goto ($path, $exit->stats->hp, $exit->stats->sp); + $self->goto ($slaying, $hp, $sp); 1; }) { @@ -1981,6 +2035,7 @@ or return; # be conservative, not sure how that can happen, but we saw a crash here (shift @$queue)->[1]->($msg); + return unless $ns->valid; # temporary(?) workaround for callback destroying socket push @{ $ns->{query_queue} }, @$queue; @@ -2151,103 +2206,72 @@ =over 4 -=item $hashref = cf::db_get $family - -Return a hashref for use by the extension C<$family>, which can be -modified. After modifications, you have to call C or -C. - =item $value = cf::db_get $family => $key -Returns a single value from the database - -=item cf::db_put $family => $hashref - -Stores the given family hashref into the database. Updates are delayed, if -you want the data to be synced to disk immediately, use C. +Returns a single value from the database. =item cf::db_put $family => $key => $value -Stores the given C<$value> in the family hash. Updates are delayed, if you -want the data to be synced to disk immediately, use C. - -=item cf::db_dirty - -Marks the database as dirty, to be updated at a later time. - -=item cf::db_sync - -Immediately write the database to disk I. +Stores the given C<$value> in the family. =cut our $DB; -{ - my $path = cf::localdir . "/database.pst"; +sub db_init { + unless ($DB) { + $DB = BDB::db_create $DB_ENV; + + cf::sync_job { + eval { + $DB->set_flags (BDB::CHKSUM); + + BDB::db_open $DB, undef, "db", undef, BDB::BTREE, + BDB::CREATE | BDB::AUTO_COMMIT, 0666; + cf::cleanup "db_open(db): $!" if $!; + }; + cf::cleanup "db_open(db): $@" if $@; + }; - sub db_load() { - $DB = stat $path ? Storable::retrieve $path : { }; - } + my $path = cf::localdir . "/database.pst"; + if (stat $path) { + cf::sync_job { + my $pst = Storable::retrieve $path; - my $pid; + cf::db_put (board => data => $pst->{board}); + cf::db_put (guildrules => data => $pst->{guildrules}); + cf::db_put (rent => balance => $pst->{rent}{balance}); + BDB::db_env_txn_checkpoint $DB_ENV; - sub db_save() { - waitpid $pid, 0 if $pid; - if (0 == ($pid = fork)) { - $DB->{_meta}{version} = 1; - Storable::nstore $DB, "$path~"; - rename "$path~", $path; - cf::_exit 0 if defined $pid; + unlink $path; + }; } } +} - my $dirty; - - sub db_sync() { - db_save if $dirty; - undef $dirty; - } - - my $idle = Event->idle ( - reentrant => 0, - min => 10, - max => 20, - repeat => 0, - data => WF_AUTOCANCEL, - cb => \&db_sync, - ); - - sub db_dirty() { - $dirty = 1; - $idle->start; - } +sub db_get($$) { + my $key = "$_[0]/$_[1]"; - sub db_get($;$) { - @_ >= 2 - ? $DB->{$_[0]}{$_[1]} - : ($DB->{$_[0]} ||= { }) - } + cf::sync_job { + BDB::db_get $DB, undef, $key, my $data; - sub db_put($$;$) { - if (@_ >= 3) { - $DB->{$_[0]}{$_[1]} = $_[2]; - } else { - $DB->{$_[0]} = $_[1]; - } - db_dirty; + $! ? () + : Compress::LZF::sthaw $data } +} - cf::global->attach ( - prio => 10000, - on_cleanup => sub { - db_sync; - }, - ); +sub db_put($$$) { + BDB::dbreq_pri 4; + BDB::db_put $DB, undef, "$_[0]/$_[1]", Compress::LZF::sfreeze_cr $_[2], 0, sub { }; } ############################################################################# -# the server's main() +# the server's init and main functions + +sub init_resources { + load_resource_file sprintf "%s/%s/regions", cf::datadir, cf::mapdir + or die "unable to load regions file\n";#d# +} sub cfg_load { open my $fh, "<:utf8", cf::confdir . "/config" @@ -2270,6 +2294,10 @@ } } +sub init { + init_resources; +} + sub main { # we must not ever block the main coroutine local $Coro::idle = sub { @@ -2280,7 +2308,7 @@ }; cfg_load; - db_load; + db_init; load_extensions; $TICK_WATCHER->start; @@ -2329,11 +2357,22 @@ $map->save; } warn "end emergency map save\n"; + + warn "begin emergency database checkpoint\n"; + BDB::db_env_txn_checkpoint $DB_ENV; + warn "end emergency database checkpoint\n"; }; warn "leave emergency perl save\n"; } +sub post_cleanup { + my ($make_core) = @_; + + warn Carp::longmess "post_cleanup backtrace" + if $make_core; +} + sub reload() { # can/must only be called in main if ($Coro::current != $Coro::main) { @@ -2343,16 +2382,17 @@ warn "reloading..."; - warn "cancelling server ticker"; - $TICK_WATCHER->cancel; + warn "entering sync_job"; - cf::emergency_save; - - eval { - # if anything goes wrong in here, we should simply crash as we already saved + cf::sync_job { + cf::write_runtime; # external watchdog should not bark + cf::emergency_save; + cf::write_runtime; # external watchdog should not bark warn "syncing database to disk"; - cf::db_sync; + BDB::db_env_txn_checkpoint $DB_ENV; + + # if anything goes wrong in here, we should simply crash as we already saved warn "cancelling all WF_AUTOCANCEL watchers"; for (Event::all_watchers) { @@ -2361,9 +2401,10 @@ warn "flushing outstanding aio requests"; for (;;) { + BDB::flush; IO::AIO::flush; Coro::cede; - last unless IO::AIO::nreqs; + last unless IO::AIO::nreqs || BDB::nreqs; warn "iterate..."; } @@ -2428,7 +2469,6 @@ warn "loading config and database again"; cf::cfg_load; - cf::db_load; warn "loading extensions"; cf::load_extensions; @@ -2438,16 +2478,17 @@ warn "reattaching attachments to maps"; reattach $_ for values %MAP; - warn "restarting server ticker"; + warn "loading reloadable resources"; + init_resources; - $TICK_WATCHER->start; - }; + warn "leaving sync_job"; - if ($@) { + 1 + } or do { warn $@; warn "error while reloading, exiting."; exit 1; - } + }; warn "reloaded"; }; @@ -2498,12 +2539,17 @@ $RUNTIME += $TICK; $NEXT_TICK += $TICK; + if ($NOW >= $NEXT_RUNTIME_WRITE) { + $NEXT_RUNTIME_WRITE = $NOW + 10; + Coro::async_pool { + write_runtime + or warn "ERROR: unable to write runtime file: $!"; + }; + } + $WAIT_FOR_TICK->broadcast; $WAIT_FOR_TICK_ONE->send if $WAIT_FOR_TICK_ONE->awaited; - Event::sweep; - Coro::cede_notself; - # my $AFTER = Event::time; # warn $AFTER - $NOW;#d# @@ -2515,29 +2561,70 @@ }, ); -IO::AIO::max_poll_time $TICK * 0.1; +{ + BDB::max_poll_time $TICK * 0.1; + $BDB_POLL_WATCHER = Event->io ( + reentrant => 0, + fd => BDB::poll_fileno, + poll => 'r', + prio => 0, + data => WF_AUTOCANCEL, + cb => \&BDB::poll_cb, + ); + BDB::min_parallel 8; -undef $Coro::AIO::WATCHER; -$AIO_POLL_WATCHER = Event->io ( - reentrant => 0, - fd => IO::AIO::poll_fileno, - poll => 'r', - prio => 6, - data => WF_AUTOCANCEL, - cb => \&IO::AIO::poll_cb, -); + BDB::set_sync_prepare { + my $status; + my $current = $Coro::current; + ( + sub { + $status = $!; + $current->ready; undef $current; + }, + sub { + Coro::schedule while defined $current; + $! = $status; + }, + ) + }; -$WRITE_RUNTIME_WATCHER = Event->timer ( - reentrant => 0, - data => WF_AUTOCANCEL, - after => 1, - interval => 10, - prio => 6, # keep it lowest so it acts like a watchdog - cb => Coro::unblock_sub { - write_runtime - or warn "ERROR: unable to write runtime file: $!"; - }, -); + unless ($DB_ENV) { + $DB_ENV = BDB::db_env_create; + + cf::sync_job { + eval { + BDB::db_env_open + $DB_ENV, + $BDB_ENV_DIR, + BDB::INIT_LOCK | BDB::INIT_LOG | BDB::INIT_MPOOL | BDB::INIT_TXN + | BDB::RECOVER | BDB::REGISTER | BDB::USE_ENVIRON | BDB::CREATE, + 0666; + + cf::cleanup "db_env_open($BDB_ENV_DIR): $!" if $!; + + $DB_ENV->set_flags (BDB::AUTO_COMMIT | BDB::REGION_INIT | BDB::TXN_NOSYNC, 1); + $DB_ENV->set_lk_detect; + }; + + cf::cleanup "db_env_open(db): $@" if $@; + }; + } +} + +{ + IO::AIO::min_parallel 8; + + undef $Coro::AIO::WATCHER; + IO::AIO::max_poll_time $TICK * 0.1; + $AIO_POLL_WATCHER = Event->io ( + reentrant => 0, + data => WF_AUTOCANCEL, + fd => IO::AIO::poll_fileno, + poll => 'r', + prio => 6, + cb => \&IO::AIO::poll_cb, + ); +} END { cf::emergency_save }