| 1 |
=head1 NAME |
| 2 |
|
| 3 |
AnyEvent::Watchdog - generic watchdog/program restarter |
| 4 |
|
| 5 |
=head1 SYNOPSIS |
| 6 |
|
| 7 |
# MUST be use'd as the very first thing in the main program, |
| 8 |
# as it clones/forks the program before it returns. |
| 9 |
use AnyEvent::Watchdog; |
| 10 |
|
| 11 |
=head1 DESCRIPTION |
| 12 |
|
| 13 |
This module implements a watchdog that can repeatedly fork the program and |
| 14 |
thus effectively restart it - as soon as the module is use'd, it will fork |
| 15 |
the program (if possible) and continue to run it normally in the child, |
| 16 |
while the parent becomes a supervisor. |
| 17 |
|
| 18 |
The child can then ask the supervisor to restart itself instead of |
| 19 |
exiting, or ask the supervisor to restart it gracefully or forcefully. |
| 20 |
|
| 21 |
B<NOTE:> This module B<< I<MUST> >> be used as the first thing in the main |
| 22 |
program. It will cause weird effects when used from another module, as |
| 23 |
perl does not expect to be forked inside C<BEGIN> blocks. |
| 24 |
|
| 25 |
=head1 RECIPES |
| 26 |
|
| 27 |
Use AnyEvent::Watchdog solely as a convenient on-demand-restarter: |
| 28 |
|
| 29 |
use AnyEvent::Watchdog; |
| 30 |
|
| 31 |
# and whenever you want to restart (e.g. to upgrade code): |
| 32 |
use AnyEvent::Watchdog::Util; |
| 33 |
AnyEvent::Watchdog::Util::restart; |
| 34 |
|
| 35 |
Use AnyEvent::Watchdog to kill and automatically restart the program when the event loop |
| 36 |
fails to run for more than two minutes: |
| 37 |
|
| 38 |
use AnyEvent::Watchdog autorestart => 1, heartbeat => 120; |
| 39 |
|
| 40 |
Use AnyEvent::Watchdog to automatically kill (but not restart) the program when it fails |
| 41 |
to handle events for longer than 5 minutes: |
| 42 |
|
| 43 |
use AnyEvent::Watchdog heartbeat => 300; |
| 44 |
|
| 45 |
=head1 VARIABLES/FUNCTIONS |
| 46 |
|
| 47 |
This module is controlled via the L<AnyEvent::Watchdog::Util> module: |
| 48 |
|
| 49 |
use AnyEvent::Watchdog::Util; |
| 50 |
|
| 51 |
# attempt restart |
| 52 |
AnyEvent::Watchdog::Util::restart; |
| 53 |
|
| 54 |
# check if it is running |
| 55 |
AnyEvent::Watchdog::Util::enabled |
| 56 |
or croak "not running under watchdog!"; |
| 57 |
|
| 58 |
=cut |
| 59 |
|
| 60 |
package AnyEvent::Watchdog; |
| 61 |
|
| 62 |
# load modules we will use later anyways |
| 63 |
use common::sense; |
| 64 |
|
| 65 |
use Carp (); |
| 66 |
|
| 67 |
# Module version.
our $VERSION = '1.02';

# State shared between the supervisor (parent) and the supervised child.
our $PID;          # pid returned by fork (the child's pid in the supervisor)
our $ENABLED = 0;  # set to 1 in the forked child; also serves as a version
our $AUTORESTART;  # supervisor flag: true = fork again after the child is gone,
                   # false = the supervisor exits as well
our $P;            # supervisor's end of the control socket pair
our $C;            # child's end of the control socket pair
| 73 |
|
| 74 |
# Wait for the control socket $P to become readable.
#
# Takes the maximum time to wait, in seconds, and returns the result of the
# four-argument select: the number of ready handles, which is false when
# the timeout expired first.
sub poll($) {
   my ($timeout) = @_;

   # bit vector with only the bit for $P's file descriptor set
   my $readable = "";
   vec ($readable, fileno $P, 1) = 1;

   CORE::select $readable, undef, undef, $timeout
}
| 78 |
|
| 79 |
# Supervisor side of the watchdog, run once per incarnation of the child.
#
# Reads single-byte commands from the child over $P until the child goes
# away or has to be killed, then reaps it. When automatic restart is
# enabled the sub returns; otherwise the supervisor terminates itself,
# either by re-raising the signal that terminated the child or by exiting
# with the child's exit code.
#
# Commands understood:
#    \000          switch automatic restart off
#    \001          switch automatic restart on
#    \002 <byte>   the child is about to go away and must do so within
#                  <byte> seconds
#    \003 <byte>   set the heartbeat interval to <byte> seconds (a false
#                  value switches the check off)
#    \004          heartbeat
# Anything else gets the child killed.
sub server {
   my $exit_expected; # do we expect a program exit?
   my $beat_interval; # heartbeat interval in seconds, false while unset

   # shared "complain, then SIGKILL the child" step
   my $kill_child = sub {
      warn $_[0];
      kill 9, $PID;
   };

   $AUTORESTART = 0;

   # these signals are ignored for as long as the supervisor runs
   local $SIG{TERM} = 'IGNORE';
   local $SIG{INT}  = 'IGNORE';
   local $SIG{HUP}  = 'IGNORE';

   COMMAND:
   while (1) {
      # with a heartbeat interval set, the child must send something
      # before the interval runs out
      if ($beat_interval && !poll ($beat_interval)) {
         $exit_expected = 1;
         $kill_child->("AnyEvent::Watchdog: heartbeat failed. killing.\n");
         last COMMAND;
      }

      # a failed or empty read ends the command loop
      sysread $P, my $byte, 1
         or last COMMAND;

      my $op = ord $byte;

      if ($op == 0) {
         $AUTORESTART = 0;

      } elsif ($op == 1) {
         $AUTORESTART = 1;

      } elsif ($op == 2) {
         sysread $P, my $arg, 1
            or last COMMAND;

         my $timeout = ord $arg;

         # nothing may happen on the socket until the timeout expires ...
         unless (poll ($timeout)) {
            $kill_child->("AnyEvent::Watchdog: program attempted restart, but failed to do so within $timeout seconds. killing.\n");
         }

         # ... and what does happen must not be further data
         if (sysread $P, my $extra, 1) {
            $kill_child->("AnyEvent::Watchdog: unexpected program output. killing.\n");
         }

         $exit_expected = 1;
         last COMMAND;

      } elsif ($op == 3) {
         sysread $P, my $arg, 1
            or last COMMAND;

         $beat_interval = ord $arg;

      } elsif ($op == 4) {
         # heartbeat
         # TODO: should only reset heartbeat timeout with \005

      } else {
         $kill_child->("AnyEvent::Watchdog: unexpected program output. killing.\n");
         last COMMAND;
      }
   }

   # reap the child; $? now holds its wait status
   waitpid $PID, 0;

   require POSIX;

   # false unless the child was terminated by a signal
   my $termsig = POSIX::WIFSIGNALED ($?) && POSIX::WTERMSIG ($?);

   # SIGINT/SIGTERM count as a deliberate stop: never restart after those
   if ($termsig == POSIX::SIGINT () || $termsig == POSIX::SIGTERM ()) {
      $exit_expected = 1;
      $AUTORESTART = 0;
   }

   warn "AnyEvent::Watchdog: program exited unexpectedly with status $?.\n"
      if !$exit_expected && $? >> 8;

   if ($AUTORESTART) {
      warn "AnyEvent::Watchdog: attempting automatic restart.\n";
   } elsif ($termsig) {
      # terminate by the same signal that terminated the child
      for my $name (keys %SIG) {
         $SIG{$name} = 'DEFAULT';
      }

      kill $termsig, $$;
      POSIX::_exit (127); # fallback in case the signal did not end us
   } else {
      # pass the child's exit code on
      POSIX::_exit ($? >> 8);
   }
}
| 172 |
|
| 173 |
# file descriptor number => file offset, recorded before the first fork.
# due to bugs in perl, the parser otherwise exhausts the input files, so
# the offsets of all fds are remembered here and can be restored later.
our %SEEKPOS;

# the exec attempt itself is not expected to succeed - it is only here
# because it causes perlio to flush its handles internally, so seek
# offsets become correct.
exec "."; # toi toi toi
#{
#   local $SIG{CHLD} = 'DEFAULT';
#   my $pid = fork;
#
#   if ($pid) {
#      waitpid $pid, 0;
#   } else {
#      kill 9, $$;
#   }
#}

# now record "all" fd positions, assuming 1023 is more than enough.
for my $fd (0 .. 1023) {
   # fds that cannot be duplicated for reading are skipped
   open my $dup, "<&$fd"
      or next;

   # so are fds whose current position cannot be queried
   my $offset = sysseek $dup, 0, 1
      or next;

   $SEEKPOS{$fd} = $offset;
}
| 196 |
|
| 197 |
# Main fork loop, executed while the module is being loaded. Each pass
# creates a new control socket pair and forks: the parent closes its copy
# of $C and runs the supervisor (server), coming back here for another
# pass whenever server returns; the child leaves the loop and carries on
# with the program.
while (1) {
   # control channel: $P stays with the supervisor, $C with the child
   if ($^O =~ /mswin32/i) {
      require AnyEvent::Util;
      ($P, $C) = AnyEvent::Util::portable_socketpair ()
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   } else {
      require Socket;
      socketpair $P, $C, Socket::AF_UNIX (), Socket::SOCK_STREAM (), 0
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   }

   local $SIG{CHLD} = 'DEFAULT';

   $PID = fork;

   # fork failed: wait a little, then try again
   if (!defined $PID) {
      warn "AnyEvent::Watchdog: '$!', retrying in one second...\n";
      sleep 1;
      next;
   }

   # parent code
   if ($PID) {
      close $C;
      server ();
      next;
   }

   # child code
   $ENABLED = 1; # also version

   # restore seek offsets
   for my $fd (keys %SEEKPOS) {
      open my $dup, "<&$fd"
         or next;
      sysseek $dup, $SEEKPOS{$fd}, 0;
   }

   # continue the program normally
   close $P;
   last;
}
| 234 |
|
| 235 |
# Handles the arguments of "use AnyEvent::Watchdog key => value, ...".
#
# Recognised keys:
#    autorestart => $boolean   passed on, normalised to a boolean, to
#                              AnyEvent::Watchdog::Util::autorestart
#    heartbeat   => $interval  passed on to
#                              AnyEvent::Watchdog::Util::heartbeat; a false
#                              or missing value is replaced by 60
#
# Croaks on any other key, naming the offending key in the message.
sub import {
   shift; # the class name is not needed

   while (@_) {
      my $key = shift;

      # only loaded when there are arguments to process
      require AnyEvent::Watchdog::Util;

      if ($key eq "autorestart") {
         AnyEvent::Watchdog::Util::autorestart (! ! shift);
      } elsif ($key eq "heartbeat") {
         AnyEvent::Watchdog::Util::heartbeat (shift || 60);
      } else {
         # report the argument itself - $_ is unrelated to the argument list
         Carp::croak "AnyEvent::Watchdog: '$key' is not a valid import argument";
      }
   }
}
| 252 |
|
| 253 |
# Optional exit hook, used by AnyEvent::Watchdog::Util: when $end holds a
# code reference at interpreter shutdown, it is called from this END block.
our $end;

END {
   &$end if $end;
}
| 256 |
|
| 257 |
=head1 SEE ALSO |
| 258 |
|
| 259 |
L<AnyEvent::Watchdog::Util>, L<AnyEvent>. |
| 260 |
|
| 261 |
=head1 AUTHOR |
| 262 |
|
| 263 |
Marc Lehmann <schmorp@schmorp.de> |
| 264 |
http://home.schmorp.de/ |
| 265 |
|
| 266 |
=cut |
| 267 |
|
| 268 |
1 |
| 269 |
|