1 |
=head1 NAME |
2 |
|
3 |
AnyEvent::Watchdog - generic watchdog/program restarter |
4 |
|
5 |
=head1 SYNOPSIS |
6 |
|
7 |
# MUST be use'd as the very first thing in the main program, |
8 |
# as it clones/forks the program before it returns. |
9 |
use AnyEvent::Watchdog; |
10 |
|
11 |
=head1 DESCRIPTION |
12 |
|
13 |
This module implements a watchdog that can repeatedly fork the program and |
14 |
thus effectively restart it - as soon as the module is use'd, it will fork |
15 |
the program (if possible) and continue to run it normally in the child, |
16 |
while the parent becomes a supervisor. |
17 |
|
18 |
The child can then ask the supervisor to restart itself instead of |
19 |
exiting, or ask the supervisor to restart it gracefully or forcefully. |
20 |
|
21 |
B<NOTE:> This module B<< I<MUST> >> be used as the first thing in the main |
22 |
program. It will cause weird effects when used from another module, as |
23 |
perl does not expect to be forked inside C<BEGIN> blocks. |
24 |
|
25 |
=head1 RECIPES |
26 |
|
27 |
Use AnyEvent::Watchdog solely as a convenient on-demand-restarter: |
28 |
|
29 |
use AnyEvent::Watchdog; |
30 |
|
31 |
# and whenever you want to restart (e.g. to upgrade code): |
32 |
use AnyEvent::Watchdog::Util; |
33 |
AnyEvent::Watchdog::Util::restart; |
34 |
|
35 |
Use AnyEvent::Watchdog to kill the program and restart it when the event loop |
36 |
fails to run for more than two minutes: |
37 |
|
38 |
use AnyEvent::Watchdog autorestart => 1, heartbeat => 120; |
39 |
|
40 |
Use AnyEvent::Watchdog to automatically kill (but not restart) the program when it fails |
41 |
to handle events for longer than 5 minutes: |
42 |
|
43 |
use AnyEvent::Watchdog heartbeat => 300; |
44 |
|
45 |
=head1 VARIABLES/FUNCTIONS |
46 |
|
47 |
This module is controlled via the L<AnyEvent::Watchdog::Util> module: |
48 |
|
49 |
use AnyEvent::Watchdog::Util; |
50 |
|
51 |
# attempt restart |
52 |
AnyEvent::Watchdog::Util::restart; |
53 |
|
54 |
# check if it is running |
55 |
AnyEvent::Watchdog::Util::enabled |
56 |
or croak "not running under watchdog!"; |
57 |
|
58 |
=cut |
59 |
|
60 |
package AnyEvent::Watchdog; |
61 |
|
62 |
# load modules we will use later anyways |
63 |
use common::sense; |
64 |
|
65 |
use Carp (); |
66 |
|
67 |
our $VERSION = '1.0';

# package-wide watchdog state
our $PID;         # pid returned by fork, i.e. the child as seen by the supervisor
our $ENABLED = 0; # set to 1 in the forked child; the value also serves as a version
our $AUTORESTART; # supervisor flag: true = fork a new child, false = exit with the child
our $P;           # control socket end kept (and read) by the supervisor
our $C;           # control socket end left open in the child
73 |
|
74 |
# Waits up to $_[0] seconds for the supervisor socket $P to become readable.
# Evaluates to the result of the four-argument select, which is 0 (false)
# when the timeout expired without $P becoming ready.
sub poll($) {
   # bit vector with only the bit for $P's file descriptor set
   my $rbits = "";
   vec ($rbits, fileno ($P), 1) = 1;

   CORE::select $rbits, undef, undef, $_[0]
}
78 |
|
79 |
# Supervisor loop, run in the parent for as long as one child is alive.
#
# Reads single-byte commands from the child over $P:
#
#    \000  clear $AUTORESTART
#    \001  set $AUTORESTART
#    \002  restart request; followed by one byte giving the number of
#          seconds the child gets to go away before it is killed
#    \003  set heartbeat; followed by one byte giving the interval in
#          seconds (0 switches heartbeat checking off)
#    \004  heartbeat
#
# Any other byte gets the child killed. When the loop ends the child is
# reaped. The sub only returns if $AUTORESTART is set at that point;
# otherwise the supervisor terminates right here, either by sending itself
# the signal that terminated the child or by exiting with the child's exit
# code.
sub server {
   my $exit_expected; # true once the child's termination is anticipated
   my $hb_interval;   # heartbeat timeout in seconds, false while disabled

   $AUTORESTART = 0;

   # the supervisor ignores these signals for the duration of this call
   local $SIG{HUP}  = 'IGNORE';
   local $SIG{INT}  = 'IGNORE';
   local $SIG{TERM} = 'IGNORE';

   for (;;) {
      # with a heartbeat configured, silence on $P for a full interval
      # gets the child killed
      if ($hb_interval && !poll ($hb_interval)) {
         $exit_expected = 1;
         warn "AnyEvent::Watchdog: heartbeat failed. killing.\n";
         kill 9, $PID;
         last;
      }

      # next command byte; end-of-file or a read error ends the loop
      sysread ($P, my $byte, 1)
         or last;

      my $op = ord $byte;

      # \000 / \001: the command value is the new autorestart flag
      if ($op == 0 || $op == 1) {
         $AUTORESTART = $op;
         next;
      }

      # \003: heartbeat interval follows in the next byte
      if ($op == 3) {
         sysread ($P, my $arg, 1)
            or last;

         $hb_interval = ord $arg;
         next;
      }

      # \004: heartbeat, nothing to do - having read it is enough
      # TODO: should only reset heartbeat timeout with \005
      next if $op == 4;

      # \002: the child wants to be restarted
      if ($op == 2) {
         sysread ($P, my $arg, 1)
            or last;

         my $grace = ord $arg;

         # nothing happening on $P within the grace period: force it
         unless (poll ($grace)) {
            warn "AnyEvent::Watchdog: program attempted restart, but failed to do so within $grace seconds. killing.\n";
            kill 9, $PID;
         }

         # only end-of-file is acceptable after a restart request
         if (sysread $P, my $extra, 1) {
            warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
            kill 9, $PID;
         }

         $exit_expected = 1;
         last;
      }

      # anything else is not part of the protocol
      warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
      kill 9, $PID;
      last;
   }

   waitpid $PID, 0;

   require POSIX;

   # signal number that terminated the child, false if it exited normally
   my $termsig = POSIX::WIFSIGNALED ($?) && POSIX::WTERMSIG ($?);

   # termination by SIGINT/SIGTERM counts as expected and disables restarting
   if (grep $termsig == $_, POSIX::SIGINT (), POSIX::SIGTERM ()) {
      $AUTORESTART   = 0;
      $exit_expected = 1;
   }

   warn "AnyEvent::Watchdog: program exited unexpectedly with status $?.\n"
      if !$exit_expected && $? >> 8;

   unless ($AUTORESTART) {
      # normal exit: pass the child's exit code on
      POSIX::_exit ($? >> 8)
         unless $termsig;

      # killed by a signal: reset all handlers and send ourselves the same
      # signal, with _exit as fallback should that not terminate us
      $SIG{$_} = 'DEFAULT' for keys %SIG;
      kill $termsig, $$;
      POSIX::_exit (127);
   }

   warn "AnyEvent::Watchdog: attempting automatic restart.\n";
}
172 |
|
173 |
# due to bugs in perl, the file offsets of all open fds are remembered here
# and restored later in each forked child (otherwise the parser exhausts the
# input files).
our %SEEKPOS; # fd number => offset reported by sysseek

# this causes perlio to flush its handles internally, so
# seek offsets become correct.
exec "."; # toi toi toi
# disabled alternative to the exec above:
#{
#   local $SIG{CHLD} = 'DEFAULT';
#   my $pid = fork;
#
#   if ($pid) {
#      waitpid $pid, 0;
#   } else {
#      kill 9, $$;
#   }
#}

# now record "all" fd positions, assuming 1023 is more than enough.
for my $fd (0 .. 1023) {
   # work on a dup of the descriptor; fds that are not open are skipped
   open my $dup, "<&$fd" or next;

   # whence 1 with offset 0 queries the current position; fds for which
   # sysseek fails are skipped
   my $offset = sysseek $dup, 0, 1 or next;

   $SEEKPOS{$fd} = $offset;
}
196 |
|
197 |
# Split into supervisor and child. Each pass creates a new socket pair and
# forks: the parent runs server () and comes back here only to fork again,
# while the child leaves the loop and continues with the main program.
for (;;) {
   # control channel: $P is used by the supervisor, $C by the child
   if ($^O !~ /mswin32/i) {
      require Socket;
      socketpair $P, $C, Socket::AF_UNIX (), Socket::SOCK_STREAM (), 0
         or Carp::croak ("AnyEvent::Watchdog: unable to create restarter pipe: $!\n");
   } else {
      require AnyEvent::Util;
      ($P, $C) = AnyEvent::Util::portable_socketpair ()
         or Carp::croak ("AnyEvent::Watchdog: unable to create restarter pipe: $!\n");
   }

   local $SIG{CHLD} = 'DEFAULT';

   $PID = fork;

   # fork failed: wait a bit, then try again with a new socket pair
   if (!defined $PID) {
      warn "AnyEvent::Watchdog: '$!', retrying in one second...\n";
      sleep 1;
      next;
   }

   # parent code: supervise, and fork again once server returns
   if ($PID) {
      close $C;
      server ();
      next;
   }

   # child code
   $ENABLED = 1; # also version

   # restore seek offsets, via a dup of each recorded fd
   for my $fd (keys %SEEKPOS) {
      open my $dup, "<&$fd" or next;
      sysseek $dup, $SEEKPOS{$fd}, 0;
   }

   # continue the program normally
   close $P;
   last;
}
234 |
|
235 |
# Handles the arguments of "use AnyEvent::Watchdog key => value, ...".
#
# Recognised keys:
#
#    autorestart => $bool      passed on, as a boolean, to
#                              AnyEvent::Watchdog::Util::autorestart
#    heartbeat   => $interval  passed on to AnyEvent::Watchdog::Util::heartbeat;
#                              a false value is replaced by 60
#
# Croaks on any other key, naming the offending key in the message.
sub import {
   shift; # class name, unused

   while (@_) {
      my $k = shift;

      # validate first, and report the key itself: the message used to
      # interpolate $_, which has nothing to do with the argument list
      Carp::croak "AnyEvent::Watchdog: '$k' is not a valid import argument"
         unless $k eq "autorestart" || $k eq "heartbeat";

      # only loaded when there is something to configure
      require AnyEvent::Watchdog::Util;

      if ($k eq "autorestart") {
         AnyEvent::Watchdog::Util::autorestart (! ! shift);
      } else {
         AnyEvent::Watchdog::Util::heartbeat (shift || 60);
      }
   }
}
252 |
|
253 |
# exit hook, used by AnyEvent::Watchdog::Util: when $end holds a true value
# at END time, it is called as a code reference.
our $end;
END { &$end if $end }
256 |
|
257 |
=head1 SEE ALSO |
258 |
|
259 |
L<AnyEvent::Watchdog::Util>, L<AnyEvent>. |
260 |
|
261 |
=head1 AUTHOR |
262 |
|
263 |
Marc Lehmann <schmorp@schmorp.de> |
264 |
http://home.schmorp.de/ |
265 |
|
266 |
=cut |
267 |
|
268 |
1 |
269 |
|