… | |
… | |
2 | |
2 | |
3 | AnyEvent::Watchdog - generic watchdog/program restarter |
3 | AnyEvent::Watchdog - generic watchdog/program restarter |
4 | |
4 | |
5 | =head1 SYNOPSIS |
5 | =head1 SYNOPSIS |
6 | |
6 | |
7 | # MUST be use'd as the very first thing in the main program |
7 | # MUST be use'd as the very first thing in the main program, |
|
|
8 | # as it clones/forks the program before it returns. |
8 | use AnyEvent::Watchdog; |
9 | use AnyEvent::Watchdog; |
9 | |
10 | |
10 | =head1 DESCRIPTION |
11 | =head1 DESCRIPTION |
11 | |
12 | |
12 | This module implements a watchdog that can repeatedly fork the program and |
13 | This module implements a watchdog that can repeatedly fork the program and |
… | |
… | |
21 | program. It will cause weird effects when used from another module, as |
22 | program. It will cause weird effects when used from another module, as |
22 | perl does not expect to be forked inside C<BEGIN> blocks. |
23 | perl does not expect to be forked inside C<BEGIN> blocks. |
23 | |
24 | |
24 | =head1 RECIPES |
25 | =head1 RECIPES |
25 | |
26 | |
26 | Use AnyEvent::Watchdog solely as a convinient on-demand-restarter: |
27 | Use AnyEvent::Watchdog solely as a convenient on-demand-restarter: |
27 | |
28 | |
28 | use AnyEvent::Watchdog; |
29 | use AnyEvent::Watchdog; |
29 | |
30 | |
30 | # and whenever you wnat to restart (e.g. to upgrade code): |
31 | # and whenever you want to restart (e.g. to upgrade code): |
|
|
32 | use AnyEvent::Watchdog::Util; |
31 | AnyEvent::Watchdog::restart; |
33 | AnyEvent::Watchdog::Util::restart; |
32 | |
34 | |
33 | Use AnyEvent::Watchdog to kill the program and exit when the event loop |
35 | Use AnyEvent::Watchdog to kill the program and restart it when the event loop |
34 | fails to run for more than two minutes: |
36 | fails to run for more than two minutes: |
35 | |
37 | |
36 | use AnyEvent::Watchdog qw(autorestart heartbeat=120); |
38 | use AnyEvent::Watchdog autorestart => 1, heartbeat => 120; |
37 | |
39 | |
38 | Use AnyEvent::Watchdog to automatically restart the program |
40 | Use AnyEvent::Watchdog to automatically kill (but not restart) the program when it fails |
39 | when it fails to handle events for longer than 5 minutes: |
41 | to handle events for longer than 5 minutes: |
40 | |
42 | |
41 | use AnyEvent::Watchdog qw(autorestart heartbeat=300); |
43 | use AnyEvent::Watchdog heartbeat => 300; |
42 | |
44 | |
43 | =head1 FUNCTIONS |
45 | =head1 VARIABLES/FUNCTIONS |
44 | |
46 | |
45 | The module supports the following functions: |
47 | This module is controlled via the L<AnyEvent::Watchdog::Util> module: |
46 | |
48 | |
47 | =over 4 |
49 | use AnyEvent::Watchdog::Util; |
|
|
50 | |
|
|
51 | # attempt restart |
|
|
52 | AnyEvent::Watchdog::Util::restart; |
|
|
53 | |
|
|
54 | # check if it is running |
|
|
55 | AnyEvent::Watchdog::Util::enabled |
|
|
56 | or croak "not running under watchdog!"; |
48 | |
57 | |
49 | =cut |
58 | =cut |
50 | |
59 | |
51 | package AnyEvent::Watchdog; |
60 | package AnyEvent::Watchdog; |
52 | |
61 | |
53 | # load modules we will use later anyways |
62 | # load modules we will use later anyways |
54 | use common::sense; |
63 | use common::sense; |
55 | |
64 | |
56 | use Carp (); |
65 | use Carp (); |
57 | |
66 | |
58 | our $VERSION = '0.1'; |
67 | our $VERSION = '1.0'; |
59 | |
68 | |
60 | our $PID; # child pid |
69 | our $PID; # child pid |
61 | our $ENABLED = 1; |
70 | our $ENABLED = 0; # also version |
62 | our $AUTORESTART; # actually exit |
71 | our $AUTORESTART; # actually exit |
63 | our $HEARTBEAT; |
|
|
64 | our ($P, $C); |
72 | our ($P, $C); |
65 | |
73 | |
66 | sub poll($) { |
74 | sub poll($) { |
67 | (vec my $v, fileno $P, 1) = 1; |
75 | (vec my $v, fileno $P, 1) = 1; |
68 | CORE::select $v, undef, undef, $_[0] |
76 | CORE::select $v, undef, undef, $_[0] |
… | |
… | |
118 | |
126 | |
119 | } elsif ($cmd eq chr 3) { |
127 | } elsif ($cmd eq chr 3) { |
120 | sysread $P, my $interval, 1 |
128 | sysread $P, my $interval, 1 |
121 | or last; |
129 | or last; |
122 | |
130 | |
123 | $heartbeat = ord $interval |
131 | $heartbeat = ord $interval; |
124 | unless defined $heartbeat; |
|
|
125 | |
132 | |
126 | } elsif ($cmd eq chr 4) { |
133 | } elsif ($cmd eq chr 4) { |
127 | # heartbeat |
134 | # heartbeat |
128 | # TODO: should only reset heartbeat timeout with \005 |
135 | # TODO: should only reset heartbeat timeout with \005 |
129 | |
136 | |
… | |
… | |
165 | |
172 | |
166 | our %SEEKPOS; |
173 | our %SEEKPOS; |
167 | # due to bugs in perl, try to remember file offsets for all fds, and restore them later |
174 | # due to bugs in perl, try to remember file offsets for all fds, and restore them later |
168 | # (the parser otherwise exhausts the input files) |
175 | # (the parser otherwise exhausts the input files) |
169 | |
176 | |
170 | # this causes perlio to flush it's handles internally, so |
177 | # this causes perlio to flush its handles internally, so |
171 | # seek offsets become correct. |
178 | # seek offsets become correct. |
172 | exec "."; # toi toi toi |
179 | exec "."; # toi toi toi |
173 | #{ |
180 | #{ |
174 | # local $SIG{CHLD} = 'DEFAULT'; |
181 | # local $SIG{CHLD} = 'DEFAULT'; |
175 | # my $pid = fork; |
182 | # my $pid = fork; |
… | |
… | |
179 | # } else { |
186 | # } else { |
180 | # kill 9, $$; |
187 | # kill 9, $$; |
181 | # } |
188 | # } |
182 | #} |
189 | #} |
183 | |
190 | |
184 | # now records all fd positions |
191 | # now record "all" fd positions, assuming 1023 is more than enough. |
185 | for (0 .. 1023) { |
192 | for (0 .. 1023) { |
186 | open my $fh, "<&$_" or next; |
193 | open my $fh, "<&$_" or next; |
187 | $SEEKPOS{$_} = (sysseek $fh, 0, 1 or next); |
194 | $SEEKPOS{$_} = (sysseek $fh, 0, 1 or next); |
188 | } |
195 | } |
189 | |
196 | |
… | |
… | |
204 | |
211 | |
205 | unless (defined $PID) { |
212 | unless (defined $PID) { |
206 | warn "AnyEvent::Watchdog: '$!', retrying in one second...\n"; |
213 | warn "AnyEvent::Watchdog: '$!', retrying in one second...\n"; |
207 | sleep 1; |
214 | sleep 1; |
208 | } elsif ($PID) { |
215 | } elsif ($PID) { |
|
|
216 | # parent code |
209 | close $C; |
217 | close $C; |
210 | server; |
218 | server; |
211 | } else { |
219 | } else { |
|
|
220 | # child code |
|
|
221 | $ENABLED = 1; # also version |
|
|
222 | |
212 | # restore seek offsets |
223 | # restore seek offsets |
213 | while (my ($k, $v) = each %SEEKPOS) { |
224 | while (my ($k, $v) = each %SEEKPOS) { |
214 | open my $fh, "<&$k" or next; |
225 | open my $fh, "<&$k" or next; |
215 | sysseek $fh, $v, 0; |
226 | sysseek $fh, $v, 0; |
216 | } |
227 | } |
… | |
… | |
219 | close $P; |
230 | close $P; |
220 | last; |
231 | last; |
221 | } |
232 | } |
222 | } |
233 | } |
223 | |
234 | |
224 | =item AnyEvent::Watchdog::restart [$timeout] |
|
|
225 | |
|
|
226 | Tells the supervisor to restart the process when it exits, or forcefully |
|
|
227 | after C<$timeout> seconds (minimum 1, maximum 255, default 60). |
|
|
228 | |
|
|
229 | Calls C<exit 0> to exit the process cleanly. |
|
|
230 | |
|
|
231 | =cut |
|
|
232 | |
|
|
233 | sub restart(;$) { |
|
|
234 | my ($timeout) = @_; |
|
|
235 | |
|
|
236 | $timeout = 60 unless defined $timeout; |
|
|
237 | $timeout = 1 if $timeout < 1; |
|
|
238 | $timeout = 255 if $timeout > 255; |
|
|
239 | |
|
|
240 | syswrite $C, "\x01\x02" . chr $timeout; |
|
|
241 | exit 0; |
|
|
242 | } |
|
|
243 | |
|
|
244 | =item AnyEvent::Watchdog::autorestart [$boolean] |
|
|
245 | |
|
|
246 | =item use AnyEvent::Watchdog qw(autorestart[=$boolean]) |
|
|
247 | |
|
|
248 | Enables or disables autorestart (initially disabled, default for |
|
|
249 | C<$boolean> is to enable): By default, the supervisor will exit if the |
|
|
250 | program exits or dies in any way. When enabling autorestart behaviour, |
|
|
251 | then the supervisor will try to restart the program after it dies. |
|
|
252 | |
|
|
253 | Note that the supervisor will never autorestart when the child died with |
|
|
254 | SIGINT or SIGTERM. |
|
|
255 | |
|
|
256 | =cut |
|
|
257 | |
|
|
258 | sub autorestart(;$) { |
|
|
259 | syswrite $C, !@_ || $_[0] ? "\x01" : "\x00"; |
|
|
260 | } |
|
|
261 | |
|
|
262 | =item AnyEvent::Watchdog::heartbeat [$interval] |
|
|
263 | |
|
|
264 | =item use AnyEvent::Watchdog qw(heartbeat[=$interval]) |
|
|
265 | |
|
|
266 | Tells the supervisor to automatically kill the program if it doesn't |
|
|
267 | react for C<$interval> seconds (minium 1, maximum 255, default 60) , then |
|
|
268 | installs an AnyEvent timer the sends a regular heartbeat to the supervisor |
|
|
269 | twice as often. |
|
|
270 | |
|
|
271 | Exit behaviour isn't changed, so if you want a restart instead of an exit, |
|
|
272 | you have to call C<autorestart>. |
|
|
273 | |
|
|
274 | Once enabled, the heartbeat cannot be switched off. |
|
|
275 | |
|
|
276 | =cut |
|
|
277 | |
|
|
278 | sub heartbeat(;$) { |
|
|
279 | my ($interval) = @_; |
|
|
280 | |
|
|
281 | $interval = 60 unless defined $interval; |
|
|
282 | $interval = 1 if $interval < 1; |
|
|
283 | $interval = 255 if $interval > 255; |
|
|
284 | |
|
|
285 | syswrite $C, "\x03" . chr $interval; |
|
|
286 | |
|
|
287 | require AE; |
|
|
288 | $HEARTBEAT = AE::timer (0, $interval * 0.5, sub { |
|
|
289 | syswrite $C, "\x04"; |
|
|
290 | }); |
|
|
291 | } |
|
|
292 | |
|
|
293 | sub import { |
235 | sub import { |
294 | shift; |
236 | shift; |
295 | |
237 | |
296 | for (@_) { |
238 | while (@_) { |
297 | if (/^autorestart(?:=(.*))?$/) { |
239 | my $k = shift; |
298 | autorestart defined $1 ? $1 : 1; |
240 | |
299 | } elsif (/^heartbeat(?:=(.*))?$/) { |
241 | require AnyEvent::Watchdog::Util; |
300 | heartbeat $1; |
242 | |
|
|
243 | if ($k eq "autorestart") { |
|
|
244 | AnyEvent::Watchdog::Util::autorestart (! ! shift); |
|
|
245 | } elsif ($k eq "heartbeat") { |
|
|
246 | AnyEvent::Watchdog::Util::heartbeat (shift || 60); |
301 | } else { |
247 | } else { |
302 | Carp::croak "AnyEvent::Watchdog: '$_' is not a valid import argument"; |
248 | Carp::croak "AnyEvent::Watchdog: '$_' is not a valid import argument"; |
303 | } |
249 | } |
304 | } |
250 | } |
305 | } |
251 | } |
306 | |
252 | |
|
|
253 | # used by AnyEvent::Watchdog::Util. |
|
|
254 | our $end; |
|
|
255 | END { $end && &$end } |
|
|
256 | |
307 | =head1 SEE ALSO |
257 | =head1 SEE ALSO |
308 | |
258 | |
309 | L<AnyEvent>. |
259 | L<AnyEvent::Watchdog::Util>, L<AnyEvent>. |
310 | |
260 | |
311 | =head1 AUTHOR |
261 | =head1 AUTHOR |
312 | |
262 | |
313 | Marc Lehmann <schmorp@schmorp.de> |
263 | Marc Lehmann <schmorp@schmorp.de> |
314 | http://home.schmorp.de/ |
264 | http://home.schmorp.de/ |