ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-Watchdog/Watchdog.pm
Revision: 1.6
Committed: Sun Aug 30 17:20:27 2009 UTC (14 years, 8 months ago) by root
Branch: MAIN
Changes since 1.5: +25 -3 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 AnyEvent::Watchdog - generic watchdog/program restarter
4
5 =head1 SYNOPSIS
6
7 # MUST be use'd as the very first thing in the main program
8 use AnyEvent::Watchdog;
9
10 =head1 DESCRIPTION
11
12 This module implements a watchdog that can repeatedly fork the program and
13 thus effectively restart it - as soon as the module is use'd, it will fork
14 the program (if possible) and continue to run it normally in the child,
15 while the parent becomes a supervisor.
16
17 The child can then ask the supervisor to restart itself instead of
18 exiting, or ask the supervisor to restart it gracefully or forcefully.
19
20 B<NOTE:> This module B<< I<MUST> >> be used as the first thing in the main
21 program. It will cause weird effects when used from another module, as
22 perl does not expect to be forked inside C<BEGIN> blocks.
23
24 =head1 RECIPES
25
26 Use AnyEvent::Watchdog solely as a convenient on-demand-restarter:
27
28 use AnyEvent::Watchdog;
29
30 # and whenever you want to restart (e.g. to upgrade code):
31 AnyEvent::Watchdog::restart;
32
33 Use AnyEvent::Watchdog to kill the program and exit when the event loop
34 fails to run for more than two minutes:
35
36 use AnyEvent::Watchdog qw(heartbeat=120);
37
38 Use AnyEvent::Watchdog to automatically restart the program
39 when it fails to handle events for longer than 5 minutes:
40
41 use AnyEvent::Watchdog qw(autorestart heartbeat=300);
42
43 =head1 VARIABLES/FUNCTIONS
44
45 The module supports the following variables and functions:
46
47 =over 4
48
49 =cut
50
51 package AnyEvent::Watchdog;
52
53 # load modules we will use later anyways
54 use common::sense;
55
56 use Carp ();
57
58 our $VERSION = '0.9';
59
60 =item $AnyEvent::Watchdog::ENABLED
61
62 This is true when the program is running under the regime of
63 AnyEvent::Watchdog. Semi-obviously, you should I<NOT> C<use> or C<require>
64 this module before looking at this variable, and neither should you try
65 to load this module unless in the main program, rather use an idiom like
66 this:
67
68 $AnyEvent::Watchdog::ENABLED
69 or die "watchdog not enabled...";
70 AnyEvent::Watchdog::restart (60); # MUST use ()
71
72 Note that if this variable is defined, but false, then AnyEvent::Watchdog
73 is running, but you are in the watchdog process - you probably did
74 something very wrong in this case.
75
76 =cut
77
# State shared between the supervisor and the program it supervises.
our ($P, $C);      # the two ends of the socketpair: supervisor reads $P, child writes $C
our $PID;          # result of the last fork (the child's pid inside the supervisor)
our $AUTORESTART;  # true when the supervisor should fork a new child after this one is gone
our $HEARTBEAT;    # keeps the timer installed by heartbeat () alive
our $ENABLED = 0;  # becomes 1 in the child once it runs under the watchdog
83
# Wait for the supervisor end of the socketpair ($P) to become readable.
#
# The only argument is the timeout in seconds that is handed to select.
# The result is whatever 4-argument select returns, so it is false when
# the timeout passed without $P becoming readable.
sub poll($) {
   my ($timeout) = @_;

   # build a read set that contains only $P
   my $rbits = "";
   vec ($rbits, fileno ($P), 1) = 1;

   CORE::select ($rbits, undef, undef, $timeout)
}
88
# Supervisor loop for one incarnation of the child ($PID).
#
# Reads one-byte commands from $P until the child goes away or misbehaves:
#
#    0            switch autorestart off
#    1            switch autorestart on
#    2, <byte>    restart request, child has <byte> seconds to exit
#    3, <byte>    set the heartbeat interval to <byte> seconds (0 = off)
#    4            heartbeat
#
# Anything else gets the child killed. Afterwards the child is reaped.
# When autorestart is in effect the function returns (so the caller can
# fork again), otherwise the supervisor terminates the same way the
# child did and never returns.
sub server {
   my $exit_expected; # true once we know why the child is going away
   my $hb_interval;   # heartbeat interval in seconds, false while disabled

   $AUTORESTART = 0;

   # the supervisor itself ignores these while it watches the child
   local @SIG{qw(HUP INT TERM)} = ('IGNORE') x 3;

   # complain, then kill the child the hard way
   my $slay = sub {
      warn $_[0];
      kill 9, $PID;
   };

   COMMAND:
   for (;;) {
      # with heartbeats enabled, *something* must arrive within the interval
      if ($hb_interval && !poll ($hb_interval)) {
         $exit_expected = 1;
         $slay->("AnyEvent::Watchdog: heartbeat failed. killing.\n");
         last COMMAND;
      }

      # end of file (or a read error) means the child is gone
      sysread $P, my $cmd, 1
         or last COMMAND;

      my $op = ord $cmd;

      if ($op == 0) {
         $AUTORESTART = 0;

      } elsif ($op == 1) {
         $AUTORESTART = 1;

      } elsif ($op == 2) {
         sysread $P, my $arg, 1
            or last COMMAND;

         my $timeout = ord $arg;

         # the child promised to exit - give it $timeout seconds to do so
         poll ($timeout)
            or $slay->("AnyEvent::Watchdog: program attempted restart, but failed to do so within $timeout seconds. killing.\n");

         # the only acceptable thing to see now is end of file
         if (sysread $P, my $extra, 1) {
            $slay->("AnyEvent::Watchdog: unexpected program output. killing.\n");
         }

         $exit_expected = 1;
         last COMMAND;

      } elsif ($op == 3) {
         sysread $P, my $arg, 1
            or last COMMAND;

         $hb_interval = ord $arg;

      } elsif ($op == 4) {
         # heartbeat - having read it is all that is needed
         # TODO: should only reset heartbeat timeout with \005

      } else {
         $slay->("AnyEvent::Watchdog: unexpected program output. killing.\n");
         last COMMAND;
      }
   }

   # reap the child, its wait status ends up in $?
   waitpid $PID, 0;

   require POSIX;

   # signal number that terminated the child, false if it exited normally
   my $termsig = POSIX::WIFSIGNALED ($?) && POSIX::WTERMSIG ($?);

   # SIGINT/SIGTERM count as a deliberate stop and are never followed by a restart
   if (grep $termsig == $_, POSIX::SIGINT (), POSIX::SIGTERM ()) {
      $AUTORESTART   = 0;
      $exit_expected = 1;
   }

   warn "AnyEvent::Watchdog: program exited unexpectedly with status $?.\n"
      if !$exit_expected && $? >> 8;

   if ($AUTORESTART) {
      warn "AnyEvent::Watchdog: attempting automatic restart.\n";
   } elsif ($termsig) {
      # die from the same signal as the child did, _exit is only the fallback
      $SIG{$_} = 'DEFAULT' for keys %SIG;
      kill $termsig, $$;
      POSIX::_exit (127);
   } else {
      # pass the exit code of the child on
      POSIX::_exit ($? >> 8);
   }
}
182
# file descriptor => file offset, as seen before the first fork.
# due to bugs in perl, try to remember file offsets for all fds, and restore
# them in every child (the parser otherwise exhausts the input files)
our %SEEKPOS;

# this causes perlio to flush its handles internally, so
# seek offsets become correct.
exec "."; # toi toi toi
# alternative that was tried instead of the exec above:
#{
#   local $SIG{CHLD} = 'DEFAULT';
#   my $pid = fork;
#
#   if ($pid) {
#      waitpid $pid, 0;
#   } else {
#      kill 9, $$;
#   }
#}

# now record "all" fd positions, assuming 1023 is more than enough.
for my $fd (0 .. 1023) {
   # a dup shares the file offset with the original descriptor
   open my $dup, "<&$fd"
      or next;

   # not open or not seekable? then there is nothing to remember
   my $pos = sysseek $dup, 0, 1
      or next;

   $SEEKPOS{$fd} = $pos;
}

# fork until we end up in a child: the supervisor stays inside this loop
# for as long as server () returns, the child leaves it and simply
# continues with the rest of the program.
for (;;) {
   if ($^O =~ /mswin32/i) {
      require AnyEvent::Util;
      ($P, $C) = AnyEvent::Util::portable_socketpair ()
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   } else {
      require Socket;
      socketpair $P, $C, Socket::AF_UNIX (), Socket::SOCK_STREAM (), 0
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   }

   local $SIG{CHLD} = 'DEFAULT';

   $PID = fork;

   if (!defined $PID) {
      # fork failed
      warn "AnyEvent::Watchdog: '$!', retrying in one second...\n";
      sleep 1;

   } elsif ($PID) {
      # supervisor: keep only our end and watch the child
      close $C;
      server ();

   } else {
      # child
      $ENABLED = 1;

      # put every remembered descriptor back to where it was
      for my $fd (keys %SEEKPOS) {
         open my $dup, "<&$fd"
            or next;

         sysseek $dup, $SEEKPOS{$fd}, 0;
      }

      # keep only our end and continue the program normally
      close $P;
      last;
   }
}
244
245 =item AnyEvent::Watchdog::restart [$timeout]
246
247 Tells the supervisor to restart the process when it exits, or forcefully
248 after C<$timeout> seconds (minimum 1, maximum 255, default 60).
249
250 Calls C<exit 0> to exit the process cleanly.
251
252 =cut
253
# Ask the supervisor for a restart and exit.
#
# $timeout (optional, default 60) is the number of seconds the supervisor
# waits for the exit before it kills the process, forced into 1..255 as
# it travels as a single byte.
sub restart(;$) {
   my $timeout = shift;

   if (!defined $timeout) {
      $timeout = 60;
   } elsif ($timeout < 1) {
      $timeout = 1;
   } elsif ($timeout > 255) {
      $timeout = 255;
   }

   # \x01 enables autorestart, \x02 plus the timeout byte requests the restart
   syswrite $C, "\x01\x02" . chr $timeout;

   exit 0;
}
264
265 =item AnyEvent::Watchdog::autorestart [$boolean]
266
267 =item use AnyEvent::Watchdog qw(autorestart[=$boolean])
268
269 Enables or disables autorestart (initially disabled, default for
270 C<$boolean> is to enable): By default, the supervisor will exit if the
271 program exits or dies in any way. When enabling autorestart behaviour,
272 then the supervisor will try to restart the program after it dies.
273
274 Note that the supervisor will never autorestart when the child died with
275 SIGINT or SIGTERM.
276
277 =cut
278
# Switch autorestart on (no argument or a true argument) or off (false
# argument) by sending \x01 or \x00 to the supervisor.
# Returns the result of the syswrite.
sub autorestart(;$) {
   my $enable = @_ ? $_[0] : 1;

   syswrite $C, ($enable ? "\x01" : "\x00");
}
282
283 =item AnyEvent::Watchdog::heartbeat [$interval]
284
285 =item use AnyEvent::Watchdog qw(heartbeat[=$interval])
286
287 Tells the supervisor to automatically kill the program if it doesn't
288 react for C<$interval> seconds (minimum 1, maximum 255, default 60), then
289 installs an AnyEvent timer that sends a regular heartbeat to the supervisor
290 twice as often.
291
292 Exit behaviour isn't changed, so if you want a restart instead of an exit,
293 you have to call C<autorestart>.
294
295 The heartbeat frequency can be changed as often as you want, an interval
296 of C<0> disables the heartbeat check again.
297
298 =cut
299
# Tell the supervisor which heartbeat interval to enforce and (re)install
# the timer that produces the heartbeats.
#
# $interval (optional, default 60) is given in seconds:
#
#    numerically 0    disables the heartbeat check and removes the timer
#    less than 1      is raised to 1
#    more than 255    is lowered to 255
#    anything else    is truncated to an integer
#
# The interval travels as a single byte, and the supervisor treats a 0
# byte as "no heartbeat check".
#
# Returns the new timer, or undef when the check was disabled.
sub heartbeat(;$) {
   my ($interval) = @_;

   if (!defined $interval) {
      $interval = 60;
   } elsif ($interval == 0) {
      # must not be clamped to 1, otherwise the check could never be
      # switched off again
      $interval = 0;
   } elsif ($interval < 1) {
      $interval = 1;
   } elsif ($interval > 255) {
      $interval = 255;
   } else {
      # chr would truncate anyway - do it here so the timer below is
      # derived from the same value the supervisor gets to see
      $interval = int $interval;
   }

   syswrite $C, "\x03" . chr $interval;

   if ($interval) {
      require AE;

      # beat twice per interval
      $HEARTBEAT = AE::timer (0, $interval * 0.5, sub {
         syswrite $C, "\x04";
      });
   } else {
      # drop the watcher, which (per AnyEvent's watcher semantics) cancels it
      undef $HEARTBEAT;
   }
}
314
# Handle the arguments of "use AnyEvent::Watchdog qw(...)".
#
# Recognised arguments:
#
#    autorestart          same as autorestart=1
#    autorestart=BOOLEAN  calls autorestart (BOOLEAN)
#    heartbeat            calls heartbeat with its default interval
#    heartbeat=INTERVAL   calls heartbeat (INTERVAL)
#
# Croaks on anything else.
sub import {
   shift; # the class name

   for my $arg (@_) {
      if ($arg =~ /^autorestart(?:=(.*))?$/) {
         # the parentheses are required: autorestart has a (;$) prototype
         # and therefore parses like a named unary operator, so without
         # them "defined $1" alone would become the argument and the
         # ?: would be applied to the return value of the call.
         autorestart (defined $1 ? $1 : 1);
      } elsif ($arg =~ /^heartbeat(?:=(.*))?$/) {
         # $1 is undef without "=INTERVAL", which selects the default
         heartbeat ($1);
      } else {
         Carp::croak "AnyEvent::Watchdog: '$arg' is not a valid import argument";
      }
   }
}
328
329 =back
330
331 =head1 SEE ALSO
332
333 L<AnyEvent>.
334
335 =head1 AUTHOR
336
337 Marc Lehmann <schmorp@schmorp.de>
338 http://home.schmorp.de/
339
340 =cut
341
342 1
343