ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-Watchdog/Watchdog.pm
Revision: 1.5
Committed: Fri Aug 14 23:08:54 2009 UTC (14 years, 9 months ago) by root
Branch: MAIN
Changes since 1.4: +2 -2 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 AnyEvent::Watchdog - generic watchdog/program restarter
4
5 =head1 SYNOPSIS
6
7 # MUST be use'd as the very first thing in the main program
8 use AnyEvent::Watchdog;
9
10 =head1 DESCRIPTION
11
12 This module implements a watchdog that can repeatedly fork the program and
13 thus effectively restart it - as soon as the module is use'd, it will fork
14 the program (if possible) and continue to run it normally in the child,
15 while the parent becomes a supervisor.
16
17 The child can then ask the supervisor to restart itself instead of
18 exiting, or ask the supervisor to restart it gracefully or forcefully.
19
20 B<NOTE:> This module B<< I<MUST> >> be used as the first thing in the main
21 program. It will cause weird effects when used from another module, as
22 perl does not expect to be forked inside C<BEGIN> blocks.
23
24 =head1 RECIPES
25
26 Use AnyEvent::Watchdog solely as a convenient on-demand-restarter:
27
28 use AnyEvent::Watchdog;
29
30 # and whenever you want to restart (e.g. to upgrade code):
31 AnyEvent::Watchdog::restart;
32
33 Use AnyEvent::Watchdog to kill the program and exit when the event loop
34 fails to run for more than two minutes:
35
36 use AnyEvent::Watchdog qw(heartbeat=120);
37
38 Use AnyEvent::Watchdog to automatically restart the program
39 when it fails to handle events for longer than 5 minutes:
40
41 use AnyEvent::Watchdog qw(autorestart heartbeat=300);
42
43 =head1 FUNCTIONS
44
45 The module supports the following functions:
46
47 =over 4
48
49 =cut
50
51 package AnyEvent::Watchdog;
52
53 # load modules we will use later anyways
54 use common::sense;
55
56 use Carp ();
57
our $VERSION = '0.9';

# pid of the most recently forked child (the result of fork in the supervisor)
our $PID;

# not referenced anywhere in the visible code
our $ENABLED = 1;

# supervisor-side flag: when true, the program is forked again after the
# child is gone, otherwise the supervisor ends together with it
our $AUTORESTART;

# child-side: holds the AnyEvent timer installed by heartbeat
our $HEARTBEAT;

# the two ends of the supervisor <-> program socket pair:
# the supervisor reads commands from $P, the program writes them to $C
our $P;
our $C;
65
# Waits up to the given number of seconds for the supervisor socket $P to
# become readable and returns whatever 4-argument select returns (a false
# count if the timeout expired without $P becoming readable).
sub poll($) {
   my ($timeout) = @_;

   # read set containing only the descriptor of $P
   my $rbits = "";
   vec ($rbits, fileno ($P), 1) = 1;

   return CORE::select ($rbits, undef, undef, $timeout);
}
70
# Supervisor loop, run in the parent after every fork.
#
# Reads single-byte commands from the program over $P until the socket
# reports EOF, a heartbeat or restart timeout expires, or an unknown byte
# shows up, and then reaps the child ($PID).  It only returns when
# $AUTORESTART is set at that point; otherwise the supervisor process ends
# right here, either by re-raising the child's terminating signal on itself
# or by exiting with the child's exit code.
#
# Command bytes:
#   0      - switch autorestart off
#   1      - switch autorestart on
#   2 <t>  - the program is about to exit; kill it unless $P becomes
#            readable within <t> seconds
#   3 <i>  - require $P to become readable every <i> seconds (0: no check)
#   4      - heartbeat, needs no handling beyond having been read
sub server {
   my $exit_expected; # true once the end of the child is no surprise
   my $hb_interval;   # heartbeat timeout in seconds, false while unset

   $AUTORESTART = 0;

   # the supervisor itself ignores these signals while it supervises
   local $SIG{HUP}  = 'IGNORE';
   local $SIG{INT}  = 'IGNORE';
   local $SIG{TERM} = 'IGNORE';

   COMMAND: while () {
      # with a heartbeat configured, silence for a full interval is fatal
      if ($hb_interval && !poll ($hb_interval)) {
         $exit_expected = 1;
         warn "AnyEvent::Watchdog: heartbeat failed. killing.\n";
         kill 9, $PID;
         last COMMAND;
      }

      # a failed or zero-length read (EOF) ends supervision of this child
      sysread $P, my $cmd, 1
         or last COMMAND;

      my $code = ord $cmd;

      if ($code == 0) {
         $AUTORESTART = 0;

      } elsif ($code == 1) {
         $AUTORESTART = 1;

      } elsif ($code == 2) {
         sysread $P, my $arg, 1
            or last COMMAND;

         my $timeout = ord $arg;

         if (!poll ($timeout)) {
            warn "AnyEvent::Watchdog: program attempted restart, but failed to do so within $timeout seconds. killing.\n";
            kill 9, $PID;
         }

         # only EOF is acceptable after a restart request
         if (sysread $P, my $dummy, 1) {
            warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
            kill 9, $PID;
         }

         $exit_expected = 1;
         last COMMAND;

      } elsif ($code == 3) {
         sysread $P, my $arg, 1
            or last COMMAND;

         $hb_interval = ord $arg;

      } elsif ($code == 4) {
         # heartbeat
         # TODO: should only reset heartbeat timeout with \005

      } else {
         warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
         kill 9, $PID;
         last COMMAND;
      }
   }

   waitpid $PID, 0;

   require POSIX;

   # signal number that terminated the child, false if it was not signalled
   my $termsig = POSIX::WIFSIGNALED ($?) && POSIX::WTERMSIG ($?);

   # death by SIGINT/SIGTERM is never followed by a restart
   if (grep $termsig == $_, POSIX::SIGINT (), POSIX::SIGTERM ()) {
      $AUTORESTART = 0;
      $exit_expected = 1;
   }

   warn "AnyEvent::Watchdog: program exited unexpectedly with status $?.\n"
      if !$exit_expected && $? >> 8;

   unless ($AUTORESTART) {
      # no signal involved: just pass the exit code on
      $termsig
         or POSIX::_exit ($? >> 8);

      # otherwise die from the same signal, 127 being the fallback
      # should the signal not end this process
      $SIG{$_} = 'DEFAULT' for keys %SIG;
      kill $termsig, $$;
      POSIX::_exit (127);
   }

   warn "AnyEvent::Watchdog: attempting automatic restart.\n";
}
164
# due to bugs in perl, try to remember file offsets for all fds, and restore them later
# (the parser otherwise exhausts the input files)
# The offsets are keyed by file descriptor number.
our %SEEKPOS;

# this causes perlio to flush its handles internally, so
# seek offsets become correct.
# NOTE(review): the exec itself is presumably expected to fail ("." is a
# directory) so that only this side effect remains - confirm.
exec "."; # toi toi toi
#{
#   local $SIG{CHLD} = 'DEFAULT';
#   my $pid = fork;
#
#   if ($pid) {
#      waitpid $pid, 0;
#   } else {
#      kill 9, $$;
#   }
#}

# now record "all" fd positions, assuming 1023 is more than enough.
for my $fd (0 .. 1023) {
   # descriptors that cannot be dup'ed (not open) or seeked are skipped
   open my $dup, "<&$fd"
      or next;
   my $pos = sysseek $dup, 0, 1
      or next;

   $SEEKPOS{$fd} = $pos;
}

# Each iteration creates a fresh socket pair and forks once: the parent
# supervises the child in server and loops again only when server returns,
# the child leaves the loop and carries on with the main program.
while () {
   if ($^O =~ /mswin32/i) {
      require AnyEvent::Util;
      ($P, $C) = AnyEvent::Util::portable_socketpair ()
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   } else {
      require Socket;
      socketpair $P, $C, Socket::AF_UNIX (), Socket::SOCK_STREAM (), 0
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   }

   local $SIG{CHLD} = 'DEFAULT';

   $PID = fork;

   if (!defined $PID) {
      # fork failed: wait a bit, then retry with a new socket pair
      warn "AnyEvent::Watchdog: '$!', retrying in one second...\n";
      sleep 1;
   } elsif ($PID) {
      # supervisor: keeps only its own end of the socket pair
      close $C;
      server ();
   } else {
      # restore seek offsets
      for my $fd (keys %SEEKPOS) {
         open my $dup, "<&$fd"
            or next;
         sysseek $dup, $SEEKPOS{$fd}, 0;
      }

      # continue the program normally
      close $P;
      last;
   }
}
222
223 =item AnyEvent::Watchdog::restart [$timeout]
224
225 Tells the supervisor to restart the process when it exits, or forcefully
226 after C<$timeout> seconds (minimum 1, maximum 255, default 60).
227
228 Calls C<exit 0> to exit the process cleanly.
229
230 =cut
231
# Asks the supervisor to restart the program and then exits with status 0.
# The optional argument is the number of seconds the supervisor waits
# before killing the process; it defaults to 60 and is limited to 1..255.
sub restart(;$) {
   my $timeout = defined $_[0] ? $_[0] : 60;

   if ($timeout < 1) {
      $timeout = 1;
   } elsif ($timeout > 255) {
      $timeout = 255;
   }

   # \x01 switches autorestart on, \x02 plus the timeout byte announces the exit
   syswrite $C, join "", "\x01", "\x02", chr $timeout;
   exit 0;
}
242
243 =item AnyEvent::Watchdog::autorestart [$boolean]
244
245 =item use AnyEvent::Watchdog qw(autorestart[=$boolean])
246
247 Enables or disables autorestart (initially disabled, default for
248 C<$boolean> is to enable): By default, the supervisor will exit if the
249 program exits or dies in any way. When enabling autorestart behaviour,
250 then the supervisor will try to restart the program after it dies.
251
252 Note that the supervisor will never autorestart when the child died with
253 SIGINT or SIGTERM.
254
255 =cut
256
# Tells the supervisor whether to restart the program after it ends:
# without an argument or with a true one autorestart is switched on (\x01),
# with a false argument it is switched off (\x00).
# Returns the result of the syswrite to $C.
sub autorestart(;$) {
   my $enable = @_ ? $_[0] : 1;

   syswrite $C, ($enable ? "\x01" : "\x00");
}
260
261 =item AnyEvent::Watchdog::heartbeat [$interval]
262
263 =item use AnyEvent::Watchdog qw(heartbeat[=$interval])
264
265 Tells the supervisor to automatically kill the program if it doesn't
266 react for C<$interval> seconds (minimum 1, maximum 255, default 60), then
267 installs an AnyEvent timer that sends a regular heartbeat to the supervisor
268 twice as often.
269
270 Exit behaviour isn't changed, so if you want a restart instead of an exit,
271 you have to call C<autorestart>.
272
273 The heartbeat frequency can be changed as often as you want, an interval
274 of C<0> disables the heartbeat check again.
275
276 =cut
277
# Configures the heartbeat check.
#
# $interval is the number of seconds the supervisor tolerates without
# hearing from the program (default 60, limited to 1..255 whole seconds).
# The supervisor is told the interval and an AnyEvent timer, kept alive in
# $HEARTBEAT, sends a heartbeat byte every half interval.
#
# An interval of 0 disables the check again: the supervisor receives a
# zero interval (which it treats as "no heartbeat expected") and the timer
# is dropped.  In that case the result of the syswrite is returned and AE
# is not loaded.
sub heartbeat(;$) {
   my ($interval) = @_;

   $interval = 60 unless defined $interval;

   # 0 must not be clamped up to 1, or the check could never be switched off
   if ($interval == 0) {
      undef $HEARTBEAT;
      return syswrite $C, "\x03\x00";
   }

   $interval = 1 if $interval < 1;
   $interval = 255 if $interval > 255;

   # the protocol byte only carries whole seconds, so derive the timer
   # period from the same truncated value the supervisor will use
   $interval = int $interval;

   syswrite $C, "\x03" . chr $interval;

   require AE;
   $HEARTBEAT = AE::timer (0, $interval * 0.5, sub {
      syswrite $C, "\x04";
   });
}
292
# Handles the arguments of "use AnyEvent::Watchdog ...".
#
# Recognised arguments are "autorestart", "autorestart=BOOLEAN",
# "heartbeat" and "heartbeat=INTERVAL"; they are passed on to the functions
# of the same name.  Anything else croaks.
sub import {
   my ($class, @args) = @_;

   for my $arg (@args) {
      if ($arg =~ /^autorestart(?:=(.*))?$/) {
         # a bare "autorestart" means enable
         autorestart (defined $1 ? $1 : 1);
      } elsif ($arg =~ /^heartbeat(?:=(.*))?$/) {
         # a bare "heartbeat" passes undef, i.e. the default interval
         heartbeat ($1);
      } else {
         Carp::croak "AnyEvent::Watchdog: '$arg' is not a valid import argument";
      }
   }
}
306
307 =back
308
309 =head1 SEE ALSO
310
311 L<AnyEvent>.
312
313 =head1 AUTHOR
314
315 Marc Lehmann <schmorp@schmorp.de>
316 http://home.schmorp.de/
317
318 =cut
319
320 1
321