ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-Watchdog/Watchdog.pm
Revision: 1.2
Committed: Sun Aug 2 15:49:25 2009 UTC (14 years, 9 months ago) by root
Branch: MAIN
CVS Tags: rel-0_1
Changes since 1.1: +2 -0 lines
Log Message:
0.1

File Contents

# Content
1 =head1 NAME
2
3 AnyEvent::Watchdog - generic watchdog/program restarter
4
5 =head1 SYNOPSIS
6
7 # MUST be use'd as the very first thing in the main program
8 use AnyEvent::Watchdog;
9
10 =head1 DESCRIPTION
11
12 This module implements a watchdog that can repeatedly fork the program and
13 thus effectively restart it - as soon as the module is use'd, it will fork
14 the program (if possible) and continue to run it normally in the child,
15 while the parent becomes a supervisor.
16
17 The child can then ask the supervisor to restart itself instead of
18 exiting, or ask the supervisor to restart it gracefully or forcefully.
19
20 B<NOTE:> This module B<< I<MUST> >> be used as the first thing in the main
21 program. It will cause weird effects when used from another module, as
22 perl does not expect to be forked inside C<BEGIN> blocks.
23
24 =head1 RECIPES
25
26 Use AnyEvent::Watchdog solely as a convenient on-demand-restarter:
27
28 use AnyEvent::Watchdog;
29
30 # and whenever you want to restart (e.g. to upgrade code):
31 AnyEvent::Watchdog::restart;
32
33 Use AnyEvent::Watchdog to kill the program and exit when the event loop
34 fails to run for more than two minutes:
35
36 use AnyEvent::Watchdog qw(heartbeat=120);
37
38 Use AnyEvent::Watchdog to automatically restart the program
39 when it fails to handle events for longer than 5 minutes:
40
41 use AnyEvent::Watchdog qw(autorestart heartbeat=300);
42
43 =head1 FUNCTIONS
44
45 The module supports the following functions:
46
47 =over 4
48
49 =cut
50
51 package AnyEvent::Watchdog;
52
53 # load modules we will use later anyways
54 use common::sense;
55
56 use Carp ();
57
our $VERSION = '0.1';

# Shared state of the watchdog. The same variables exist in the supervisor
# and in every forked child, but each side only uses some of them.
our $ENABLED = 1;   # NOTE(review): not consulted anywhere in this file - confirm external use
our $PID;           # result of the most recent fork: the child's pid in the supervisor
our $AUTORESTART;   # supervisor: true when the program should be forked again after it ends
our $HEARTBEAT;     # child: the AE timer created by heartbeat()
our $P;             # socketpair end the supervisor reads commands from
our $C;             # socketpair end the child writes commands to
65
# Wait until the supervisor's socket $P becomes readable, for at most
# $_[0] seconds.
#
# Returns whatever the four-argument select returns: a true count when $P
# is readable, 0 when the timeout elapsed first.
sub poll($) {
    my ($timeout) = @_;

    # build a read bitmask that contains only $P's file descriptor
    my $read_set = '';
    vec ($read_set, fileno $P, 1) = 1;

    CORE::select $read_set, undef, undef, $timeout
}
70
# Supervisor main loop for one child.
#
# Reads commands from the child over $P until the pipe closes, the child
# requests a restart, or the child misbehaves; then reaps the child.
# Returns only when $AUTORESTART is set, so the caller can fork again.
# Otherwise the supervisor ends here, either by re-raising the child's
# fatal signal on itself or by _exit'ing with the child's exit code.
#
# Command bytes, as sent by restart, autorestart and heartbeat:
#   0           disable autorestart
#   1           enable autorestart
#   2 <secs>    child is about to exit and expects to be gone within <secs>
#   3 <secs>    enable heartbeat supervision with a <secs> timeout
#   4           heartbeat
sub server {
    my $exit_expected;   # true once the child's end is something we anticipated
    my $beat_timeout;    # heartbeat timeout in seconds, undef until enabled

    $AUTORESTART = 0;

    local $SIG{HUP}  = 'IGNORE';
    local $SIG{INT}  = 'IGNORE';
    local $SIG{TERM} = 'IGNORE';

    COMMAND: while (1) {
        # with supervision enabled, a silent pipe for the whole timeout is fatal
        if ($beat_timeout && !poll ($beat_timeout)) {
            $exit_expected = 1;
            warn "AnyEvent::Watchdog: heartbeat failed. killing.\n";
            kill 9, $PID;
            last COMMAND;
        }

        # a failed read or end-of-file ends the loop
        sysread $P, my $cmd, 1
            or last COMMAND;

        my $opcode = ord $cmd;

        if ($opcode == 0) {
            $AUTORESTART = 0;

        } elsif ($opcode == 1) {
            $AUTORESTART = 1;

        } elsif ($opcode == 2) {
            sysread $P, my $raw, 1
                or last COMMAND;

            my $timeout = ord $raw;

            # nothing readable within the grace period: the child has not gone away
            unless (poll ($timeout)) {
                warn "AnyEvent::Watchdog: program attempted restart, but failed to do so within $timeout seconds. killing.\n";
                kill 9, $PID;
            }

            # only end-of-file is acceptable now; real data is a protocol violation
            if (sysread $P, my $dummy, 1) {
                warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
                kill 9, $PID;
            }

            $exit_expected = 1;
            last COMMAND;

        } elsif ($opcode == 3) {
            sysread $P, my $raw, 1
                or last COMMAND;

            # only the first interval counts, later ones are ignored
            defined $beat_timeout
                or $beat_timeout = ord $raw;

        } elsif ($opcode == 4) {
            # heartbeat
            # TODO: should only reset heartbeat timeout with \005

        } else {
            warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
            kill 9, $PID;
            last COMMAND;
        }
    }

    waitpid $PID, 0;

    require POSIX;

    # signal number that ended the child, or a false value for a normal exit
    my $termsig = POSIX::WIFSIGNALED ($?) && POSIX::WTERMSIG ($?);

    # SIGINT and SIGTERM count as deliberate shutdowns and never trigger a restart
    if ($termsig == POSIX::SIGINT () || $termsig == POSIX::SIGTERM ()) {
        $AUTORESTART = 0;
        $exit_expected = 1;
    }

    warn "AnyEvent::Watchdog: program exited unexpectedly with status $?.\n"
        if !$exit_expected && $? >> 8;

    unless ($AUTORESTART) {
        # normal exit: pass the child's exit code on
        POSIX::_exit ($? >> 8)
            unless $termsig;

        # killed by a signal: raise the same signal on ourselves with all
        # handlers reset, and _exit in case that did not end us
        $SIG{$_} = 'DEFAULT' for keys %SIG;
        kill $termsig, $$;
        POSIX::_exit (127);
    }

    warn "AnyEvent::Watchdog: attempting automatic restart.\n";
}
165
# File offsets of all open file descriptors, keyed by descriptor number.
# Due to bugs in perl (the parser otherwise exhausts its input files), the
# offsets are recorded before forking and restored in every child.
our %SEEKPOS;

# A deliberately failing exec: it causes perlio to flush its handles
# internally, so the seek offsets read below are correct.
exec "."; # toi toi toi
# disabled alternative:
#{
#   local $SIG{CHLD} = 'DEFAULT';
#   my $pid = fork;
#
#   if ($pid) {
#      waitpid $pid, 0;
#   } else {
#      kill 9, $$;
#   }
#}

# record the position of every descriptor that can be dup'ed and seeked
for my $fd (0 .. 1023) {
    open my $dup, "<&$fd" or next;
    my $offset = sysseek $dup, 0, 1 or next;
    $SEEKPOS{$fd} = $offset;
}

# Fork loop: the parent supervises each child via server and comes back
# here only when a restart is wanted; the child leaves the loop and runs
# the rest of the program.
while (1) {
    if ($^O =~ /mswin32/i) {
        require AnyEvent::Util;
        ($P, $C) = AnyEvent::Util::portable_socketpair ()
            or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
    } else {
        require Socket;
        socketpair $P, $C, Socket::AF_UNIX (), Socket::SOCK_STREAM (), 0
            or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
    }

    local $SIG{CHLD} = 'DEFAULT';

    $PID = fork;

    if (!defined $PID) {
        # fork failed: report and try again with a fresh socketpair
        warn "AnyEvent::Watchdog: '$!', retrying in one second...\n";
        sleep 1;
    } elsif ($PID) {
        # supervisor: keeps only its own end of the socketpair
        close $C;
        server ();
    } else {
        # child: restore seek offsets
        for my $fd (keys %SEEKPOS) {
            open my $dup, "<&$fd" or next;
            sysseek $dup, $SEEKPOS{$fd}, 0;
        }

        # continue the program normally
        close $P;
        last;
    }
}
223
224 =item AnyEvent::Watchdog::restart [$timeout]
225
226 Tells the supervisor to restart the process when it exits, or forcefully
227 after C<$timeout> seconds (minimum 1, maximum 255, default 60).
228
229 Calls C<exit 0> to exit the process cleanly.
230
231 =cut
232
# Ask the supervisor for a restart, then exit cleanly.
#
# $timeout (optional, default 60) is the number of seconds sent to the
# supervisor along with the request; it is clamped to 1..255 because it
# travels as a single byte.
sub restart(;$) {
    my $timeout = shift;

    $timeout = 60 unless defined $timeout;

    if ($timeout < 1) {
        $timeout = 1;
    } elsif ($timeout > 255) {
        $timeout = 255;
    }

    # \x01 switches autorestart on, \x02 plus the timeout byte announces the exit
    my $request = "\x01\x02" . chr $timeout;
    syswrite $C, $request;

    exit 0;
}
243
244 =item AnyEvent::Watchdog::autorestart [$boolean]
245
246 =item use AnyEvent::Watchdog qw(autorestart[=$boolean])
247
248 Enables or disables autorestart (initially disabled, default for
249 C<$boolean> is to enable): By default, the supervisor will exit if the
250 program exits or dies in any way. When enabling autorestart behaviour,
251 then the supervisor will try to restart the program after it dies.
252
253 Note that the supervisor will never autorestart when the child died with
254 SIGINT or SIGTERM.
255
256 =cut
257
# Switch the supervisor's autorestart mode on or off.
#
# Without an argument autorestart is enabled; otherwise the truth value of
# the argument decides. Returns the result of the syswrite to $C.
sub autorestart(;$) {
    my $enable = @_ ? $_[0] : 1;

    syswrite $C, ($enable ? "\x01" : "\x00");
}
261
262 =item AnyEvent::Watchdog::heartbeat [$interval]
263
264 =item use AnyEvent::Watchdog qw(heartbeat[=$interval])
265
266 Tells the supervisor to automatically kill the program if it doesn't
267 react for C<$interval> seconds (minimum 1, maximum 255, default 60), then
268 installs an AnyEvent timer that sends a regular heartbeat to the supervisor
269 twice as often.
270
271 Exit behaviour isn't changed, so if you want a restart instead of an exit,
272 you have to call C<autorestart>.
273
274 Once enabled, the heartbeat cannot be switched off.
275
276 =cut
277
# Enable heartbeat supervision.
#
# Sends the interval to the supervisor and installs an AE timer that
# writes a heartbeat byte to $C every $interval / 2 seconds.
#
# $interval (optional, default 60) is clamped to 1..255 because it is
# transmitted as a single byte.
#
# The supervisor honours only the first interval it receives. Calls made
# after the heartbeat is running are therefore no-ops: re-arming the timer
# with a longer interval would beat more slowly than the supervisor
# expects and get the program killed.
#
# Returns the heartbeat timer.
sub heartbeat(;$) {
    my ($interval) = @_;

    # already running: keep the timer that matches the supervisor's timeout
    return $HEARTBEAT if defined $HEARTBEAT;

    $interval = 60 unless defined $interval;
    $interval = 1 if $interval < 1;
    $interval = 255 if $interval > 255;

    # \x03 plus the interval byte enables supervision
    syswrite $C, "\x03" . chr $interval;

    require AE;
    $HEARTBEAT = AE::timer (0, $interval * 0.5, sub {
        # \x04 is the heartbeat itself
        syswrite $C, "\x04";
    });
}
292
# Handle "use AnyEvent::Watchdog qw(...)" arguments.
#
# Recognised arguments:
#   autorestart[=BOOLEAN]   calls autorestart, BOOLEAN defaults to 1
#   heartbeat[=INTERVAL]    calls heartbeat, which applies its own default
#
# Croaks on any other argument.
sub import {
    shift; # the package name

    for my $arg (@_) {
        if ($arg =~ /^autorestart(?:=(.*))?$/) {
            # Copy $1 and call with explicit parentheses: after a sub with a
            # (;$) prototype, "autorestart defined $1 ? $1 : 1" can parse as
            # "autorestart (defined $1) ? ..." and would ignore the value.
            my $flag = defined $1 ? $1 : 1;
            autorestart ($flag);
        } elsif ($arg =~ /^heartbeat(?:=(.*))?$/) {
            my $interval = $1; # undef when no "=INTERVAL" was given
            heartbeat ($interval);
        } else {
            Carp::croak ("AnyEvent::Watchdog: '$arg' is not a valid import argument");
        }
    }
}
306
307 =back
308
309 =head1 SEE ALSO
310
311 L<AnyEvent>.
312
313 =head1 AUTHOR
314
315 Marc Lehmann <schmorp@schmorp.de>
316 http://home.schmorp.de/
317
318 =cut
319
320 1
321