ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-Watchdog/Watchdog.pm
Revision: 1.6
Committed: Sun Aug 30 17:20:27 2009 UTC (14 years, 8 months ago) by root
Branch: MAIN
Changes since 1.5: +25 -3 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::Watchdog - generic watchdog/program restarter
4    
5     =head1 SYNOPSIS
6    
7     # MUST be use'd as the very first thing in the main program
8     use AnyEvent::Watchdog;
9    
10     =head1 DESCRIPTION
11    
12     This module implements a watchdog that can repeatedly fork the program and
13     thus effectively restart it - as soon as the module is use'd, it will fork
14     the program (if possible) and continue to run it normally in the child,
15     while the parent becomes a supervisor.
16    
17     The child can then ask the supervisor to restart itself instead of
18     exiting, or ask the supervisor to restart it gracefully or forcefully.
19    
20     B<NOTE:> This module B<< I<MUST> >> be used as the first thing in the main
21     program. It will cause weird effects when used from another module, as
22     perl does not expect to be forked inside C<BEGIN> blocks.
23    
24     =head1 RECIPES
25    
26     Use AnyEvent::Watchdog solely as a convenient on-demand-restarter:
27    
28     use AnyEvent::Watchdog;
29    
30     # and whenever you want to restart (e.g. to upgrade code):
31     AnyEvent::Watchdog::restart;
32    
33     Use AnyEvent::Watchdog to kill the program and exit when the event loop
34     fails to run for more than two minutes:
35    
36     use AnyEvent::Watchdog qw(heartbeat=120);
37    
38     Use AnyEvent::Watchdog to automatically restart the program
39     when it fails to handle events for longer than 5 minutes:
40    
41     use AnyEvent::Watchdog qw(autorestart heartbeat=300);
42    
43 root 1.6 =head1 VARIABLES/FUNCTIONS
44 root 1.1
45 root 1.6 The module supports the following variables and functions:
46 root 1.1
47     =over 4
48    
49     =cut
50    
51     package AnyEvent::Watchdog;
52    
53     # load modules we will use later anyways
54     use common::sense;
55    
56     use Carp ();
57    
58 root 1.4 our $VERSION = '0.9';
59 root 1.1
60 root 1.6 =item $AnyEvent::Watchdog::ENABLED
61    
62     This is true when the program is running under the regime of
63     AnyEvent::Watchdog. Semi-obviously, you should I<NOT> C<use> or C<require>
64     this module before looking at this variable, and neither should you try
65     to load this module unless in the main program, rather use an idiom like
66     this:
67    
68     $AnyEvent::Watchdog::ENABLED
69     or die "watchdog not enabled...";
70     AnyEvent::Watchdog::restart (60); # MUST use ()
71    
72     Note that if this variable is defined, but false, then AnyEvent::Watchdog
73     is running, but you are in the watchdog process - you probably did
74     something very wrong in this case.
75    
76     =cut
77    
# Package-wide state shared between the supervisor (parent) and the program
# (child) side of the fork.
our $PID;          # result of the last fork: the child's pid in the supervisor
our $ENABLED = 0;  # set to 1 in the forked child only
our $AUTORESTART;  # supervisor: true = start the program again after it ends,
                   # false = terminate the supervisor as well
our $HEARTBEAT;    # child: timer watcher installed by heartbeat
our $P;            # supervisor end of the socketpair (the child closes it)
our $C;            # child end of the socketpair (the supervisor closes it)
83    
# Waits for the supervisor end of the socketpair ($P) to become readable.
#
# The single argument is the maximum time to wait in seconds. The result is
# whatever the four-argument select returns, so a wait that ran into the
# timeout without anything to read is reported as a false value.
sub poll($) {
   my $rbits = '';
   vec ($rbits, fileno ($P), 1) = 1;

   return CORE::select $rbits, undef, undef, $_[0];
}
88    
# Supervisor side of the watchdog.
#
# Reads one-byte commands from the program over $P until the stream ends or
# the program has to be killed, then reaps the child ($PID). When autorestart
# is in effect the sub returns so that the caller can start the program again;
# otherwise the supervisor process terminates here, by the signal that ended
# the child or with the child's exit status.
#
# Commands (first byte, some followed by one argument byte):
#   0          switch autorestart off
#   1          switch autorestart on
#   2 <secs>   the program is about to exit; wait up to <secs> for that
#   3 <secs>   set the heartbeat interval (a false value means no checking)
#   4          heartbeat
sub server {
   my $exit_expected; # true once the end of the program has been accounted for
   my $hb_interval;   # longest tolerated silence in seconds, false = unchecked

   $AUTORESTART = 0;

   # these three signals are ignored for the duration of this call
   local $SIG{HUP}  = 'IGNORE';
   local $SIG{INT}  = 'IGNORE';
   local $SIG{TERM} = 'IGNORE';

   while (1) {
      # with an interval set, nothing readable within that time is fatal
      if ($hb_interval && !poll ($hb_interval)) {
         $exit_expected = 1;
         warn "AnyEvent::Watchdog: heartbeat failed. killing.\n";
         kill 9, $PID;
         last;
      }

      my $got = sysread $P, my $cmd, 1;
      last unless $got; # end of stream or read error

      my $op = ord $cmd;

      if ($op == 0) {
         $AUTORESTART = 0;

      } elsif ($op == 1) {
         $AUTORESTART = 1;

      } elsif ($op == 2) {
         sysread $P, my $arg, 1
            or last;

         my $timeout = ord $arg;

         if (!poll ($timeout)) {
            warn "AnyEvent::Watchdog: program attempted restart, but failed to do so within $timeout seconds. killing.\n";
            kill 9, $PID;
         }

         # anything but end-of-stream at this point is a protocol violation
         if (sysread $P, my $dummy, 1) {
            warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
            kill 9, $PID;
         }

         $exit_expected = 1;
         last;

      } elsif ($op == 3) {
         sysread $P, my $arg, 1
            or last;

         $hb_interval = ord $arg;

      } elsif ($op == 4) {
         # heartbeat - nothing to do, having received a byte is enough
         # TODO: should only reset heartbeat timeout with \005

      } else {
         warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
         kill 9, $PID;
         last;
      }
   }

   waitpid $PID, 0;

   require POSIX;

   # number of the signal that ended the child, false if it was not signalled
   my $termsig = POSIX::WIFSIGNALED ($?) && POSIX::WTERMSIG ($?);

   # a child ended by SIGINT or SIGTERM is never restarted
   if ($termsig == POSIX::SIGINT () || $termsig == POSIX::SIGTERM ()) {
      $AUTORESTART   = 0;
      $exit_expected = 1;
   }

   warn "AnyEvent::Watchdog: program exited unexpectedly with status $?.\n"
      if !$exit_expected && $? >> 8;

   unless ($AUTORESTART) {
      if ($termsig) {
         # end the supervisor by the same signal: reset all handlers first,
         # with _exit as the fallback should the signal not terminate us
         for my $name (keys %SIG) {
            $SIG{$name} = 'DEFAULT';
         }

         kill $termsig, $$;
         POSIX::_exit (127);
      }

      POSIX::_exit ($? >> 8);
   }

   warn "AnyEvent::Watchdog: attempting automatic restart.\n";
}
182    
# File offsets per file descriptor number, recorded before the first fork and
# restored in every child. According to the original author this works around
# perl bugs: without it the parser exhausts the input files.
our %SEEKPOS;

# A deliberately failing exec. The original author notes that this causes
# perlio to flush its handles internally, so the offsets read below are
# correct ("toi toi toi").
exec ".";
#{
#   local $SIG{CHLD} = 'DEFAULT';
#   my $pid = fork;
#
#   if ($pid) {
#      waitpid $pid, 0;
#   } else {
#      kill 9, $$;
#   }
#}

# Record the current offset of "all" descriptors, on the assumption that 1023
# is more than enough. Descriptors that cannot be duplicated or cannot report
# a position are skipped.
for my $fd (0 .. 1023) {
   open my $dup, "<&$fd"
      or next;

   my $pos = sysseek $dup, 0, 1
      or next;

   $SEEKPOS{$fd} = $pos;
}
206    
# Supervisor/program split, executed while the module is being loaded.
#
# Each pass creates a fresh socketpair ($P for the supervisor, $C for the
# program) and forks. The parent runs server on its end and, whenever server
# returns, goes around again to start the program anew. The child restores
# the recorded file offsets, leaves the loop and continues as the program.
while (1) {
   if ($^O !~ /mswin32/i) {
      require Socket;
      socketpair $P, $C, Socket::AF_UNIX (), Socket::SOCK_STREAM (), 0
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   } else {
      require AnyEvent::Util;
      ($P, $C) = AnyEvent::Util::portable_socketpair ()
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   }

   local $SIG{CHLD} = 'DEFAULT';

   $PID = fork;

   if (!defined $PID) {
      # fork failed - try again a little later
      warn "AnyEvent::Watchdog: '$!', retrying in one second...\n";
      sleep 1;
      next;
   }

   if ($PID) {
      # parent: supervise, then loop if server returns
      close $C;
      server ();
      next;
   }

   # child
   $ENABLED = 1;

   # put every descriptor back to the offset recorded before the fork
   for my $fd (keys %SEEKPOS) {
      open my $dup, "<&$fd"
         or next;

      sysseek $dup, $SEEKPOS{$fd}, 0;
   }

   # continue the program normally
   close $P;
   last;
}
244    
245     =item AnyEvent::Watchdog::restart [$timeout]
246    
247     Tells the supervisor to restart the process when it exits, or forcefully
248     after C<$timeout> seconds (minimum 1, maximum 255, default 60).
249    
250     Calls C<exit 0> to exit the process cleanly.
251    
252     =cut
253    
# Asks the supervisor for a restart and then exits the program with status 0.
#
# The optional argument is the number of seconds the supervisor waits for the
# exit before it kills the program: default 60, forced into the range 1..255
# because it is sent as a single byte. The message enables autorestart
# first, then announces the restart.
sub restart(;$) {
   my $timeout = defined $_[0] ? $_[0] : 60;

   if ($timeout < 1) {
      $timeout = 1;
   } elsif ($timeout > 255) {
      $timeout = 255;
   }

   my $message = "\x01\x02" . chr ($timeout);

   syswrite $C, $message;
   exit 0;
}
264    
265     =item AnyEvent::Watchdog::autorestart [$boolean]
266    
267     =item use AnyEvent::Watchdog qw(autorestart[=$boolean])
268    
269     Enables or disables autorestart (initially disabled, default for
270     C<$boolean> is to enable): By default, the supervisor will exit if the
271     program exits or dies in any way. When enabling autorestart behaviour,
272     then the supervisor will try to restart the program after it dies.
273    
274     Note that the supervisor will never autorestart when the child died with
275     SIGINT or SIGTERM.
276    
277     =cut
278    
# Tells the supervisor to switch autorestart on or off.
#
# Called without an argument it switches autorestart on; otherwise the truth
# value of the argument decides.
sub autorestart(;$) {
   my $enable = @_ ? $_[0] : 1;

   syswrite $C, ($enable ? "\x01" : "\x00");
}
282    
=item AnyEvent::Watchdog::heartbeat [$interval]

=item use AnyEvent::Watchdog qw(heartbeat[=$interval])

Tells the supervisor to automatically kill the program if it doesn't
react for C<$interval> seconds (C<0> to disable, otherwise 1 to 255,
default 60), then installs an AnyEvent timer that sends a regular heartbeat
to the supervisor twice as often.

The interval is sent as a single byte: fractional values are truncated,
except that values between 0 and 1 are rounded up to 1.

Exit behaviour isn't changed, so if you want a restart instead of an exit,
you have to call C<autorestart>.

The heartbeat frequency can be changed as often as you want, an interval
of C<0> disables the heartbeat check again.

=cut

sub heartbeat(;$) {
   my ($interval) = @_;

   $interval = 60 unless defined $interval;

   # 0 means "no heartbeat check"; everything else must fit the range 1..255
   $interval =   0 if $interval <   0;
   $interval =   1 if $interval >   0 && $interval < 1;
   $interval = 255 if $interval > 255;

   # use the same whole number on the wire and for the timer period
   $interval = int $interval;

   syswrite $C, "\x03" . chr $interval;

   if ($interval) {
      require AE;
      $HEARTBEAT = AE::timer (0, $interval * 0.5, sub {
         syswrite $C, "\x04";
      });
   } else {
      # check disabled: drop any timer installed by an earlier call
      undef $HEARTBEAT;
   }
}
314    
# Handles the arguments of "use AnyEvent::Watchdog ...".
#
# Recognised arguments are "autorestart", "autorestart=BOOLEAN", "heartbeat"
# and "heartbeat=INTERVAL"; each one is passed on to the sub of the same
# name. Anything else croaks.
sub import {
   shift; # the class name is not needed

   for (@_) {
      if (/^autorestart(?:=(.*))?$/) {
         # The parentheses are required: on perls that parse a (;$) sub as a
         # named unary operator, the ternary would otherwise be applied to
         # the result of "autorestart (defined $1)".
         autorestart (defined $1 ? $1 : 1);
      } elsif (/^heartbeat(?:=(.*))?$/) {
         # $1 is undefined without "=...", which selects the default interval
         heartbeat ($1);
      } else {
         Carp::croak "AnyEvent::Watchdog: '$_' is not a valid import argument";
      }
   }
}
328    
329 root 1.2 =back
330    
331 root 1.1 =head1 SEE ALSO
332    
333     L<AnyEvent>.
334    
335     =head1 AUTHOR
336    
337     Marc Lehmann <schmorp@schmorp.de>
338     http://home.schmorp.de/
339    
340     =cut
341    
342     1
343