ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-Watchdog/Watchdog.pm
Revision: 1.1
Committed: Sun Aug 2 14:51:29 2009 UTC (14 years, 9 months ago) by root
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::Watchdog - generic watchdog/program restarter
4    
5     =head1 SYNOPSIS
6    
7     # MUST be use'd as the very first thing in the main program
8     use AnyEvent::Watchdog;
9    
10     =head1 DESCRIPTION
11    
12     This module implements a watchdog that can repeatedly fork the program and
13     thus effectively restart it - as soon as the module is use'd, it will fork
14     the program (if possible) and continue to run it normally in the child,
15     while the parent becomes a supervisor.
16    
17     The child can then ask the supervisor to restart itself instead of
18     exiting, or ask the supervisor to restart it gracefully or forcefully.
19    
20     B<NOTE:> This module B<< I<MUST> >> be used as the first thing in the main
21     program. It will cause weird effects when used from another module, as
22     perl does not expect to be forked inside C<BEGIN> blocks.
23    
24     =head1 RECIPES
25    
26     Use AnyEvent::Watchdog solely as a convenient on-demand-restarter:
27    
28     use AnyEvent::Watchdog;
29    
30     # and whenever you want to restart (e.g. to upgrade code):
31     AnyEvent::Watchdog::restart;
32    
33     Use AnyEvent::Watchdog to kill the program and exit when the event loop
34     fails to run for more than two minutes:
35    
36     use AnyEvent::Watchdog qw(heartbeat=120);
37    
38     Use AnyEvent::Watchdog to automatically restart the program
39     when it fails to handle events for longer than 5 minutes:
40    
41     use AnyEvent::Watchdog qw(autorestart heartbeat=300);
42    
43     =head1 FUNCTIONS
44    
45     The module supports the following functions:
46    
47     =over 4
48    
49     =cut
50    
51     package AnyEvent::Watchdog;
52    
53     # load modules we will use later anyways
54     use common::sense;
55    
56     use Carp ();
57    
our $VERSION = '0.1';

# Shared state of the watchdog. The supervisor (parent) and the program
# (child) each have their own copy after the fork.
our $PID;          # pid returned by fork: the child's pid in the supervisor
our $ENABLED = 1;  # not referenced anywhere else in this file
our $AUTORESTART;  # supervisor side: restart the program instead of exiting
our $HEARTBEAT;    # program side: the AE timer that sends the heartbeats
our $P;            # socketpair end kept by the supervisor (parent)
our $C;            # socketpair end kept by the program (child)
65    
# poll $seconds
#
# Waits until the supervisor's socket ($P) becomes readable or until
# $seconds have elapsed, and returns whatever select() returns (a false
# value when the timeout expired without any input).
sub poll($) {
   my ($seconds) = @_;

   # build a select() read mask containing only the fd of $P
   my $readable = "";
   vec ($readable, fileno $P, 1) = 1;

   CORE::select $readable, undef, undef, $seconds
}
70    
# Supervisor main loop, run in the parent process after each fork.
#
# Reads single-byte commands written by the program to the socketpair:
#
#    \x00         disable autorestart
#    \x01         enable autorestart
#    \x02 <secs>  the program is about to exit and wants to be restarted;
#                 it is killed if still running after <secs> seconds
#    \x03 <secs>  enable the heartbeat timeout (only the first one counts)
#    \x04         heartbeat
#
# Anything else gets the program killed. When the loop ends (command, EOF
# or read error) the child is reaped. The sub only returns if the program
# is to be restarted ($AUTORESTART true); otherwise the supervisor process
# ends here, mirroring the child's termination signal or exit status.
sub server {
   my $exit_expected; # set once we know why the program is going away
   my $hb_timeout;    # heartbeat timeout in seconds, undef until requested

   $AUTORESTART = 0;

   # these signals are ignored for as long as we supervise the child
   local $SIG{HUP}  = 'IGNORE';
   local $SIG{INT}  = 'IGNORE';
   local $SIG{TERM} = 'IGNORE';

   COMMAND: for (;;) {
      # with the heartbeat enabled, silence for too long is fatal
      if ($hb_timeout && !poll ($hb_timeout)) {
         $exit_expected = 1;
         warn "AnyEvent::Watchdog: heartbeat failed. killing.\n";
         kill 9, $PID;
         last COMMAND;
      }

      my $cmd;
      sysread $P, $cmd, 1
         or last COMMAND;

      my $code = ord $cmd;

      if ($code == 0) {
         $AUTORESTART = 0;

      } elsif ($code == 1) {
         $AUTORESTART = 1;

      } elsif ($code == 2) {
         my $arg;
         sysread $P, $arg, 1
            or last COMMAND;

         my $timeout = ord $arg;

         # wait for the socket to become readable; nothing within the
         # timeout means the program is still running and did not exit
         unless (poll ($timeout)) {
            warn "AnyEvent::Watchdog: program attempted restart, but failed to do so within $timeout seconds. killing.\n";
            kill 9, $PID;
         }

         # any actual data after a restart request is a protocol violation
         my $junk;
         if (sysread $P, $junk, 1) {
            warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
            kill 9, $PID;
         }

         $exit_expected = 1;
         last COMMAND;

      } elsif ($code == 3) {
         my $arg;
         sysread $P, $arg, 1
            or last COMMAND;

         # the first requested interval stays in effect
         defined $hb_timeout
            or $hb_timeout = ord $arg;

      } elsif ($code == 4) {
         # heartbeat
         # TODO: should only reset heartbeat timeout with \005

      } else {
         warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
         kill 9, $PID;
         last COMMAND;
      }
   }

   waitpid $PID, 0;

   require POSIX;

   # number of the signal that terminated the child, or false
   my $termsig = POSIX::WIFSIGNALED ($?) && POSIX::WTERMSIG ($?);

   # a child that died from SIGINT or SIGTERM is never restarted
   if ($termsig == POSIX::SIGINT () || $termsig == POSIX::SIGTERM ()) {
      $AUTORESTART   = 0;
      $exit_expected = 1;
   }

   warn "AnyEvent::Watchdog: program exited unexpectedly with status $?.\n"
      if !$exit_expected && $? >> 8;

   unless ($AUTORESTART) {
      if ($termsig) {
         # die from the same signal as the child did, 127 as a fallback
         $SIG{$_} = 'DEFAULT' for keys %SIG;
         kill $termsig, $$;
         POSIX::_exit (127);
      }

      POSIX::_exit ($? >> 8);
   }

   warn "AnyEvent::Watchdog: attempting automatic restart.\n";
}
165    
# File offsets of all seekable file descriptors, keyed by fd number.
# Due to bugs in perl, the parser otherwise exhausts the input files, so
# the offsets are remembered here and restored in each forked child.
our %SEEKPOS;

# this causes perlio to flush its handles internally, so
# seek offsets become correct.
# (presumably the exec itself is meant to fail - "." is a directory)
exec "."; # toi toi toi
#{
#   local $SIG{CHLD} = 'DEFAULT';
#   my $pid = fork;
#
#   if ($pid) {
#      waitpid $pid, 0;
#   } else {
#      kill 9, $$;
#   }
#}

# now record the position of every fd that can be dup'ed and seeked
for my $fd (0 .. 1023) {
   open my $fh, "<&$fd"
      or next;

   my $pos = sysseek $fh, 0, 1
      or next;

   $SEEKPOS{$fd} = $pos;
}

# Fork loop: the parent stays in here as the supervisor and forks a new
# child whenever server returns; each child leaves the loop and continues
# running the program normally.
for (;;) {
   if ($^O =~ /mswin32/i) {
      require AnyEvent::Util;
      ($P, $C) = AnyEvent::Util::portable_socketpair ()
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   } else {
      require Socket;
      socketpair $P, $C, Socket::AF_UNIX (), Socket::SOCK_STREAM (), 0
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   }

   local $SIG{CHLD} = 'DEFAULT';

   $PID = fork;

   if (!defined $PID) {
      # fork failed - try again with a fresh socketpair
      warn "AnyEvent::Watchdog: '$!', retrying in one second...\n";
      sleep 1;

   } elsif ($PID) {
      # parent: supervise the child until it needs to be restarted
      close $C;
      server ();

   } else {
      # child: restore seek offsets
      for my $fd (keys %SEEKPOS) {
         open my $fh, "<&$fd"
            or next;
         sysseek $fh, $SEEKPOS{$fd}, 0;
      }

      # continue the program normally
      close $P;
      last;
   }
}
223    
224     =item AnyEvent::Watchdog::restart [$timeout]
225    
226     Tells the supervisor to restart the process when it exits, or forcefully
227     after C<$timeout> seconds (minimum 1, maximum 255, default 60).
228    
229     Calls C<exit 0> to exit the process cleanly.
230    
231     =cut
232    
# restart [$timeout]
#
# Enables autorestart, announces the restart to the supervisor together
# with a timeout clamped to 1..255 seconds (60 when not given), and then
# exits the process with status 0. Does not return.
sub restart(;$) {
   my $timeout = shift;

   if (!defined $timeout) {
      $timeout = 60;
   } elsif ($timeout < 1) {
      $timeout = 1;
   } elsif ($timeout > 255) {
      $timeout = 255;
   }

   # \x01 = enable autorestart, \x02 <secs> = restart request
   my $message = "\x01\x02" . chr $timeout;
   syswrite $C, $message;

   exit 0;
}
243    
244     =item AnyEvent::Watchdog::autorestart [$boolean]
245    
246     =item use AnyEvent::Watchdog qw(autorestart[=$boolean])
247    
248     Enables or disables autorestart (initially disabled, default for
249     C<$boolean> is to enable): By default, the supervisor will exit if the
250     program exits or dies in any way. When enabling autorestart behaviour,
251     then the supervisor will try to restart the program after it dies.
252    
253     Note that the supervisor will never autorestart when the child died with
254     SIGINT or SIGTERM.
255    
256     =cut
257    
# autorestart [$boolean]
#
# Tells the supervisor to enable (\x01) or disable (\x00) autorestart.
# Calling it without an argument enables it. Returns the result of the
# syswrite.
sub autorestart(;$) {
   my $enable = !@_ || $_[0];

   syswrite $C, ($enable ? "\x01" : "\x00");
}
261    
262     =item AnyEvent::Watchdog::heartbeat [$interval]
263    
264     =item use AnyEvent::Watchdog qw(heartbeat[=$interval])
265    
266     Tells the supervisor to automatically kill the program if it doesn't
267     react for C<$interval> seconds (minimum 1, maximum 255, default 60), then
268     installs an AnyEvent timer that sends a regular heartbeat to the supervisor
269     twice as often.
270    
271     Exit behaviour isn't changed, so if you want a restart instead of an exit,
272     you have to call C<autorestart>.
273    
274     Once enabled, the heartbeat cannot be switched off.
275    
276     =cut
277    
# heartbeat [$interval]
#
# Asks the supervisor to kill the program when it stays silent for
# $interval seconds (clamped to 1..255, 60 when not given) and installs an
# AnyEvent timer that sends a heartbeat every $interval/2 seconds.
#
# Only the first call has an effect. The supervisor keeps the first
# interval it is told and ignores later ones, so replacing the timer with
# a slower one would make the program miss the supervisor's deadline and
# get killed. Later calls therefore leave everything untouched.
#
# Returns the timer object stored in $HEARTBEAT.
sub heartbeat(;$) {
   my ($interval) = @_;

   # already enabled - keep the timer that matches the supervisor's timeout
   return $HEARTBEAT if $HEARTBEAT;

   $interval = 60  unless defined $interval;
   $interval = 1   if $interval < 1;
   $interval = 255 if $interval > 255;

   # \x03 <secs> = enable the heartbeat timeout in the supervisor
   syswrite $C, "\x03" . chr $interval;

   require AE;
   # first heartbeat immediately, then twice per interval
   $HEARTBEAT = AE::timer (0, $interval * 0.5, sub {
      syswrite $C, "\x04";
   });
}
292    
# Handles the arguments of "use AnyEvent::Watchdog qw(...)":
#
#    autorestart[=$boolean]   calls autorestart, default is to enable
#    heartbeat[=$interval]    calls heartbeat, default interval applies
#
# Croaks on any other argument.
sub import {
   shift; # the package name

   for my $arg (@_) {
      if ($arg =~ /^autorestart(?:=(.*))?$/) {
         # compute the value first and call with explicit parentheses, so
         # the result does not depend on how perl parses a paren-less
         # call to a (;$)-prototyped sub followed by a ?: expression
         my $enable = defined $1 ? $1 : 1;
         autorestart ($enable);

      } elsif ($arg =~ /^heartbeat(?:=(.*))?$/) {
         # undef (no "=value" given) selects the default interval
         my $interval = $1;
         heartbeat ($interval);

      } else {
         Carp::croak "AnyEvent::Watchdog: '$arg' is not a valid import argument";
      }
   }
}
306    
307     =head1 SEE ALSO
308    
309     L<AnyEvent>.
310    
311     =head1 AUTHOR
312    
313     Marc Lehmann <schmorp@schmorp.de>
314     http://home.schmorp.de/
315    
316     =cut
317    
318     1
319