ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-Watchdog/Watchdog.pm
Revision: 1.4
Committed: Fri Aug 14 23:07:09 2009 UTC (14 years, 9 months ago) by root
Branch: MAIN
CVS Tags: rel-0_9
Changes since 1.3: +1 -1 lines
Log Message:
0.9

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::Watchdog - generic watchdog/program restarter
4    
5     =head1 SYNOPSIS
6    
7     # MUST be use'd as the very first thing in the main program
8     use AnyEvent::Watchdog;
9    
10     =head1 DESCRIPTION
11    
12     This module implements a watchdog that can repeatedly fork the program and
13     thus effectively restart it - as soon as the module is use'd, it will fork
14     the program (if possible) and continue to run it normally in the child,
15     while the parent becomes a supervisor.
16    
17     The child can then ask the supervisor to restart itself instead of
18     exiting, or ask the supervisor to restart it gracefully or forcefully.
19    
20     B<NOTE:> This module B<< I<MUST> >> be used as the first thing in the main
21     program. It will cause weird effects when used from another module, as
22     perl does not expect to be forked inside C<BEGIN> blocks.
23    
24     =head1 RECIPES
25    
26     Use AnyEvent::Watchdog solely as a convenient on-demand-restarter:
27    
28     use AnyEvent::Watchdog;
29    
30     # and whenever you want to restart (e.g. to upgrade code):
31     AnyEvent::Watchdog::restart;
32    
33     Use AnyEvent::Watchdog to kill the program and exit when the event loop
34     fails to run for more than two minutes:
35    
36     use AnyEvent::Watchdog qw(autorestart heartbeat=120);
37    
38     Use AnyEvent::Watchdog to automatically restart the program
39     when it fails to handle events for longer than 5 minutes:
40    
41     use AnyEvent::Watchdog qw(autorestart heartbeat=300);
42    
43     =head1 FUNCTIONS
44    
45     The module supports the following functions:
46    
47     =over 4
48    
49     =cut
50    
51     package AnyEvent::Watchdog;
52    
53     # load modules we will use later anyways
54     use common::sense;
55    
56     use Carp ();
57    
58 root 1.4 our $VERSION = '0.9';
59 root 1.1
60     our $PID; # child pid
61     our $ENABLED = 1;
62     our $AUTORESTART; # actually exit
63     our $HEARTBEAT;
64     our ($P, $C);
65    
# Wait for the supervisor-side socket $P to become readable.
#
# Takes the timeout in seconds and returns whatever select() returns, in the
# caller's context: a false value (0) means the timeout elapsed without $P
# becoming readable.
sub poll($) {
    my ($timeout) = @_;

    # build a read set containing only $P's file descriptor
    my $rbits = '';
    vec ($rbits, fileno $P, 1) = 1;

    CORE::select $rbits, undef, undef, $timeout
}
70    
# Supervisor loop, run in the parent process after a successful fork.
#
# Reads single-byte commands from the child over $P:
#   0            - disable autorestart
#   1            - enable autorestart
#   2 <timeout>  - child wants a restart; give it <timeout> seconds to exit
#   3 <interval> - set the heartbeat timeout (a false value turns the check off)
#   4            - heartbeat ping
# Anything else gets the child killed. Once the command loop ends, the child
# is reaped and the supervisor either returns (autorestart is on, so the
# caller forks again) or exits, mirroring the child's signal or exit status.
sub server {
    my $expected;   # true once the child's exit is one we anticipate
    my $heartbeat;  # heartbeat timeout in seconds; false means "no check"

    $AUTORESTART = 0;

    # the supervisor must outlive signals aimed at the process group
    local $SIG{HUP}  = 'IGNORE';
    local $SIG{INT}  = 'IGNORE';
    local $SIG{TERM} = 'IGNORE';

    for (;;) {
        # with a heartbeat configured, silence for too long is fatal
        if ($heartbeat && !poll ($heartbeat)) {
            $expected = 1;
            warn "AnyEvent::Watchdog: heartbeat failed. killing.\n";
            kill 9, $PID;
            last;
        }

        # EOF or a read error means the child went away
        sysread $P, my $cmd, 1
            or last;

        my $op = ord $cmd;

        if ($op == 0) {
            $AUTORESTART = 0;

        } elsif ($op == 1) {
            $AUTORESTART = 1;

        } elsif ($op == 2) {
            sysread $P, my $raw, 1
                or last;

            my $timeout = ord $raw;

            # the child should exit (closing its socket end) within $timeout
            unless (poll ($timeout)) {
                warn "AnyEvent::Watchdog: program attempted restart, but failed to do so within $timeout seconds. killing.\n";
                kill 9, $PID;
            }

            # anything but EOF after a restart request is a protocol violation
            if (sysread $P, my $dummy, 1) {
                warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
                kill 9, $PID;
            }

            $expected = 1;
            last;

        } elsif ($op == 3) {
            sysread $P, my $interval, 1
                or last;

            $heartbeat = ord $interval;

        } elsif ($op == 4) {
            # heartbeat
            # TODO: should only reset heartbeat timeout with \005

        } else {
            warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
            kill 9, $PID;
            last;
        }
    }

    waitpid $PID, 0;

    require POSIX;

    # signal number that terminated the child, or false if it exited normally
    my $termsig = POSIX::WIFSIGNALED ($?) && POSIX::WTERMSIG ($?);

    # SIGINT/SIGTERM are deliberate shutdowns: never restart after those
    if ($termsig == POSIX::SIGINT () || $termsig == POSIX::SIGTERM ()) {
        $AUTORESTART = 0;
        $expected    = 1;
    }

    warn "AnyEvent::Watchdog: program exited unexpectedly with status $?.\n"
        if !$expected && $? >> 8;

    unless ($AUTORESTART) {
        if ($termsig) {
            # die from the same signal as the child did
            $SIG{$_} = 'DEFAULT' for keys %SIG;
            kill $termsig, $$;
            POSIX::_exit (127);
        }

        POSIX::_exit ($? >> 8);
    }

    warn "AnyEvent::Watchdog: attempting automatic restart.\n";
}
164    
our %SEEKPOS;
# Due to bugs in perl, try to remember the file offsets for all fds and
# restore them later (the parser otherwise exhausts the input files).

# This causes perlio to flush its handles internally, so seek offsets
# become correct.
exec "."; # toi toi toi
#{
# local $SIG{CHLD} = 'DEFAULT';
# my $pid = fork;
#
# if ($pid) {
# waitpid $pid, 0;
# } else {
# kill 9, $$;
# }
#}

# now record the current position of every fd that can be dup'ed and seeked
for my $fd (0 .. 1023) {
    open my $fh, "<&$fd" or next;

    my $pos = sysseek ($fh, 0, 1)
        or next;

    $SEEKPOS{$fd} = $pos;
}

# Fork loop: the parent stays in here as supervisor, each child leaves it
# and continues running the main program.
for (;;) {
    if ($^O =~ /mswin32/i) {
        require AnyEvent::Util;
        ($P, $C) = AnyEvent::Util::portable_socketpair ()
            or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
    } else {
        require Socket;
        socketpair $P, $C, Socket::AF_UNIX (), Socket::SOCK_STREAM (), 0
            or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
    }

    local $SIG{CHLD} = 'DEFAULT';

    $PID = fork;

    if (!defined $PID) {
        # fork failed: report and try again
        warn "AnyEvent::Watchdog: '$!', retrying in one second...\n";
        sleep 1;
    } elsif ($PID) {
        # parent: keep only the supervisor end and watch the child
        close $C;
        server;
    } else {
        # child: restore seek offsets
        for my $fd (keys %SEEKPOS) {
            open my $fh, "<&$fd" or next;
            sysseek $fh, $SEEKPOS{$fd}, 0;
        }

        # continue the program normally
        close $P;
        last;
    }
}
222    
223     =item AnyEvent::Watchdog::restart [$timeout]
224    
225     Tells the supervisor to restart the process when it exits, or forcefully
226     after C<$timeout> seconds (minimum 1, maximum 255, default 60).
227    
228     Calls C<exit 0> to exit the process cleanly.
229    
230     =cut
231    
# Ask the supervisor for a restart, then exit cleanly.
#
# $timeout (optional, default 60) is clamped to 1..255 because it travels as
# a single byte. The message sent is: enable autorestart (\x01), restart
# request (\x02), timeout byte.
sub restart(;$) {
    my $timeout = shift;

    if (!defined $timeout) {
        $timeout = 60;
    } elsif ($timeout < 1) {
        $timeout = 1;
    } elsif ($timeout > 255) {
        $timeout = 255;
    }

    my $request = "\x01\x02" . chr $timeout;

    syswrite $C, $request;
    exit 0;
}
242    
243     =item AnyEvent::Watchdog::autorestart [$boolean]
244    
245     =item use AnyEvent::Watchdog qw(autorestart[=$boolean])
246    
247     Enables or disables autorestart (initially disabled, default for
248     C<$boolean> is to enable): By default, the supervisor will exit if the
249     program exits or dies in any way. When enabling autorestart behaviour,
250     then the supervisor will try to restart the program after it dies.
251    
252     Note that the supervisor will never autorestart when the child died with
253     SIGINT or SIGTERM.
254    
255     =cut
256    
# Tell the supervisor to enable (\x01) or disable (\x00) autorestart.
# Called without an argument it enables; otherwise the truth of the
# argument decides.
sub autorestart(;$) {
    my $enable = @_ ? $_[0] : 1;

    syswrite $C, ($enable ? "\x01" : "\x00");
}
260    
261     =item AnyEvent::Watchdog::heartbeat [$interval]
262    
263     =item use AnyEvent::Watchdog qw(heartbeat[=$interval])
264    
265     Tells the supervisor to automatically kill the program if it doesn't
266     react for C<$interval> seconds (minimum 1, maximum 255, default 60), then
267     installs an AnyEvent timer that sends a regular heartbeat to the supervisor
268     twice as often.
269    
270     Exit behaviour isn't changed, so if you want a restart instead of an exit,
271     you have to call C<autorestart>.
272    
273 root 1.3 The heartbeat frequency can be changed as often as you want, an interval
274     of C<0> disables the heartbeat check again.
275 root 1.1
276     =cut
277    
# Configure the supervisor's heartbeat check and start sending heartbeats.
#
# $interval (optional, default 60) is the number of seconds of silence the
# supervisor tolerates before killing the program. It is sent as a single
# byte, so non-zero values are clamped to 1..255. An interval of 0 disables
# the check again, as documented: the zero byte is passed through (the
# supervisor treats a false interval as "no heartbeat check") and the
# heartbeat timer is dropped instead of being re-armed.
#
# When enabled, an AnyEvent timer stored in $HEARTBEAT sends a heartbeat
# byte (\x04) twice per interval.
sub heartbeat(;$) {
    my ($interval) = @_;

    $interval = 60 unless defined $interval;

    if ($interval == 0) {
        # explicit request to switch the check off; previously this was
        # clamped to 1, which made disabling impossible
        $interval = 0;
    } elsif ($interval < 1) {
        $interval = 1;
    } elsif ($interval > 255) {
        $interval = 255;
    }

    syswrite $C, "\x03" . chr $interval;

    unless ($interval) {
        # no check on the supervisor side, so no pings are needed either
        undef $HEARTBEAT;
        return;
    }

    require AE;
    $HEARTBEAT = AE::timer (0, $interval * 0.5, sub {
        syswrite $C, "\x04";
    });
}
292    
# Handle "use AnyEvent::Watchdog qw(...)" arguments.
#
# Recognised arguments:
#   autorestart[=$boolean]  - calls autorestart, defaulting to 1
#   heartbeat[=$interval]   - calls heartbeat, which applies its own default
# Anything else croaks.
sub import {
    shift;

    for (@_) {
        if (/^autorestart(?:=(.*))?$/) {
            # Explicit parentheses: autorestart has a (;$) prototype, and
            # where perl parses such a sub as a named unary operator the
            # unparenthesised form would bind as
            # "autorestart (defined $1) ? $1 : 1" and send the wrong flag.
            autorestart (defined $1 ? $1 : 1);
        } elsif (/^heartbeat(?:=(.*))?$/) {
            heartbeat ($1);
        } else {
            Carp::croak "AnyEvent::Watchdog: '$_' is not a valid import argument";
        }
    }
}
306    
307 root 1.2 =back
308    
309 root 1.1 =head1 SEE ALSO
310    
311     L<AnyEvent>.
312    
313     =head1 AUTHOR
314    
315     Marc Lehmann <schmorp@schmorp.de>
316     http://home.schmorp.de/
317    
318     =cut
319    
320     1
321