ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-Watchdog/Watchdog.pm
Revision: 1.8
Committed: Wed Oct 24 17:05:33 2012 UTC (11 years, 6 months ago) by root
Branch: MAIN
Changes since 1.7: +1 -1 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::Watchdog - generic watchdog/program restarter
4    
5     =head1 SYNOPSIS
6    
7 root 1.7 # MUST be use'd as the very first thing in the main program,
8     # as it clones/forks the program before it returns.
9 root 1.1 use AnyEvent::Watchdog;
10    
11     =head1 DESCRIPTION
12    
13     This module implements a watchdog that can repeatedly fork the program and
14     thus effectively restart it - as soon as the module is use'd, it will fork
15     the program (if possible) and continue to run it normally in the child,
16     while the parent becomes a supervisor.
17    
18     The child can then ask the supervisor to restart itself instead of
19     exiting, or ask the supervisor to restart it gracefully or forcefully.
20    
21     B<NOTE:> This module B<< I<MUST> >> be used as the first thing in the main
22     program. It will cause weird effects when used from another module, as
23     perl does not expect to be forked inside C<BEGIN> blocks.
24    
25     =head1 RECIPES
26    
27 root 1.7 Use AnyEvent::Watchdog solely as a convenient on-demand-restarter:
28 root 1.1
29     use AnyEvent::Watchdog;
30    
31 root 1.7 # and whenever you want to restart (e.g. to upgrade code):
32     use AnyEvent::Watchdog::Util;
33     AnyEvent::Watchdog::Util::restart;
34 root 1.1
35     Use AnyEvent::Watchdog to kill and automatically restart the program when
36     the event loop fails to run for more than two minutes:
37    
38 root 1.7 use AnyEvent::Watchdog autorestart => 1, heartbeat => 120;
39 root 1.1
40 root 1.7 Use AnyEvent::Watchdog to automatically kill (but not restart) the program when it fails
41     to handle events for longer than 5 minutes:
42 root 1.1
43 root 1.7 use AnyEvent::Watchdog heartbeat => 300;
44 root 1.1
45 root 1.6 =head1 VARIABLES/FUNCTIONS
46 root 1.1
47 root 1.7 This module is controlled via the L<AnyEvent::Watchdog::Util> module:
48 root 1.1
49 root 1.7 use AnyEvent::Watchdog::Util;
50    
51     # attempt restart
52     AnyEvent::Watchdog::Util::restart;
53    
54     # check if it is running
55     AnyEvent::Watchdog::Util::enabled
56     or croak "not running under watchdog!";
57 root 1.1
58     =cut
59    
60     package AnyEvent::Watchdog;
61    
62     # load modules we will use later anyways
63     use common::sense;
64    
65     use Carp ();
66    
67 root 1.7 our $VERSION = '1.0';
68 root 1.6
69 root 1.1 our $PID; # child pid
70 root 1.7 our $ENABLED = 0; # also version
71 root 1.1 our $AUTORESTART; # actually exit
72     our ($P, $C);
73    
# Wait up to $_[0] seconds for the supervisor socket $P to become readable.
# Returns whatever 4-argument select returns, i.e. a false count on timeout.
sub poll($) {
   my ($seconds) = @_;

   my $rbits = "";
   vec ($rbits, fileno ($P), 1) = 1;

   CORE::select $rbits, undef, undef, $seconds
}
78    
# Supervisor loop, run in the parent after a successful fork.
#
# Reads single-byte commands from the child over $P until the socket
# reports EOF/error or the child announces a restart, then reaps the child.
# Afterwards it either returns (autorestart is on) or terminates the
# supervisor itself, mirroring the child's exit code or terminating signal.
#
# Commands understood:
#    \000          switch autorestart off
#    \001          switch autorestart on
#    \002 <byte>   child wants to restart and must be gone within <byte> seconds
#    \003 <byte>   set the heartbeat interval to <byte> seconds (0 = off)
#    \004          heartbeat
sub server {
   my $exit_expected; # true once the child's demise has been accounted for
   my $beat_interval; # seconds to wait for the next command, false = forever

   $AUTORESTART = 0;

   # the supervisor itself ignores these while it watches the child
   local $SIG{HUP}  = 'IGNORE';
   local $SIG{INT}  = 'IGNORE';
   local $SIG{TERM} = 'IGNORE';

   COMMAND: for (;;) {
      # with a heartbeat configured, silence for a whole interval is fatal
      if ($beat_interval && !poll ($beat_interval)) {
         $exit_expected = 1;
         warn "AnyEvent::Watchdog: heartbeat failed. killing.\n";
         kill 9, $PID;
         last COMMAND;
      }

      # EOF or read error: stop reading commands
      sysread ($P, my $byte, 1)
         or last COMMAND;

      my $op = ord $byte;

      if ($op == 0) {
         $AUTORESTART = 0;

      } elsif ($op == 1) {
         $AUTORESTART = 1;

      } elsif ($op == 2) {
         sysread ($P, my $raw, 1)
            or last COMMAND;

         my $timeout = ord $raw;

         # the socket must become readable (EOF) within the grace period
         unless (poll ($timeout)) {
            warn "AnyEvent::Watchdog: program attempted restart, but failed to do so within $timeout seconds. killing.\n";
            kill 9, $PID;
         }

         # anything other than EOF at this point is a protocol violation
         if (sysread ($P, my $dummy, 1)) {
            warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
            kill 9, $PID;
         }

         $exit_expected = 1;
         last COMMAND;

      } elsif ($op == 3) {
         sysread ($P, my $raw, 1)
            or last COMMAND;

         $beat_interval = ord $raw;

      } elsif ($op == 4) {
         # heartbeat - nothing to do, the next loop iteration polls afresh
         # TODO: should only reset heartbeat timeout with \005

      } else {
         warn "AnyEvent::Watchdog: unexpected program output. killing.\n";
         kill 9, $PID;
         last COMMAND;
      }
   }

   waitpid $PID, 0;

   require POSIX;

   # terminating signal number, or a false value if the child exited normally
   my $termsig = POSIX::WIFSIGNALED ($?) && POSIX::WTERMSIG ($?);

   # death by SIGINT/SIGTERM counts as a deliberate stop: never restart
   if ($termsig == POSIX::SIGINT () || $termsig == POSIX::SIGTERM ()) {
      $AUTORESTART   = 0;
      $exit_expected = 1;
   }

   warn "AnyEvent::Watchdog: program exited unexpectedly with status $?.\n"
      if !$exit_expected && $? >> 8;

   unless ($AUTORESTART) {
      if ($termsig) {
         # die from the same signal as the child; _exit is only a fallback
         $SIG{$_} = 'DEFAULT' for keys %SIG;
         kill $termsig, $$;
         POSIX::_exit (127);
      }

      POSIX::_exit ($? >> 8);
   }

   warn "AnyEvent::Watchdog: attempting automatic restart.\n";
}
172    
# fd number => file offset, as seen when this module is loaded
our %SEEKPOS;
# due to bugs in perl, the file offsets of all fds are remembered here and
# restored in each child (the parser otherwise exhausts the input files)

# this causes perlio to flush its handles internally, so
# seek offsets become correct.
exec "."; # toi toi toi
#{
#   local $SIG{CHLD} = 'DEFAULT';
#   my $pid = fork;
#
#   if ($pid) {
#      waitpid $pid, 0;
#   } else {
#      kill 9, $$;
#   }
#}

# now record "all" fd positions, assuming 1023 is more than enough.
for my $fd (0 .. 1023) {
   # fds that cannot be dup'ed or seeked are silently skipped
   open my $dup, "<&$fd" or next;
   my $pos = sysseek $dup, 0, 1
      or next;
   $SEEKPOS{$fd} = $pos;
}

# fork loop: every iteration creates a fresh socketpair and a fresh child.
# the parent stays in here as supervisor, the child leaves via "last".
for (;;) {
   if ($^O =~ /mswin32/i) {
      require AnyEvent::Util;
      ($P, $C) = AnyEvent::Util::portable_socketpair ()
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   } else {
      require Socket;
      socketpair $P, $C, Socket::AF_UNIX (), Socket::SOCK_STREAM (), 0
         or Carp::croak "AnyEvent::Watchdog: unable to create restarter pipe: $!\n";
   }

   local $SIG{CHLD} = 'DEFAULT';

   $PID = fork;

   if (!defined $PID) {
      # fork failed, try again with a new socketpair
      warn "AnyEvent::Watchdog: '$!', retrying in one second...\n";
      sleep 1;
   } elsif (!$PID) {
      # child code
      $ENABLED = 1; # also version

      # restore seek offsets
      for my $fd (keys %SEEKPOS) {
         open my $dup, "<&$fd" or next;
         sysseek $dup, $SEEKPOS{$fd}, 0;
      }

      # continue the program normally
      close $P;
      last;
   } else {
      # parent code: supervise until server returns to request a restart
      close $C;
      server ();
   }
}
234    
# Handles the arguments of "use AnyEvent::Watchdog key => value, ...".
#
# Recognised keys:
#    autorestart => $bool      passed on, as a boolean, to
#                              AnyEvent::Watchdog::Util::autorestart
#    heartbeat   => $seconds   passed on to AnyEvent::Watchdog::Util::heartbeat,
#                              a false/missing value becomes 60
#
# Croaks on any other key, naming the offending key in the message.
sub import {
   shift; # class name, unused

   while (@_) {
      my $k = shift;

      require AnyEvent::Watchdog::Util;

      if ($k eq "autorestart") {
         AnyEvent::Watchdog::Util::autorestart (! ! shift);
      } elsif ($k eq "heartbeat") {
         AnyEvent::Watchdog::Util::heartbeat (shift || 60);
      } else {
         # the key lives in $k - $_ is never set by this loop
         Carp::croak "AnyEvent::Watchdog: '$k' is not a valid import argument";
      }
   }
}
252    
# exit hook used by AnyEvent::Watchdog::Util: if $end holds something true
# at program exit, it is called as code.
our $end;
END { &$end if $end }
256 root 1.2
257 root 1.1 =head1 SEE ALSO
258    
259 root 1.8 L<AnyEvent::Watchdog::Util>, L<AnyEvent>.
260 root 1.1
261     =head1 AUTHOR
262    
263     Marc Lehmann <schmorp@schmorp.de>
264     http://home.schmorp.de/
265    
266     =cut
267    
268     1
269