1 |
/* This is a watchdog program from Christian Stieber.  It is not an
 * officially supported piece of crossfire (I hope it works, but I am not
 * going to be spending time debugging problems in this piece of code).  The
 * idea is that it periodically sends/gets udp messages to the server - if
 * the server isn't responding, it kills it off and starts a new one.  There
 * is a bit more logic to it - From Christian:

  The wrapper is just a hack. I'm using it on a Solaris machine, and it
  seems to work fine. Notable features:
  - uses the watchdog interface
  - if the server crashes more than 10 times, with the time between
    successive crashes being less than 30 seconds, the wrapper terminates
    itself (to prevent bringing down the machine in case of startup problems)
  - the server runs at nice 10

 * Note that the main advantage the watchdog has over just the simple
 * crossloop scripts is in the case of infinite loops.  For simple crashes,
 * the crossloop programs do a fine job.
 */
20 |
|
21 |
/* |
22 |
* Version 1 |
23 |
*/ |
24 |
|
25 |
/************************************************************************/ |
26 |
/* |
27 |
* Configuration options |
28 |
*/ |
29 |
|
30 |
/*
 * Every option below may be overridden from the compiler command line
 * (e.g. -DCROSSFIRE_SERVER='"/opt/crossfire/bin/server"'); the values
 * given here are only the defaults.
 */

/* server executable */
#ifndef CROSSFIRE_SERVER
#define CROSSFIRE_SERVER "/usr/stud/stieber/bin/server"
#endif

/* directory to cd to before starting the server */
#ifndef CROSSFIRE_TMPDIR
#define CROSSFIRE_TMPDIR "/usr/stud/stieber/crossfire/tmp/"
#endif

/* if the server crashes more than CRASH_COUNT times, with less than
 * CRASH_INTERVAL seconds between two successive crashes, the loop
 * program is terminated. */
#ifndef CRASH_COUNT
#define CRASH_COUNT 10
#endif
#ifndef CRASH_INTERVAL
#define CRASH_INTERVAL 30
#endif

/* when defined, the wrapper listens for the server's udp watchdog
 * packets and kills the server if none arrive in time; when undefined,
 * the wrapper only restarts the server after it exits. */
#ifndef USE_WATCHDOG
#define USE_WATCHDOG
#endif

/* seconds to sleep before retrying after pipe() or fork() failed */
#ifndef ERROR_SLEEP
#define ERROR_SLEEP 30
#endif
44 |
|
45 |
/************************************************************************/ |
46 |
|
47 |
#include <sys/types.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/unistd.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
58 |
|
59 |
/************************************************************************/ |
60 |
|
61 |
#ifdef USE_WATCHDOG
/*
 * Pipe used to turn SIGCHLD into a file-descriptor event: SignalHandler()
 * writes one byte to Pipe[1], and main() waits on Pipe[0] with select().
 * main() creates the pipe anew for every server start.
 */
int Pipe[2];

/************************************************************************/

/*
 * SIGCHLD handler.  Writes a single byte to Pipe[1] so that the select()
 * loop in main() notices that the server process has terminated.
 *
 * Unused - the signal number; ignored.
 *
 * Only async-signal-safe functions are used here (write, _exit), and
 * errno is preserved so the interrupted code still sees its own value.
 * If the pipe cannot be written the wrapper cannot detect crashes any
 * more, so it reports the problem and terminates.
 */
void SignalHandler(int Unused)
{
    static const char Message[]="Pipe: write failed\n";
    int SavedErrno=errno;

    (void)Unused;
    if (write(Pipe[1],"",1)!=1)
    {
        /* perror() and exit() must not be called from a signal handler */
        ssize_t Ignored=write(STDERR_FILENO,Message,sizeof(Message)-1);

        (void)Ignored;
        _exit(EXIT_FAILURE);
    }
    errno=SavedErrno;
}
#endif
76 |
|
77 |
/************************************************************************/ |
78 |
|
79 |
int main(void) |
80 |
|
81 |
{ |
82 |
int CrashCount; |
83 |
#ifdef USE_WATCHDOG |
84 |
struct protoent *protoent; |
85 |
struct sockaddr_in insock; |
86 |
int fd; |
87 |
|
88 |
memset(&insock,0,sizeof(insock)); |
89 |
|
90 |
if ((protoent=getprotobyname("udp"))==NULL) |
91 |
{ |
92 |
perror("Can't get protobyname"); |
93 |
return EXIT_FAILURE; |
94 |
} |
95 |
if ((fd=socket(PF_INET, SOCK_DGRAM, protoent->p_proto))==-1) |
96 |
{ |
97 |
perror("Can't create socket"); |
98 |
return EXIT_FAILURE; |
99 |
} |
100 |
insock.sin_family=AF_INET; |
101 |
insock.sin_port=htons((unsigned short)13325); |
102 |
if (bind(fd,(struct sockaddr *)&insock,sizeof(insock))==-1) |
103 |
{ |
104 |
perror("Error on bind"); |
105 |
return EXIT_FAILURE; |
106 |
} |
107 |
#endif |
108 |
|
109 |
CrashCount=0; |
110 |
nice(10-nice(0)); |
111 |
while (CrashCount<CRASH_COUNT) |
112 |
{ |
113 |
time_t StartTime; |
114 |
time_t EndTime; |
115 |
pid_t Server; |
116 |
|
117 |
chdir(CROSSFIRE_TMPDIR); |
118 |
time(&StartTime); |
119 |
#ifdef USE_WATCHDOG |
120 |
if (pipe(Pipe)==0) |
121 |
{ |
122 |
void (*OldHandler)(int); |
123 |
|
124 |
OldHandler=signal(SIGCHLD,SignalHandler); |
125 |
#endif |
126 |
switch (Server=fork()) |
127 |
{ |
128 |
case 0: |
129 |
execl(CROSSFIRE_SERVER,CROSSFIRE_SERVER,"-server",NULL); |
130 |
return EXIT_FAILURE; |
131 |
|
132 |
case -1: |
133 |
sleep(ERROR_SLEEP); |
134 |
break; |
135 |
|
136 |
default: |
137 |
#ifdef USE_WATCHDOG |
138 |
while (1) |
139 |
{ |
140 |
fd_set Files; |
141 |
struct timeval Timeout; |
142 |
int Max; |
143 |
|
144 |
FD_ZERO(&Files); |
145 |
FD_SET(Pipe[0],&Files); |
146 |
FD_SET(fd,&Files); |
147 |
Timeout.tv_sec=5*60; |
148 |
Timeout.tv_usec=0; |
149 |
if (fd>Pipe[0]) |
150 |
{ |
151 |
Max=fd+1; |
152 |
} |
153 |
else |
154 |
{ |
155 |
Max=Pipe[0]+1; |
156 |
} |
157 |
while (select(Max,&Files,NULL,NULL,&Timeout)==-1) |
158 |
{ |
159 |
if (errno!=EINTR) |
160 |
{ |
161 |
perror("Error on select"); |
162 |
return EXIT_FAILURE; |
163 |
} |
164 |
} |
165 |
if (FD_ISSET(Pipe[0],&Files)) |
166 |
{ |
167 |
/* crash */ |
168 |
unlink("core"); |
169 |
waitpid(Server,NULL,0); |
170 |
printf("Server crash!\n"); |
171 |
break; |
172 |
} |
173 |
else if (FD_ISSET(fd,&Files)) |
174 |
{ |
175 |
/* watchdog */ |
176 |
char t; |
177 |
|
178 |
recv(fd,&t,1,0); |
179 |
} |
180 |
else |
181 |
{ |
182 |
/* timeout */ |
183 |
printf("Watchdog timeout!\n"); |
184 |
if (kill(Server,SIGKILL)!=0) |
185 |
{ |
186 |
perror("Error on kill"); |
187 |
return EXIT_FAILURE; |
188 |
} |
189 |
} |
190 |
} |
191 |
#else |
192 |
waitpid(Server,NULL,0); |
193 |
#endif |
194 |
#ifdef USE_WATCHDOG |
195 |
signal(SIGCHLD,OldHandler); |
196 |
#endif |
197 |
time(&EndTime); |
198 |
if (EndTime-StartTime<CRASH_INTERVAL) |
199 |
{ |
200 |
CrashCount++; |
201 |
} |
202 |
else |
203 |
{ |
204 |
CrashCount=0; |
205 |
} |
206 |
break; |
207 |
} |
208 |
#ifdef USE_WATCHDOG |
209 |
close(Pipe[0]); |
210 |
close(Pipe[1]); |
211 |
} |
212 |
else |
213 |
{ |
214 |
sleep(ERROR_SLEEP); |
215 |
} |
216 |
#endif |
217 |
} |
218 |
#ifdef USE_WATCHDOG |
219 |
close(fd); |
220 |
#endif |
221 |
return 0; |
222 |
} |