… | |
… | |
41 | package Sys::FreezeThaw; |
41 | package Sys::FreezeThaw; |
42 | |
42 | |
43 | use Carp; |
43 | use Carp; |
44 | |
44 | |
45 | $VERSION = '0.02'; |
45 | $VERSION = '0.02'; |
|
|
46 | $PARTIAL_OK = 0; |
46 | |
47 | |
47 | =item Sys::FreezeThaw::freezethaw { BLOCK } |
48 | =item Sys::FreezeThaw::freezethaw { BLOCK } |
48 | |
49 | |
49 | First tries to stop all processes. If successful, runs the given code block |
50 | First tries to stop all processes. If successful, runs the given code block |
50 | (or code reference), then restarts all processes again. As the system is |
51 | (or code reference), then restarts all processes again. As the system is |
… | |
… | |
53 | |
54 | |
54 | Runtime errors will be caught with C<eval>. If an exception occurs it will |
55 | Runtime errors will be caught with C<eval>. If an exception occurs it will |
55 | be re-thrown after processes are restarted. If processes cannot be frozen |
56 | be re-thrown after processes are restarted. If processes cannot be frozen |
56 | or restarted, this function will throw an exception. |
57 | or restarted, this function will throw an exception. |
57 | |
58 | |
58 | Signal handlers for SIGPIPE, SIGHUP, SIGALRM, SIGUSR1 and SIGUSR2 will |
59 | Signal handlers for SIGINT, SIGTERM, SIGPIPE, SIGHUP, SIGALRM, SIGUSR1 and |
59 | temporarily be installed, so if you want to catch these, you have to do so |
60 | SIGUSR2 will be installed temporarily, so if you want to catch these, you |
60 | yourself within the executed code block. |
61 | have to do so yourself within the executed code block. |
61 | |
62 | |
62 | Try to do as few things as possible. For example, outputting text might |
63 | Try to do as few things as possible. For example, outputting text might |
63 | cause a deadlock, as the terminal emulator on the other side of STDOUT |
64 | cause a deadlock, as the terminal emulator on the other side of STDOUT |
64 | might be stopped, etc. |
65 | might be stopped, logging to syslog might not work and so on. |
65 | |
66 | |
66 | The return value of the code block is ignored right now, and the function |
67 | The return value of the code block is ignored right now, and the function |
67 | doesn't yet return anything sensible. |
68 | doesn't yet return anything sensible. |
68 | |
69 | |
69 | =item $token = Sys::FreezeThaw::freeze; |
70 | =item $token = Sys::FreezeThaw::freeze |
70 | |
71 | |
71 | Send SIGSTOP to all processes, and return a token that allows them to be |
72 | Send SIGSTOP to all processes, and return a token that allows them to be |
72 | thawed again. |
73 | thawed again. |
73 | |
74 | |
74 | If an error occurs, an exception will be thrown and all stopped processes |
75 | If an error occurs, an exception will be thrown and all stopped processes |
75 | will automatically be thawed. |
76 | will automatically be thawed. |
76 | |
77 | |
77 | =item Sys::FreezeThaw::thaw $token |
78 | =item Sys::FreezeThaw::thaw $token |
78 | |
79 | |
79 | Take a token returned by Sys::FreezeThaw::freeze and send all processes |
80 | Take a token returned by Sys::FreezeThaw::freeze and send all processes |
80 | a CONT signal, in the order required for them not to receive child STOP |
81 | a C<CONT> signal, in the order required for them not to receive child STOP |
81 | notifications. |
82 | notifications. |
|
|
83 | |
|
|
84 | =item $Sys::FreezeThaw::PARTIAL_OK |
|
|
85 | |
|
|
86 | A boolean that tells C<freeze> whether it is an error if a process cannot |
|
|
87 | be stopped. If false (the default), then C<freeze> will fail if there is |
|
|
88 | an unstoppable process. If it is true, then C<freeze> will pretend it the |
|
|
89 | process stopped. |
82 | |
90 | |
83 | =cut |
91 | =cut |
84 | |
92 | |
85 | # this is laughably broken, but... |
93 | # this is laughably broken, but... |
86 | sub yield { |
94 | sub yield { |
… | |
… | |
90 | # the maximum number of iterations per stop/cont etc. loop |
98 | # the maximum number of iterations per stop/cont etc. loop |
91 | # used to shield against catastrophic events (or bugs :) |
99 | # used to shield against catastrophic events (or bugs :) |
92 | # on current linux systems it can take an enourmous amount of |
100 | # on current linux systems it can take an enourmous amount of |
93 | # time for some processes to stop, but usually it only takes |
101 | # time for some processes to stop, but usually it only takes |
94 | # one or two iterations. |
102 | # one or two iterations. |
95 | sub MAX_WAIT() { 1000 } |
103 | sub MAX_WAIT() { 10 } |
96 | |
104 | |
97 | # return a list o fall pid's in the system, |
105 | # return a list o fall pid's in the system, |
98 | # topologically sorted parent-first |
106 | # topologically sorted parent-first |
99 | # skips, keys %$exclude_pid, zombies and stopped processes |
107 | # skips, keys %$exclude_pid, zombies and stopped processes |
100 | sub enum_pids($) { |
108 | sub enum_pids($) { |
… | |
… | |
164 | yield; |
172 | yield; |
165 | } |
173 | } |
166 | } |
174 | } |
167 | } |
175 | } |
168 | |
176 | |
169 | sub freeze() { |
177 | sub freeze(;$) { |
170 | local $@; |
178 | local $@; |
171 | |
179 | |
172 | my @procs; |
180 | my $procs; |
173 | |
181 | |
174 | eval { |
182 | eval { |
175 | for (1..MAX_WAIT) { |
183 | for (1..MAX_WAIT) { |
176 | my $passes = enum_pids { 1 => 1, $$ => 1 }; |
184 | my $passes = enum_pids { 1 => 1, $$ => 1 }; |
177 | last unless @$passes; |
185 | last unless @$passes; |
… | |
… | |
187 | |
195 | |
188 | # wait till processes are really stopped |
196 | # wait till processes are really stopped |
189 | yield; |
197 | yield; |
190 | } |
198 | } |
191 | |
199 | |
192 | die "unable to stop some processes: @pids" if @pids; |
200 | die "unable to stop some processes: @pids" if @pids && !$PARTIAL_OK; |
193 | } |
201 | } |
194 | } |
202 | } |
195 | }; |
203 | }; |
196 | |
204 | |
197 | if ($@) { |
205 | if ($@) { |
… | |
… | |
207 | |
215 | |
208 | my $token = freeze; |
216 | my $token = freeze; |
209 | |
217 | |
210 | eval { |
218 | eval { |
211 | local $SIG{HUP} = sub { die "ERROR: caught SIGHUP while system frozen" }; |
219 | local $SIG{HUP} = sub { die "ERROR: caught SIGHUP while system frozen" }; |
|
|
220 | local $SIG{INT} = sub { die "ERROR: caught SIGINT while system frozen" }; |
|
|
221 | local $SIG{TERM} = sub { die "ERROR: caught SIGTERM while system frozen" }; |
212 | local $SIG{PIPE} = sub { die "ERROR: caught SIGPIPE while system frozen" }; |
222 | local $SIG{PIPE} = sub { die "ERROR: caught SIGPIPE while system frozen" }; |
213 | local $SIG{ALRM} = sub { die "ERROR: caught SIGALRM while system frozen" }; |
223 | local $SIG{ALRM} = sub { die "ERROR: caught SIGALRM while system frozen" }; |
214 | local $SIG{USR1} = sub { die "ERROR: caught SIGUSR1 while system frozen" }; |
224 | local $SIG{USR1} = sub { die "ERROR: caught SIGUSR1 while system frozen" }; |
215 | local $SIG{USR2} = sub { die "ERROR: caught SIGUSR2 while system frozen" }; |
225 | local $SIG{USR2} = sub { die "ERROR: caught SIGUSR2 while system frozen" }; |
216 | |
226 | |