1 |
root |
1.1 |
=head1 NAME |
2 |
|
|
|
3 |
|
|
Sys::FreezeThaw - stop and start all user processes on a machine |
4 |
|
|
|
5 |
|
|
=head1 SYNOPSIS |
6 |
|
|
|
7 |
|
|
use Sys::FreezeThaw; |
8 |
|
|
|
9 |
|
|
Sys::FreezeThaw::freezethaw { |
10 |
|
|
# run code while system is frozen |
11 |
|
|
}; |
12 |
|
|
|
13 |
|
|
my $token = Sys::FreezeThaw::freeze; |
14 |
|
|
... do something ... |
15 |
|
|
Sys::FreezeThaw::thaw $token; |
16 |
|
|
|
17 |
|
|
=head1 DESCRIPTION |
18 |
|
|
|
19 |
|
|
Operating Systems/Kernels current supported: Linux-2.6/3.0 with F</proc>. |
20 |
|
|
|
21 |
|
|
This module implements a very specific feature: stopping(freezing and |
22 |
|
|
thawing/continuing all userspace processes on the machine. It works by |
23 |
|
|
sending SIGSTOP to all processes, parent-process first, so that the wait |
24 |
|
|
syscall will not trigger on stopped children. Restarting is done in |
25 |
|
|
reverse order. |
26 |
|
|
|
27 |
|
|
Using the combined function Sys::FreezeThaw::freezethaw is recommended as |
28 |
|
|
it will catch runtime errors, but stopping and restarting can be dine via |
29 |
|
|
separate function calls. |
30 |
|
|
|
31 |
|
|
=head2 What could it possibly be sueful for?? |
32 |
|
|
|
33 |
|
|
Possible uses include: doing atomic file system operations (such as |
34 |
|
|
replacing files while they are guaranteed not to be in use), or quieting |
35 |
|
|
down a system to investigate suspicious behaviour. |
36 |
|
|
|
37 |
|
|
=over 4 |
38 |
|
|
|
39 |
|
|
=cut |
40 |
|
|
|
41 |
|
|
package Sys::FreezeThaw; |
42 |
|
|
|
43 |
|
|
use Carp; |
44 |
|
|
|
45 |
|
|
$VERSION = '0.02'; |
46 |
root |
1.2 |
$PARTIAL_OK = 0; |
47 |
root |
1.1 |
|
48 |
|
|
=item Sys::FreezeThaw::freezethaw { BLOCK } |
49 |
|
|
|
50 |
|
|
First tries to stop all processes. If successful, runs the given code block |
51 |
|
|
(or code reference), then restarts all processes again. As the system is |
52 |
|
|
basically frozen during the code block execution, it should be as fast as |
53 |
|
|
possible. |
54 |
|
|
|
55 |
|
|
Runtime errors will be caught with C<eval>. If an exception occurs it will |
56 |
|
|
be re-thrown after processes are restarted. If processes cannot be frozen |
57 |
|
|
or restarted, this function will throw an exception. |
58 |
|
|
|
59 |
root |
1.2 |
Signal handlers for SIGINT, SIGTERM, SIGPIPE, SIGHUP, SIGALRM, SIGUSR1 and |
60 |
|
|
SIGUSR2 will be installed temporarily, so if you want to catch these, you |
61 |
|
|
have to do so yourself within the executed code block. |
62 |
root |
1.1 |
|
63 |
|
|
Try to do as few things as possible. For example, outputting text might |
64 |
|
|
cause a deadlock, as the terminal emulator on the other side of STDOUT |
65 |
root |
1.2 |
might be stopped, logging to syslog might not work and so on. |
66 |
root |
1.1 |
|
67 |
|
|
The return value of the code block is ignored right now, and the function |
68 |
|
|
doesn't yet return anything sensible. |
69 |
|
|
|
70 |
root |
1.2 |
=item $token = Sys::FreezeThaw::freeze |
71 |
root |
1.1 |
|
72 |
|
|
Send SIGSTOP to all processes, and return a token that allows them to be |
73 |
|
|
thawed again. |
74 |
|
|
|
75 |
|
|
If an error occurs, an exception will be thrown and all stopped processes |
76 |
|
|
will automatically be thawed. |
77 |
|
|
|
78 |
|
|
=item Sys::FreezeThaw::thaw $token |
79 |
|
|
|
80 |
|
|
Take a token returned by Sys::FreezeThaw::freeze and send all processes |
81 |
root |
1.2 |
a C<CONT> signal, in the order required for them not to receive child STOP |
82 |
root |
1.1 |
notifications. |
83 |
|
|
|
84 |
root |
1.2 |
=item $Sys::FreezeThaw::PARTIAL_OK |
85 |
|
|
|
86 |
|
|
A boolean that tells C<freeze> whether it is an error if a process cannot |
87 |
|
|
be stopped. If false (the default), then C<freeze> will fail if there is |
88 |
|
|
an unstoppable process. If it is true, then C<freeze> will pretend it the |
89 |
|
|
process stopped. |
90 |
|
|
|
91 |
root |
1.1 |
=cut |
92 |
|
|
|
93 |
|
|
# this is laughably broken, but... |
94 |
|
|
sub yield { |
95 |
|
|
select undef, undef, undef, 1/1000; |
96 |
|
|
} |
97 |
|
|
|
98 |
|
|
# the maximum number of iterations per stop/cont etc. loop |
99 |
|
|
# used to shield against catastrophic events (or bugs :) |
100 |
|
|
# on current linux systems it can take an enourmous amount of |
101 |
|
|
# time for some processes to stop, but usually it only takes |
102 |
|
|
# one or two iterations. |
103 |
root |
1.2 |
sub MAX_WAIT() { 10 } |
104 |
root |
1.1 |
|
105 |
|
|
# return a list o fall pid's in the system, |
106 |
|
|
# topologically sorted parent-first |
107 |
|
|
# skips, keys %$exclude_pid, zombies and stopped processes |
108 |
|
|
sub enum_pids($) { |
109 |
|
|
my ($exclude_pid) = @_; |
110 |
|
|
|
111 |
|
|
opendir my $proc, "/proc" |
112 |
|
|
or die "/proc: $!"; |
113 |
|
|
my @pid = sort { $b <=> $a } |
114 |
|
|
grep /^\d+/, |
115 |
|
|
readdir $proc; |
116 |
|
|
closedir $proc; |
117 |
|
|
|
118 |
|
|
my %ppid; |
119 |
|
|
for (@pid) { |
120 |
|
|
next if exists $exclude_pid->{$_}; |
121 |
|
|
|
122 |
|
|
open my $stat, "<", "/proc/$_/stat" |
123 |
|
|
or next; |
124 |
|
|
my ($state, $ppid, $vsize, $rss) = (split /\s+/, scalar <$stat>)[2,3,22,23]; |
125 |
|
|
|
126 |
|
|
next if $state =~ /^[TZX]/i; # stopped, zombies, dead |
127 |
|
|
next unless $vsize || $rss; # skip kernel threads or other nasties |
128 |
|
|
|
129 |
|
|
$ppid{$_} = $ppid; |
130 |
|
|
} |
131 |
|
|
|
132 |
|
|
# now topologically sort by parent-id |
133 |
|
|
my @res; |
134 |
|
|
while (scalar %ppid) { |
135 |
|
|
my @pass; |
136 |
|
|
|
137 |
|
|
for my $pid (keys %ppid) { |
138 |
|
|
if (!exists $ppid{$ppid{$pid}}) { |
139 |
|
|
push @pass, $pid; |
140 |
|
|
} |
141 |
|
|
} |
142 |
|
|
|
143 |
|
|
delete $ppid{$_} for @pass; |
144 |
|
|
|
145 |
|
|
push @res, \@pass; |
146 |
|
|
} |
147 |
|
|
|
148 |
|
|
\@res |
149 |
|
|
} |
150 |
|
|
|
151 |
|
|
sub process_stopped($) { |
152 |
|
|
open my $stat, "</proc/$_[0]/stat" |
153 |
|
|
or return 1; |
154 |
|
|
|
155 |
|
|
return +(split /\s+/, <$stat>)[2] =~ /^[TZX]/i; |
156 |
|
|
} |
157 |
|
|
|
158 |
|
|
sub thaw($) { |
159 |
|
|
local $@; |
160 |
|
|
|
161 |
|
|
my $token = shift; |
162 |
|
|
|
163 |
|
|
for (reverse @$token) { |
164 |
|
|
my @pids = @$_; |
165 |
|
|
kill CONT => @pids; |
166 |
|
|
|
167 |
|
|
# now wait till processes actually run again before the next round |
168 |
|
|
for (1..MAX_WAIT) { |
169 |
|
|
@pids = grep process_stopped $_, @pids; |
170 |
|
|
last unless @pids; |
171 |
|
|
|
172 |
|
|
yield; |
173 |
|
|
} |
174 |
|
|
} |
175 |
|
|
} |
176 |
|
|
|
177 |
root |
1.2 |
sub freeze(;$) { |
178 |
root |
1.1 |
local $@; |
179 |
|
|
|
180 |
root |
1.2 |
my $procs; |
181 |
root |
1.1 |
|
182 |
|
|
eval { |
183 |
|
|
for (1..MAX_WAIT) { |
184 |
|
|
my $passes = enum_pids { 1 => 1, $$ => 1 }; |
185 |
|
|
last unless @$passes; |
186 |
|
|
|
187 |
|
|
for (@$passes) { |
188 |
|
|
my @pids = @$_; |
189 |
|
|
push @procs, $_; |
190 |
|
|
kill STOP => @pids; |
191 |
|
|
|
192 |
|
|
for (1..MAX_WAIT) { |
193 |
|
|
@pids = grep !process_stopped $_, @pids; |
194 |
|
|
last unless @pids; |
195 |
|
|
|
196 |
|
|
# wait till processes are really stopped |
197 |
|
|
yield; |
198 |
|
|
} |
199 |
|
|
|
200 |
root |
1.2 |
die "unable to stop some processes: @pids" if @pids && !$PARTIAL_OK; |
201 |
root |
1.1 |
} |
202 |
|
|
} |
203 |
|
|
}; |
204 |
|
|
|
205 |
|
|
if ($@) { |
206 |
|
|
thaw \@procs; |
207 |
|
|
die $@; |
208 |
|
|
} |
209 |
|
|
|
210 |
|
|
\@procs |
211 |
|
|
} |
212 |
|
|
|
213 |
|
|
sub freezethaw(&) { |
214 |
|
|
my ($code) = @_; |
215 |
|
|
|
216 |
|
|
my $token = freeze; |
217 |
|
|
|
218 |
|
|
eval { |
219 |
|
|
local $SIG{HUP} = sub { die "ERROR: caught SIGHUP while system frozen" }; |
220 |
root |
1.2 |
local $SIG{INT} = sub { die "ERROR: caught SIGINT while system frozen" }; |
221 |
|
|
local $SIG{TERM} = sub { die "ERROR: caught SIGTERM while system frozen" }; |
222 |
root |
1.1 |
local $SIG{PIPE} = sub { die "ERROR: caught SIGPIPE while system frozen" }; |
223 |
|
|
local $SIG{ALRM} = sub { die "ERROR: caught SIGALRM while system frozen" }; |
224 |
|
|
local $SIG{USR1} = sub { die "ERROR: caught SIGUSR1 while system frozen" }; |
225 |
|
|
local $SIG{USR2} = sub { die "ERROR: caught SIGUSR2 while system frozen" }; |
226 |
|
|
|
227 |
|
|
$code->(); |
228 |
|
|
}; |
229 |
|
|
|
230 |
|
|
thaw $token; |
231 |
|
|
|
232 |
|
|
die $@ if $@; |
233 |
|
|
|
234 |
|
|
() |
235 |
|
|
} |
236 |
|
|
|
237 |
|
|
1; |
238 |
|
|
|
239 |
|
|
=back |
240 |
|
|
|
241 |
|
|
=head1 BUGS |
242 |
|
|
|
243 |
|
|
SIGCONT is not unnoticed by processes. Some programs (such as irssi-text) |
244 |
|
|
respond by flickering (IMHO a bug in irssi-text). Other programs might |
245 |
|
|
have other problems, but actual problems should be rare. However, one |
246 |
|
|
shouldn't overuse this module. |
247 |
|
|
|
248 |
|
|
=head1 AUTHOR |
249 |
|
|
|
250 |
|
|
Marc Lehmann <schmorp@schmorp.de> |
251 |
|
|
http://home.schmorp.de/ |
252 |
|
|
|
253 |
|
|
=cut |
254 |
|
|
|