ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Sys-FreezeThaw/FreezeThaw.pm
Revision: 1.1
Committed: Sat Oct 5 17:51:34 2013 UTC (10 years, 7 months ago) by root
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Sys::FreezeThaw - stop and start all user processes on a machine
4
5 =head1 SYNOPSIS
6
7 use Sys::FreezeThaw;
8
9 Sys::FreezeThaw::freezethaw {
10 # run code while system is frozen
11 };
12
13 my $token = Sys::FreezeThaw::freeze;
14 ... do something ...
15 Sys::FreezeThaw::thaw $token;
16
17 =head1 DESCRIPTION
18
19 Operating Systems/Kernels current supported: Linux-2.6/3.0 with F</proc>.
20
21 This module implements a very specific feature: stopping(freezing and
22 thawing/continuing all userspace processes on the machine. It works by
23 sending SIGSTOP to all processes, parent-process first, so that the wait
24 syscall will not trigger on stopped children. Restarting is done in
25 reverse order.
26
27 Using the combined function Sys::FreezeThaw::freezethaw is recommended as
28 it will catch runtime errors, but stopping and restarting can be dine via
29 separate function calls.
30
31 =head2 What could it possibly be sueful for??
32
33 Possible uses include: doing atomic file system operations (such as
34 replacing files while they are guaranteed not to be in use), or quieting
35 down a system to investigate suspicious behaviour.
36
37 =over 4
38
39 =cut
40
41 package Sys::FreezeThaw;
42
43 use Carp;
44
45 $VERSION = '0.02';
46
47 =item Sys::FreezeThaw::freezethaw { BLOCK }
48
49 First tries to stop all processes. If successful, runs the given code block
50 (or code reference), then restarts all processes again. As the system is
51 basically frozen during the code block execution, it should be as fast as
52 possible.
53
54 Runtime errors will be caught with C<eval>. If an exception occurs it will
55 be re-thrown after processes are restarted. If processes cannot be frozen
56 or restarted, this function will throw an exception.
57
58 Signal handlers for SIGPIPE, SIGHUP, SIGALRM, SIGUSR1 and SIGUSR2 will
59 temporarily be installed, so if you want to catch these, you have to do so
60 yourself within the executed code block.
61
62 Try to do as few things as possible. For example, outputting text might
63 cause a deadlock, as the terminal emulator on the other side of STDOUT
64 might be stopped, etc.
65
66 The return value of the code block is ignored right now, and the function
67 doesn't yet return anything sensible.
68
69 =item $token = Sys::FreezeThaw::freeze;
70
71 Send SIGSTOP to all processes, and return a token that allows them to be
72 thawed again.
73
74 If an error occurs, an exception will be thrown and all stopped processes
75 will automatically be thawed.
76
77 =item Sys::FreezeThaw::thaw $token
78
79 Take a token returned by Sys::FreezeThaw::freeze and send all processes
80 a CONT signal, in the order required for them not to receive child STOP
81 notifications.
82
83 =cut
84
85 # this is laughably broken, but...
86 sub yield {
87 select undef, undef, undef, 1/1000;
88 }
89
90 # the maximum number of iterations per stop/cont etc. loop
91 # used to shield against catastrophic events (or bugs :)
92 # on current linux systems it can take an enourmous amount of
93 # time for some processes to stop, but usually it only takes
94 # one or two iterations.
95 sub MAX_WAIT() { 1000 }
96
97 # return a list o fall pid's in the system,
98 # topologically sorted parent-first
99 # skips, keys %$exclude_pid, zombies and stopped processes
100 sub enum_pids($) {
101 my ($exclude_pid) = @_;
102
103 opendir my $proc, "/proc"
104 or die "/proc: $!";
105 my @pid = sort { $b <=> $a }
106 grep /^\d+/,
107 readdir $proc;
108 closedir $proc;
109
110 my %ppid;
111 for (@pid) {
112 next if exists $exclude_pid->{$_};
113
114 open my $stat, "<", "/proc/$_/stat"
115 or next;
116 my ($state, $ppid, $vsize, $rss) = (split /\s+/, scalar <$stat>)[2,3,22,23];
117
118 next if $state =~ /^[TZX]/i; # stopped, zombies, dead
119 next unless $vsize || $rss; # skip kernel threads or other nasties
120
121 $ppid{$_} = $ppid;
122 }
123
124 # now topologically sort by parent-id
125 my @res;
126 while (scalar %ppid) {
127 my @pass;
128
129 for my $pid (keys %ppid) {
130 if (!exists $ppid{$ppid{$pid}}) {
131 push @pass, $pid;
132 }
133 }
134
135 delete $ppid{$_} for @pass;
136
137 push @res, \@pass;
138 }
139
140 \@res
141 }
142
143 sub process_stopped($) {
144 open my $stat, "</proc/$_[0]/stat"
145 or return 1;
146
147 return +(split /\s+/, <$stat>)[2] =~ /^[TZX]/i;
148 }
149
150 sub thaw($) {
151 local $@;
152
153 my $token = shift;
154
155 for (reverse @$token) {
156 my @pids = @$_;
157 kill CONT => @pids;
158
159 # now wait till processes actually run again before the next round
160 for (1..MAX_WAIT) {
161 @pids = grep process_stopped $_, @pids;
162 last unless @pids;
163
164 yield;
165 }
166 }
167 }
168
169 sub freeze() {
170 local $@;
171
172 my @procs;
173
174 eval {
175 for (1..MAX_WAIT) {
176 my $passes = enum_pids { 1 => 1, $$ => 1 };
177 last unless @$passes;
178
179 for (@$passes) {
180 my @pids = @$_;
181 push @procs, $_;
182 kill STOP => @pids;
183
184 for (1..MAX_WAIT) {
185 @pids = grep !process_stopped $_, @pids;
186 last unless @pids;
187
188 # wait till processes are really stopped
189 yield;
190 }
191
192 die "unable to stop some processes: @pids" if @pids;
193 }
194 }
195 };
196
197 if ($@) {
198 thaw \@procs;
199 die $@;
200 }
201
202 \@procs
203 }
204
205 sub freezethaw(&) {
206 my ($code) = @_;
207
208 my $token = freeze;
209
210 eval {
211 local $SIG{HUP} = sub { die "ERROR: caught SIGHUP while system frozen" };
212 local $SIG{PIPE} = sub { die "ERROR: caught SIGPIPE while system frozen" };
213 local $SIG{ALRM} = sub { die "ERROR: caught SIGALRM while system frozen" };
214 local $SIG{USR1} = sub { die "ERROR: caught SIGUSR1 while system frozen" };
215 local $SIG{USR2} = sub { die "ERROR: caught SIGUSR2 while system frozen" };
216
217 $code->();
218 };
219
220 thaw $token;
221
222 die $@ if $@;
223
224 ()
225 }
226
227 1;
228
229 =back
230
231 =head1 BUGS
232
233 SIGCONT is not unnoticed by processes. Some programs (such as irssi-text)
234 respond by flickering (IMHO a bug in irssi-text). Other programs might
235 have other problems, but actual problems should be rare. However, one
236 shouldn't overuse this module.
237
238 =head1 AUTHOR
239
240 Marc Lehmann <schmorp@schmorp.de>
241 http://home.schmorp.de/
242
243 =cut
244