ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Sys-FreezeThaw/FreezeThaw.pm
Revision: 1.2
Committed: Mon Oct 7 04:21:18 2013 UTC (10 years, 7 months ago) by root
Branch: MAIN
CVS Tags: rel-0_02, HEAD
Changes since 1.1: +20 -10 lines
Log Message:
0.2

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Sys::FreezeThaw - stop and start all user processes on a machine
4    
5     =head1 SYNOPSIS
6    
7     use Sys::FreezeThaw;
8    
9     Sys::FreezeThaw::freezethaw {
10     # run code while system is frozen
11     };
12    
13     my $token = Sys::FreezeThaw::freeze;
14     ... do something ...
15     Sys::FreezeThaw::thaw $token;
16    
17     =head1 DESCRIPTION
18    
19     Operating Systems/Kernels current supported: Linux-2.6/3.0 with F</proc>.
20    
21     This module implements a very specific feature: stopping(freezing and
22     thawing/continuing all userspace processes on the machine. It works by
23     sending SIGSTOP to all processes, parent-process first, so that the wait
24     syscall will not trigger on stopped children. Restarting is done in
25     reverse order.
26    
27     Using the combined function Sys::FreezeThaw::freezethaw is recommended as
28     it will catch runtime errors, but stopping and restarting can be dine via
29     separate function calls.
30    
31     =head2 What could it possibly be sueful for??
32    
33     Possible uses include: doing atomic file system operations (such as
34     replacing files while they are guaranteed not to be in use), or quieting
35     down a system to investigate suspicious behaviour.
36    
37     =over 4
38    
39     =cut
40    
41     package Sys::FreezeThaw;
42    
43     use Carp;
44    
45     $VERSION = '0.02';
46 root 1.2 $PARTIAL_OK = 0;
47 root 1.1
48     =item Sys::FreezeThaw::freezethaw { BLOCK }
49    
50     First tries to stop all processes. If successful, runs the given code block
51     (or code reference), then restarts all processes again. As the system is
52     basically frozen during the code block execution, it should be as fast as
53     possible.
54    
55     Runtime errors will be caught with C<eval>. If an exception occurs it will
56     be re-thrown after processes are restarted. If processes cannot be frozen
57     or restarted, this function will throw an exception.
58    
59 root 1.2 Signal handlers for SIGINT, SIGTERM, SIGPIPE, SIGHUP, SIGALRM, SIGUSR1 and
60     SIGUSR2 will be installed temporarily, so if you want to catch these, you
61     have to do so yourself within the executed code block.
62 root 1.1
63     Try to do as few things as possible. For example, outputting text might
64     cause a deadlock, as the terminal emulator on the other side of STDOUT
65 root 1.2 might be stopped, logging to syslog might not work and so on.
66 root 1.1
67     The return value of the code block is ignored right now, and the function
68     doesn't yet return anything sensible.
69    
70 root 1.2 =item $token = Sys::FreezeThaw::freeze
71 root 1.1
72     Send SIGSTOP to all processes, and return a token that allows them to be
73     thawed again.
74    
75     If an error occurs, an exception will be thrown and all stopped processes
76     will automatically be thawed.
77    
78     =item Sys::FreezeThaw::thaw $token
79    
80     Take a token returned by Sys::FreezeThaw::freeze and send all processes
81 root 1.2 a C<CONT> signal, in the order required for them not to receive child STOP
82 root 1.1 notifications.
83    
84 root 1.2 =item $Sys::FreezeThaw::PARTIAL_OK
85    
86     A boolean that tells C<freeze> whether it is an error if a process cannot
87     be stopped. If false (the default), then C<freeze> will fail if there is
88     an unstoppable process. If it is true, then C<freeze> will pretend it the
89     process stopped.
90    
91 root 1.1 =cut
92    
93     # this is laughably broken, but...
94     sub yield {
95     select undef, undef, undef, 1/1000;
96     }
97    
98     # the maximum number of iterations per stop/cont etc. loop
99     # used to shield against catastrophic events (or bugs :)
100     # on current linux systems it can take an enourmous amount of
101     # time for some processes to stop, but usually it only takes
102     # one or two iterations.
103 root 1.2 sub MAX_WAIT() { 10 }
104 root 1.1
105     # return a list o fall pid's in the system,
106     # topologically sorted parent-first
107     # skips, keys %$exclude_pid, zombies and stopped processes
108     sub enum_pids($) {
109     my ($exclude_pid) = @_;
110    
111     opendir my $proc, "/proc"
112     or die "/proc: $!";
113     my @pid = sort { $b <=> $a }
114     grep /^\d+/,
115     readdir $proc;
116     closedir $proc;
117    
118     my %ppid;
119     for (@pid) {
120     next if exists $exclude_pid->{$_};
121    
122     open my $stat, "<", "/proc/$_/stat"
123     or next;
124     my ($state, $ppid, $vsize, $rss) = (split /\s+/, scalar <$stat>)[2,3,22,23];
125    
126     next if $state =~ /^[TZX]/i; # stopped, zombies, dead
127     next unless $vsize || $rss; # skip kernel threads or other nasties
128    
129     $ppid{$_} = $ppid;
130     }
131    
132     # now topologically sort by parent-id
133     my @res;
134     while (scalar %ppid) {
135     my @pass;
136    
137     for my $pid (keys %ppid) {
138     if (!exists $ppid{$ppid{$pid}}) {
139     push @pass, $pid;
140     }
141     }
142    
143     delete $ppid{$_} for @pass;
144    
145     push @res, \@pass;
146     }
147    
148     \@res
149     }
150    
151     sub process_stopped($) {
152     open my $stat, "</proc/$_[0]/stat"
153     or return 1;
154    
155     return +(split /\s+/, <$stat>)[2] =~ /^[TZX]/i;
156     }
157    
158     sub thaw($) {
159     local $@;
160    
161     my $token = shift;
162    
163     for (reverse @$token) {
164     my @pids = @$_;
165     kill CONT => @pids;
166    
167     # now wait till processes actually run again before the next round
168     for (1..MAX_WAIT) {
169     @pids = grep process_stopped $_, @pids;
170     last unless @pids;
171    
172     yield;
173     }
174     }
175     }
176    
177 root 1.2 sub freeze(;$) {
178 root 1.1 local $@;
179    
180 root 1.2 my $procs;
181 root 1.1
182     eval {
183     for (1..MAX_WAIT) {
184     my $passes = enum_pids { 1 => 1, $$ => 1 };
185     last unless @$passes;
186    
187     for (@$passes) {
188     my @pids = @$_;
189     push @procs, $_;
190     kill STOP => @pids;
191    
192     for (1..MAX_WAIT) {
193     @pids = grep !process_stopped $_, @pids;
194     last unless @pids;
195    
196     # wait till processes are really stopped
197     yield;
198     }
199    
200 root 1.2 die "unable to stop some processes: @pids" if @pids && !$PARTIAL_OK;
201 root 1.1 }
202     }
203     };
204    
205     if ($@) {
206     thaw \@procs;
207     die $@;
208     }
209    
210     \@procs
211     }
212    
213     sub freezethaw(&) {
214     my ($code) = @_;
215    
216     my $token = freeze;
217    
218     eval {
219     local $SIG{HUP} = sub { die "ERROR: caught SIGHUP while system frozen" };
220 root 1.2 local $SIG{INT} = sub { die "ERROR: caught SIGINT while system frozen" };
221     local $SIG{TERM} = sub { die "ERROR: caught SIGTERM while system frozen" };
222 root 1.1 local $SIG{PIPE} = sub { die "ERROR: caught SIGPIPE while system frozen" };
223     local $SIG{ALRM} = sub { die "ERROR: caught SIGALRM while system frozen" };
224     local $SIG{USR1} = sub { die "ERROR: caught SIGUSR1 while system frozen" };
225     local $SIG{USR2} = sub { die "ERROR: caught SIGUSR2 while system frozen" };
226    
227     $code->();
228     };
229    
230     thaw $token;
231    
232     die $@ if $@;
233    
234     ()
235     }
236    
237     1;
238    
239     =back
240    
241     =head1 BUGS
242    
243     SIGCONT is not unnoticed by processes. Some programs (such as irssi-text)
244     respond by flickering (IMHO a bug in irssi-text). Other programs might
245     have other problems, but actual problems should be rare. However, one
246     shouldn't overuse this module.
247    
248     =head1 AUTHOR
249    
250     Marc Lehmann <schmorp@schmorp.de>
251     http://home.schmorp.de/
252    
253     =cut
254