1 | #!/opt/bin/perl |
1 | #!/opt/bin/perl |
2 | |
2 | |
3 | # inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie> |
3 | # inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie> |
4 | # about 40% faster than the original version (on my fs and raid :) |
4 | # about 40% faster than the original version (on my fs and raid :) |
5 | |
5 | |
6 | use strict; |
6 | =head1 NAME |
|
|
7 | |
|
|
8 | treescan - scan directory trees, list dirs/files, stat, sync, grep |
|
|
9 | |
|
|
10 | =head1 SYNOPSIS |
|
|
11 | |
|
|
12 | treescan [OPTION...] [PATH...] |
|
|
13 | |
|
|
14 | -q, --quiet do not print list of files/directories |
|
|
15 | -0, --print0 use null character instead of newline to separate names |
|
|
16 | -s, --stat call stat on every entry, to get stat data into cache |
|
|
17 | -d, --dirs only list dirs |
|
|
18 | -f, --files only list files |
|
|
19 | -p, --progress regularly print progress to stderr |
|
|
20 | --sync open/fsync/close every entry |
|
|
21 | -g, --grep=RE only list files that match the given perl RegEx |
|
|
22 | |
|
|
23 | =head1 DESCRIPTION |
|
|
24 | |
|
|
25 | The F<treescan> command scans directories and their contents |
|
|
26 | recursively. By default it lists all files and directories (with trailing |
|
|
27 | C</>), but it can optionally do various other things. |
|
|
28 | |
|
|
29 | If no paths are given, F<treescan> will use C<.>, the current directory. |
|
|
30 | |
|
|
31 | =head2 OPTIONS |
|
|
32 | |
|
|
33 | =over 4 |
|
|
34 | |
|
|
35 | =item -q, --quiet |
|
|
36 | |
|
|
37 | By default, F<treescan> prints the full paths of all directories or files |
|
|
38 | it finds. This option disables printing of filenames completely. This is |
|
|
39 | useful if you want to run F<treescan> solely for its side effects, such as |
|
|
40 | pulling C<stat> data into memory. |
|
|
41 | |
|
|
42 | =item -0, --print0 |
|
|
43 | |
|
|
44 | Instead of using newlines, use null characters after each filename. This |
|
|
45 | is useful to avoid quoting problems when piping the result into other |
|
|
46 | programs (for example, GNU F<grep>, F<xargs> and so on all have options to |
|
|
47 | deal with this). |
|
|
48 | |
|
|
49 | =item -s, --stat |
|
|
50 | |
|
|
51 | Normally, F<treescan> will use heuristics to avoid most C<stat> calls, |
|
|
52 | which is what makes it so fast. This option forces it to C<stat> every file. |
|
|
53 | |
|
|
54 | This is only useful for the side effect of pulling the C<stat> data into |
|
|
55 | the cache. If your disk cache is big enough, it will be filled with |
|
|
56 | file meta data after F<treescan> is done, which can speed up subsequent |
|
|
57 | commands considerably. Often, you can run F<treescan> in parallel with |
|
|
58 | other directory-scanning programs to speed them up. |
|
|
59 | |
|
|
60 | =item -d, --dirs |
|
|
61 | |
|
|
62 | Only lists directories, not file paths. This is useful if you quickly want |
|
|
63 | a list of directories and their subdirectories. |
|
|
64 | |
|
|
65 | =item -f, --files |
|
|
66 | |
|
|
67 | Only list files, not directories. This is useful if you want to operate on |
|
|
68 | all files in a hierarchy, and the directories would ony get in the way. |
|
|
69 | |
|
|
70 | =item -p, --progress |
|
|
71 | |
|
|
72 | Regularly print some progress information to standard error. This is |
|
|
73 | useful to get some progress information on long running tasks. Since |
|
|
74 | the progress is printed to standard error, you can pipe the output of |
|
|
75 | F<treescan> into other programs as usual. |
|
|
76 | |
|
|
77 | =item --sync |
|
|
78 | |
|
|
79 | The C<--sync> option can be used to make sure all the files/dirs in a tree |
|
|
80 | are sync'ed to disk. For example this could be useful after unpacking an |
|
|
81 | archive, to make sure the files hit the disk before deleting the archive |
|
|
82 | file itself. |
|
|
83 | |
|
|
84 | =item -g, --grep=RE |
|
|
85 | |
|
|
86 | This applies a perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed |
|
|
87 | and will only print matching paths. |
|
|
88 | |
|
|
89 | The regular expression uses an C</s> (single line) modifier by default, so |
|
|
90 | newlines are matched by C<.>. |
|
|
91 | |
|
|
92 | =back |
|
|
93 | |
|
|
94 | =head1 AUTHOR |
|
|
95 | |
|
|
96 | Marc Lehmann <schmorp@schmorp.de> |
|
|
97 | http://home.schmorp.de/ |
|
|
98 | |
|
|
99 | =cut |
|
|
100 | |
|
|
101 | use common::sense; |
7 | use Getopt::Long; |
102 | use Getopt::Long; |
|
|
103 | use Time::HiRes (); |
8 | use IO::AIO; |
104 | use IO::AIO; |
9 | |
105 | |
10 | our $VERSION = $IO::AIO::VERSION; |
106 | our $VERSION = $IO::AIO::VERSION; |
11 | |
107 | |
12 | Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version"); |
108 | Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version"); |
13 | |
109 | |
14 | my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_nofiles, $opt_grep); |
110 | my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help, |
|
|
111 | $opt_nofiles, $opt_grep, $opt_progress, $opt_sync); |
15 | |
112 | |
16 | GetOptions |
113 | GetOptions |
17 | "quiet|q" => \$opt_silent, |
114 | "quiet|q" => \$opt_silent, |
18 | "print0|0" => \$opt_print0, |
115 | "print0|0" => \$opt_print0, |
19 | "stat|s" => \$opt_stat, |
116 | "stat|s" => \$opt_stat, |
20 | "dirs|d" => \$opt_nofiles, |
117 | "dirs|d" => \$opt_nofiles, |
21 | "files|f" => \$opt_nodirs, |
118 | "files|f" => \$opt_nodirs, |
22 | "grep|g=s" => \$opt_grep, |
119 | "grep|g=s" => \$opt_grep, |
|
|
120 | "progress|p" => \$opt_progress, |
|
|
121 | "sync" => \$opt_sync, |
|
|
122 | "help" => \$opt_help, |
23 | or die "Usage: try $0 --help"; |
123 | or die "Usage: try $0 --help"; |
24 | |
124 | |
|
|
125 | if ($opt_help) { |
|
|
126 | require Pod::Usage; |
|
|
127 | |
|
|
128 | Pod::Usage::pod2usage ( |
|
|
129 | -verbose => 1, |
|
|
130 | -exitval => 0, |
|
|
131 | ); |
|
|
132 | } |
|
|
133 | |
25 | @ARGV = "." unless @ARGV; |
134 | @ARGV = "." unless @ARGV; |
26 | |
135 | |
|
|
136 | my @todo; # list of dirs/files still left to scan |
|
|
137 | |
27 | $opt_grep &&= qr{$opt_grep}s; |
138 | $opt_grep &&= qr{$opt_grep}s; |
|
|
139 | |
|
|
140 | my ($n_dirs, $n_files, $n_stats) = (0, 0, 0); |
|
|
141 | my ($n_last, $n_start) = (Time::HiRes::time) x 2; |
28 | |
142 | |
29 | sub printfn { |
143 | sub printfn { |
30 | my ($prefix, $files, $suffix) = @_; |
144 | my ($prefix, $files, $suffix) = @_; |
31 | |
145 | |
32 | if ($opt_grep) { |
146 | if ($opt_grep) { |
… | |
… | |
43 | sub scan { |
157 | sub scan { |
44 | my ($path) = @_; |
158 | my ($path) = @_; |
45 | |
159 | |
46 | $path .= "/"; |
160 | $path .= "/"; |
47 | |
161 | |
|
|
162 | IO::AIO::poll_cb; |
|
|
163 | |
|
|
164 | if ($opt_progress and $n_last + 1 < Time::HiRes::time) { |
|
|
165 | $n_last = Time::HiRes::time; |
|
|
166 | my $d = $n_last - $n_start; |
|
|
167 | printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ", |
|
|
168 | $n_dirs, $n_dirs / $d, |
|
|
169 | $n_files, $n_files / $d, |
|
|
170 | $n_stats, $n_stats / $d; |
|
|
171 | } |
|
|
172 | |
48 | aioreq_pri -1; |
173 | aioreq_pri -1; |
|
|
174 | ++$n_dirs; |
49 | aio_scandir $path, 8, sub { |
175 | aio_scandir $path, 8, sub { |
50 | my ($dirs, $files) = @_; |
176 | my ($dirs, $files) = @_ |
|
|
177 | or return warn "$path: $!\n"; |
51 | |
178 | |
52 | printfn "", [$path] unless $opt_nodirs; |
179 | printfn "", [$path] unless $opt_nodirs; |
53 | printfn $path, $files unless $opt_nofiles; |
180 | printfn $path, $files unless $opt_nofiles; |
54 | |
181 | |
|
|
182 | $n_files += @$files; |
|
|
183 | |
55 | if ($opt_stat) { |
184 | if ($opt_stat) { |
|
|
185 | aio_wd $path, sub { |
|
|
186 | my $wd = shift; |
|
|
187 | |
56 | aio_lstat "$path$_" for @$files; |
188 | aio_lstat [$wd, $_] for @$files; |
|
|
189 | $n_stats += @$files; |
|
|
190 | }; |
57 | } |
191 | } |
58 | |
192 | |
59 | &scan ("$path$_") for @$dirs; |
193 | if ($opt_sync) { |
|
|
194 | aio_wd $path, sub { |
|
|
195 | my $wd = shift; |
|
|
196 | |
|
|
197 | aio_pathsync [$wd, $_] for @$files; |
|
|
198 | aio_pathsync $wd; |
|
|
199 | }; |
|
|
200 | } |
|
|
201 | |
|
|
202 | push @todo, "$path$_" |
|
|
203 | for sort { $b cmp $a } @$dirs; |
60 | }; |
204 | }; |
61 | } |
205 | } |
62 | |
206 | |
63 | IO::AIO::max_outstanding 64; |
207 | IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly |
64 | IO::AIO::min_parallel 32; |
208 | IO::AIO::min_parallel 20; |
65 | |
209 | |
66 | for my $seed (@ARGV) { |
210 | @todo = reverse @ARGV; |
|
|
211 | |
|
|
212 | while () { |
|
|
213 | if (@todo) { |
|
|
214 | my $seed = pop @todo; |
67 | $seed =~ s/\/+$//; |
215 | $seed =~ s/\/+$//; |
68 | aio_lstat "$seed/.", sub { |
216 | aio_lstat "$seed/.", sub { |
69 | if ($_[0]) { |
217 | if ($_[0]) { |
70 | print STDERR "$seed: $!\n"; |
218 | print STDERR "$seed: $!\n"; |
71 | } elsif (-d _) { |
219 | } elsif (-d _) { |
72 | scan $seed; |
220 | scan $seed; |
73 | } else { |
221 | } else { |
74 | printfn "", $seed, "/"; |
222 | printfn "", $seed, "/"; |
|
|
223 | } |
75 | } |
224 | }; |
|
|
225 | } else { |
|
|
226 | IO::AIO::poll_wait; |
76 | }; |
227 | } |
77 | } |
|
|
78 | |
228 | |
79 | IO::AIO::flush; |
229 | last unless IO::AIO::nreqs; |
80 | |
230 | |
|
|
231 | IO::AIO::poll_cb; |
|
|
232 | } |
|
|
233 | |