1 |
root |
1.1 |
#!/opt/bin/perl |
2 |
|
|
|
3 |
|
|
# inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie> |
4 |
|
|
# about 40% faster than the original version (on my fs and raid :) |
5 |
|
|
|
6 |
root |
1.17 |
=head1 NAME |
7 |
|
|
|
8 |
|
|
treescan - scan directory trees, list dirs/files, stat, sync, grep |
9 |
|
|
|
10 |
|
|
=head1 SYNOPSIS |
11 |
|
|
|
12 |
|
|
treescan [OPTION...] [PATH...] |
13 |
|
|
|
14 |
|
|
-q, --quiet do not print list of files/directories |
15 |
|
|
-0, --print0 use null character instead of newline to separate names |
16 |
|
|
-s, --stat call stat on every entry, to get stat data into cache |
17 |
|
|
-d, --dirs only list dirs |
18 |
|
|
-f, --files only list files |
19 |
|
|
-p, --progress regularly print progress to stderr |
20 |
|
|
--sync open/fsync/close every entry |
21 |
|
|
-g, --grep=RE only list files that match the given perl RegEx |
22 |
|
|
|
23 |
|
|
=head1 DESCRIPTION |
24 |
|
|
|
25 |
|
|
The F<treescan> command scans directories and their contents |
26 |
|
|
recursively. By default it lists all files and directories (with trailing |
27 |
|
|
C</>), but it can optionally do various other things. |
28 |
|
|
|
29 |
|
|
If no paths are given, F<treescan> will use C<.>, the current directory. |
30 |
|
|
|
31 |
|
|
=head2 OPTIONS |
32 |
|
|
|
33 |
|
|
=over 4 |
34 |
|
|
|
35 |
|
|
=item -q, --quiet |
36 |
|
|
|
37 |
|
|
By default, F<treescan> prints the full paths of all directories or files |
38 |
|
|
it finds. This option disables printing of filenames completely. This is |
39 |
|
|
useful if you want to run F<treescan> solely for its side effects, such as |
40 |
|
|
pulling C<stat> data into memory. |
41 |
|
|
|
42 |
|
|
=item -0, --print0 |
43 |
|
|
|
44 |
|
|
Instead of using newlines, use null characters after each filename. This |
45 |
|
|
is useful to avoid quoting problems when piping the result into other |
46 |
|
|
programs (for example, GNU F<grep>, F<xargs> and so on all have options to |
47 |
|
|
deal with this). |
48 |
|
|
|
49 |
|
|
=item -s, --stat |
50 |
|
|
|
51 |
|
|
Normally, F<treescan> will use heuristics to avoid most C<stat> calls, |
52 |
|
|
which is what makes it so fast. This option forces it to C<stat> every file. |
53 |
|
|
|
54 |
|
|
This is only useful for the side effect of pulling the C<stat> data into |
55 |
|
|
the cache. If your disk cache is big enough, it will be filled with file |
56 |
|
|
metadata after F<treescan> is done, which can speed up subsequent commands |
57 |
|
|
considerably. Often, you can run F<treescan> in parallel with other |
58 |
|
|
directory-scanning programs to speed them up. |
59 |
|
|
|
60 |
|
|
=item -d, --dirs |
61 |
|
|
|
62 |
|
|
Only lists directories, not file paths. This is useful if you quickly want |
63 |
|
|
a list of directories and their subdirectories. |
64 |
|
|
|
65 |
|
|
=item -f, --files |
66 |
|
|
|
67 |
|
|
Only list files, not directories. This is useful if you want to operate |
68 |
|
|
on all files in a hierarchy, and the directories would only get in the way. |
69 |
|
|
|
70 |
|
|
=item -p, --progress |
71 |
|
|
|
72 |
|
|
Regularly print some progress information to standard error. This is |
73 |
|
|
useful to get some progress information on long running tasks. Since |
74 |
|
|
the progress is printed to standard error, you can pipe the output of |
75 |
|
|
F<treescan> into other programs as usual. |
76 |
|
|
|
77 |
|
|
=item --sync |
78 |
|
|
|
79 |
|
|
The C<--sync> option can be used to make sure all the files/dirs in a tree |
80 |
|
|
are sync'ed to disk. For example this could be useful after unpacking an |
81 |
|
|
archive, to make sure the files hit the disk before deleting the archive |
82 |
|
|
file itself. |
83 |
|
|
|
84 |
|
|
=item -g, --grep=RE |
85 |
|
|
|
86 |
|
|
This applies a perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed |
87 |
|
|
and will only print matching paths. |
88 |
|
|
|
89 |
|
|
The regular expression uses an C</s> (single line) modifier by default, so |
90 |
|
|
newlines are matched by C<.>. |
91 |
|
|
|
92 |
|
|
=back |
93 |
|
|
|
94 |
|
|
=head1 AUTHOR |
95 |
|
|
|
96 |
|
|
Marc Lehmann <schmorp@schmorp.de> |
97 |
|
|
http://home.schmorp.de/ |
98 |
|
|
|
99 |
|
|
=cut |
100 |
|
|
|
101 |
root |
1.15 |
use common::sense; |
102 |
root |
1.1 |
use Getopt::Long; |
103 |
root |
1.10 |
use Time::HiRes (); |
104 |
root |
1.1 |
use IO::AIO; |
105 |
|
|
|
106 |
root |
1.3 |
# Report the IO::AIO version as our own (used by Getopt::Long's auto_version).
our $VERSION = $IO::AIO::VERSION;

Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");

# Command line switches. Note that --dirs sets $opt_nofiles and --files sets
# $opt_nodirs: each option works by suppressing the other kind of entry.
my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help,
    $opt_nofiles, $opt_grep, $opt_progress, $opt_sync);

GetOptions(
    "quiet|q"    => \$opt_silent,
    "print0|0"   => \$opt_print0,
    "stat|s"     => \$opt_stat,
    "dirs|d"     => \$opt_nofiles,
    "files|f"    => \$opt_nodirs,
    "grep|g=s"   => \$opt_grep,
    "progress|p" => \$opt_progress,
    "sync"       => \$opt_sync,
    "help"       => \$opt_help,
) or die "Usage: try $0 --help";

if ($opt_help) {
    require Pod::Usage;

    Pod::Usage::pod2usage (
        -verbose => 1,
        -exitval => 0,
    );
}

# Without explicit paths, scan the current directory.
@ARGV = "." unless @ARGV;

# Compile the --grep pattern once, with /s so that "." also matches newlines.
# The check is on defined-ness and length rather than truth, because a plain
# truth test would silently discard the perfectly valid pattern "0". An empty
# pattern keeps meaning "no filter".
$opt_grep = (defined $opt_grep && length $opt_grep)
    ? qr{$opt_grep}s
    : undef;

# Running totals and timestamps used by the --progress display.
my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);
my ($n_last, $n_start) = (Time::HiRes::time) x 2;
140 |
root |
1.10 |
|
141 |
root |
1.1 |
# Print a list of names to standard output.
#
#   $prefix - string prepended to every name (a directory path, or "")
#   $files  - array reference of names; it is never modified
#   $suffix - optional string appended to every name (defaults to "")
#
# When --grep is active, only names whose "$prefix$name" matches the pattern
# are printed. --quiet suppresses all output, also in combination with
# --print0. --print0 terminates each name with "\0" instead of "\n".
sub printfn {
    my ($prefix, $files, $suffix) = @_;

    # --quiet means "print nothing", regardless of the chosen terminator.
    return if $opt_silent;

    $suffix = "" unless defined $suffix;

    # Filter a private copy: the caller keeps using its list afterwards (for
    # counting, stat and sync), so it must not shrink as a side effect.
    my @names = @$files;
    @names = grep "$prefix$_" =~ $opt_grep, @names
        if $opt_grep;

    my $terminator = $opt_print0 ? "\0" : "\n";

    print map "$prefix$_$suffix$terminator", @names;
}
154 |
|
|
|
155 |
|
|
# Queue an asynchronous scan of one directory and, from its completion
# callback, print its entries, optionally stat/sync its files, and recurse
# into every subdirectory.
#
#   $path - directory path; a "/" is appended before it is used
#
# Updates the file-level counters $n_dirs, $n_files and $n_stats, and prints
# a status line to STDERR when --progress is enabled.
sub scan {
    my ($path) = @_;

    $path .= "/";

    # Give already completed requests a chance to run their callbacks.
    IO::AIO::poll_cb();

    if ($opt_progress) {
        # Refresh the status line only when more than a second has passed
        # since the previous refresh.
        if ($n_last + 1 < Time::HiRes::time()) {
            $n_last = Time::HiRes::time();
            my $elapsed = $n_last - $n_start;

            # Each counter is followed by its average rate per second.
            printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
                map { ($_, $_ / $elapsed) } $n_dirs, $n_files, $n_stats;
        }
    }

    ++$n_dirs;

    # Must directly precede the request it is meant for (see IO::AIO).
    aioreq_pri(-1);
    aio_scandir($path, 8, sub {
        # An empty argument list signals failure; report it and give up on
        # this directory only.
        my ($subdirs, $entries) = @_
            or return warn "$path: $!\n";

        printfn("", [$path])      unless $opt_nodirs;
        printfn($path, $entries)  unless $opt_nofiles;

        $n_files += @$entries;

        if ($opt_stat) {
            # lstat every file relative to a working-directory handle.
            aio_wd($path, sub {
                my ($wd) = @_;

                for my $name (@$entries) {
                    aio_lstat([$wd, $name]);
                }
                $n_stats += @$entries;
            });
        }

        if ($opt_sync) {
            # Sync every file, then the directory itself.
            aio_wd($path, sub {
                my ($wd) = @_;

                for my $name (@$entries) {
                    aio_pathsync([$wd, $name]);
                }
                aio_pathsync($wd);
            });
        }

        for my $subdir (@$subdirs) {
            scan("$path$subdir");
        }
    });
}
203 |
|
|
|
204 |
root |
1.9 |
IO::AIO::max_outstanding(100); # two fds per directory, so limit accordingly
IO::AIO::min_parallel(20);

# Start one scan per path given on the command line.
for my $seed (@ARGV) {
    # Drop trailing slashes; scan() appends exactly one itself.
    $seed =~ s/\/+$//;

    # Probe "$seed/." so that the check applies to what the path resolves to.
    aio_lstat("$seed/.", sub {
        if ($_[0]) {
            # A true status means the lstat failed; $! holds the reason.
            print STDERR "$seed: $!\n";
        } elsif (-d _) {
            scan($seed);
        } else {
            # printfn dereferences its second argument as an array, so the
            # single name has to be wrapped in an array reference. A bare
            # string would be treated as a symbolic array name (printing
            # nothing) or die under strict refs.
            printfn("", [$seed], "/");
        }
    });
}

# Process requests and their callbacks until nothing is outstanding.
IO::AIO::flush();
221 |
|
|
|