ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/IO-AIO/bin/treescan
(Generate patch)

Comparing IO-AIO/bin/treescan (file contents):
Revision 1.9 by root, Fri Sep 30 00:23:44 2011 UTC vs.
Revision 1.21 by root, Wed Dec 30 07:45:33 2020 UTC

1#!/opt/bin/perl 1#!/opt/bin/perl
2 2
3# inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie> 3# inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>
4# about 40% faster than the original version (on my fs and raid :) 4# about 40% faster than the original version (on my fs and raid :)
5 5
6use strict; 6=head1 NAME
7
8treescan - scan directory trees, list dirs/files, stat, sync, grep
9
10=head1 SYNOPSIS
11
12 treescan [OPTION...] [PATH...]
13
14 -q, --quiet do not print list of files/directories
15 -0, --print0 use null character instead of newline to separate names
16 -s, --stat call stat on every entry, to get stat data into cache
17 -d, --dirs only list dirs
18 -f, --files only list files
19 -p, --progress regularly print progress to stderr
20 --sync open/fsync/close every entry
21 -g, --grep=RE only list files that match the given perl RegEx
22
23=head1 DESCRIPTION
24
25The F<treescan> command scans directories and their contents
26recursively. By default it lists all files and directories (with trailing
27C</>), but it can optionally do various other things.
28
29If no paths are given, F<treescan> will use C<.>, the current directory.
30
31=head2 OPTIONS
32
33=over 4
34
35=item -q, --quiet
36
37By default, F<treescan> prints the full paths of all directories or files
38it finds. This option disables printing of filenames completely. This is
39useful if you want to run F<treescan> solely for its side effects, such as
40pulling C<stat> data into memory.
41
42=item -0, --print0
43
44Instead of using newlines, use null characters after each filename. This
45is useful to avoid quoting problems when piping the result into other
46programs (for example, GNU F<grep>, F<xargs> and so on all have options to
47deal with this).
48
49=item -s, --stat
50
51Normally, F<treescan> will use heuristics to avoid most C<stat> calls,
52which is what makes it so fast. This option forces it to C<stat> every file.
53
54This is only useful for the side effect of pulling the C<stat> data into
55the cache. If your disk cache is big enough, it will be filled with
56file meta data after F<treescan> is done, which can speed up subsequent
57commands considerably. Often, you can run F<treescan> in parallel with
58other directory-scanning programs to speed them up.
59
60=item -d, --dirs
61
62Only list directories, not file paths. This is useful if you quickly want
63a list of directories and their subdirectories.
64
65=item -f, --files
66
67Only list files, not directories. This is useful if you want to operate on
68all files in a hierarchy, and the directories would only get in the way.
69
70=item -p, --progress
71
72Regularly print some progress information to standard error. This is
73useful to get some progress information on long running tasks. Since
74the progress is printed to standard error, you can pipe the output of
75F<treescan> into other programs as usual.
76
77=item --sync
78
79The C<--sync> option can be used to make sure all the files/dirs in a tree
80are sync'ed to disk. For example this could be useful after unpacking an
81archive, to make sure the files hit the disk before deleting the archive
82file itself.
83
84=item -g, --grep=RE
85
86This applies a Perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed
87and prints only the matching paths.
88
89The regular expression uses an C</s> (single line) modifier by default, so
90newlines are matched by C<.>.
91
92=back
93
94=head1 AUTHOR
95
96 Marc Lehmann <schmorp@schmorp.de>
97 http://home.schmorp.de/
98
99=cut
100
101use common::sense;
7use Getopt::Long; 102use Getopt::Long;
103use Time::HiRes ();
8use IO::AIO; 104use IO::AIO;
9 105
10our $VERSION = $IO::AIO::VERSION; 106our $VERSION = $IO::AIO::VERSION;
11 107
12Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version"); 108Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");
13 109
14my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_nofiles, $opt_grep); 110my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help,
111 $opt_nofiles, $opt_grep, $opt_progress, $opt_sync);
15 112
16GetOptions 113GetOptions
17 "quiet|q" => \$opt_silent, 114 "quiet|q" => \$opt_silent,
18 "print0|0" => \$opt_print0, 115 "print0|0" => \$opt_print0,
19 "stat|s" => \$opt_stat, 116 "stat|s" => \$opt_stat,
20 "dirs|d" => \$opt_nofiles, 117 "dirs|d" => \$opt_nofiles,
21 "files|f" => \$opt_nodirs, 118 "files|f" => \$opt_nodirs,
22 "grep|g=s" => \$opt_grep, 119 "grep|g=s" => \$opt_grep,
120 "progress|p" => \$opt_progress,
121 "sync" => \$opt_sync,
122 "help" => \$opt_help,
23 or die "Usage: try $0 --help"; 123 or die "Usage: try $0 --help";
24 124
125if ($opt_help) {
126 require Pod::Usage;
127
128 Pod::Usage::pod2usage (
129 -verbose => 1,
130 -exitval => 0,
131 );
132}
133
25@ARGV = "." unless @ARGV; 134@ARGV = "." unless @ARGV;
26 135
136my @todo; # list of dirs/files still left to scan
137
27$opt_grep &&= qr{$opt_grep}s; 138$opt_grep &&= qr{$opt_grep}s;
139
140my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);
141my ($n_last, $n_start) = (Time::HiRes::time) x 2;
28 142
29sub printfn { 143sub printfn {
30 my ($prefix, $files, $suffix) = @_; 144 my ($prefix, $files, $suffix) = @_;
31 145
32 if ($opt_grep) { 146 if ($opt_grep) {
45 159
46 $path .= "/"; 160 $path .= "/";
47 161
48 IO::AIO::poll_cb; 162 IO::AIO::poll_cb;
49 163
164 if ($opt_progress and $n_last + 1 < Time::HiRes::time) {
165 $n_last = Time::HiRes::time;
166 my $d = $n_last - $n_start;
167 printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
168 $n_dirs, $n_dirs / $d,
169 $n_files, $n_files / $d,
170 $n_stats, $n_stats / $d;
171 }
172
50 aioreq_pri -1; 173 aioreq_pri -1;
174 ++$n_dirs;
51 aio_scandir $path, 8, sub { 175 aio_scandir $path, 8, sub {
52 my ($dirs, $files) = @_ 176 my ($dirs, $files) = @_
53 or warn "$path: $!\n"; 177 or return warn "$path: $!\n";
54 178
55 printfn "", [$path] unless $opt_nodirs; 179 printfn "", [$path] unless $opt_nodirs;
56 printfn $path, $files unless $opt_nofiles; 180 printfn $path, $files unless $opt_nofiles;
181
182 $n_files += @$files;
57 183
58 if ($opt_stat) { 184 if ($opt_stat) {
59 aio_wd $path, sub { 185 aio_wd $path, sub {
60 my $wd = shift; 186 my $wd = shift;
61 187
62 aio_lstat [$wd, $_] for @$files; 188 aio_lstat [$wd, $_] for @$files;
189 $n_stats += @$files;
63 }; 190 };
64 } 191 }
65 192
66 &scan ("$path$_") for @$dirs; 193 if ($opt_sync) {
194 aio_wd $path, sub {
195 my $wd = shift;
196
197 aio_pathsync [$wd, $_] for @$files;
198 aio_pathsync $wd;
199 };
200 }
201
202 push @todo, "$path$_"
203 for sort { $b cmp $a } @$dirs;
67 }; 204 };
68} 205}
69 206
70IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly 207IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly
71IO::AIO::min_parallel 20; 208IO::AIO::min_parallel 20;
72 209
73for my $seed (@ARGV) { 210@todo = reverse @ARGV;
211
212while () {
213 if (@todo) {
214 my $seed = pop @todo;
74 $seed =~ s/\/+$//; 215 $seed =~ s/\/+$//;
75 aio_lstat "$seed/.", sub { 216 aio_lstat "$seed/.", sub {
76 if ($_[0]) { 217 if ($_[0]) {
77 print STDERR "$seed: $!\n"; 218 print STDERR "$seed: $!\n";
78 } elsif (-d _) { 219 } elsif (-d _) {
79 scan $seed; 220 scan $seed;
80 } else { 221 } else {
81 printfn "", $seed, "/"; 222 printfn "", $seed, "/";
223 }
82 } 224 };
225 } else {
226 IO::AIO::poll_wait;
83 }; 227 }
84}
85 228
86IO::AIO::flush; 229 last unless IO::AIO::nreqs;
87 230
231 IO::AIO::poll_cb;
232}
233

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines