ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/IO-AIO/bin/treescan
(Generate patch)

Comparing IO-AIO/bin/treescan (file contents):
Revision 1.15 by root, Thu Dec 13 07:34:14 2012 UTC vs.
Revision 1.21 by root, Wed Dec 30 07:45:33 2020 UTC

1#!/opt/bin/perl 1#!/opt/bin/perl
2 2
3# inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie> 3# inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>
4# about 40% faster than the original version (on my fs and raid :) 4# about 40% faster than the original version (on my fs and raid :)
5
6=head1 NAME
7
8treescan - scan directory trees, list dirs/files, stat, sync, grep
9
10=head1 SYNOPSIS
11
12 treescan [OPTION...] [PATH...]
13
14 -q, --quiet do not print list of files/directories
15 -0, --print0 use null character instead of newline to separate names
16 -s, --stat call stat on every entry, to get stat data into cache
17 -d, --dirs only list dirs
18 -f, --files only list files
19 -p, --progress regularly print progress to stderr
20 --sync open/fsync/close every entry
21 -g, --grep=RE only list paths that match the given Perl regular expression
22
23=head1 DESCRIPTION
24
25The F<treescan> command scans directories and their contents
26recursively. By default it lists all files and directories (with trailing
27C</>), but it can optionally do various other things.
28
29If no paths are given, F<treescan> will use C<.>, the current directory.
30
31=head2 OPTIONS
32
33=over 4
34
35=item -q, --quiet
36
37By default, F<treescan> prints the full paths of all directories or files
38it finds. This option disables printing of filenames completely. This is
39useful if you want to run F<treescan> solely for its side effects, such as
40pulling C<stat> data into memory.
41
42=item -0, --print0
43
44Instead of using newlines, use null characters after each filename. This
45is useful to avoid quoting problems when piping the result into other
46programs (for example, GNU F<grep>, F<xargs> and so on all have options to
47deal with this).
48
49=item -s, --stat
50
51Normally, F<treescan> will use heuristics to avoid most C<stat> calls,
52which is what makes it so fast. This option forces it to C<stat> every file.
53
54This is only useful for the side effect of pulling the C<stat> data into
55the cache. If your disk cache is big enough, it will be filled with
56file meta data after F<treescan> is done, which can speed up subsequent
57commands considerably. Often, you can run F<treescan> in parallel with
58other directory-scanning programs to speed them up.
59
60=item -d, --dirs
61
62Only list directories, not file paths. This is useful if you quickly want
63a list of directories and their subdirectories.
64
65=item -f, --files
66
67Only list files, not directories. This is useful if you want to operate on
68all files in a hierarchy, and the directories would only get in the way.
69
70=item -p, --progress
71
72Regularly print some progress information to standard error. This is
73useful to get some progress information on long running tasks. Since
74the progress is printed to standard error, you can pipe the output of
75F<treescan> into other programs as usual.
76
77=item --sync
78
79The C<--sync> option can be used to make sure all the files/dirs in a tree
80are sync'ed to disk. For example this could be useful after unpacking an
81archive, to make sure the files hit the disk before deleting the archive
82file itself.
83
84=item -g, --grep=RE
85
86This applies a Perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed
87and prints only the paths that match.
88
89The regular expression uses an C</s> (single line) modifier by default, so
90newlines are matched by C<.>.
91
92=back
93
94=head1 AUTHOR
95
96 Marc Lehmann <schmorp@schmorp.de>
97 http://home.schmorp.de/
98
99=cut
5 100
6use common::sense; 101use common::sense;
7use Getopt::Long; 102use Getopt::Long;
8use Time::HiRes (); 103use Time::HiRes ();
9use IO::AIO; 104use IO::AIO;
10 105
11our $VERSION = $IO::AIO::VERSION; 106our $VERSION = $IO::AIO::VERSION;
12 107
13Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version"); 108Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");
14 109
15my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, 110my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help,
16 $opt_nofiles, $opt_grep, $opt_progress); 111 $opt_nofiles, $opt_grep, $opt_progress, $opt_sync);
17 112
18GetOptions 113GetOptions
19 "quiet|q" => \$opt_silent, 114 "quiet|q" => \$opt_silent,
20 "print0|0" => \$opt_print0, 115 "print0|0" => \$opt_print0,
21 "stat|s" => \$opt_stat, 116 "stat|s" => \$opt_stat,
22 "dirs|d" => \$opt_nofiles, 117 "dirs|d" => \$opt_nofiles,
23 "files|f" => \$opt_nodirs, 118 "files|f" => \$opt_nodirs,
24 "grep|g=s" => \$opt_grep, 119 "grep|g=s" => \$opt_grep,
25 "progress|p" => \$opt_progress, 120 "progress|p" => \$opt_progress,
121 "sync" => \$opt_sync,
122 "help" => \$opt_help,
26 or die "Usage: try $0 --help"; 123 or die "Usage: try $0 --help";
27 124
125if ($opt_help) {
126 require Pod::Usage;
127
128 Pod::Usage::pod2usage (
129 -verbose => 1,
130 -exitval => 0,
131 );
132}
133
28@ARGV = "." unless @ARGV; 134@ARGV = "." unless @ARGV;
135
136my @todo; # list of dirs/files still left to scan
29 137
30$opt_grep &&= qr{$opt_grep}s; 138$opt_grep &&= qr{$opt_grep}s;
31 139
32my ($n_dirs, $n_files, $n_stats) = (0, 0, 0); 140my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);
33my ($n_last, $n_start) = (Time::HiRes::time) x 2; 141my ($n_last, $n_start) = (Time::HiRes::time) x 2;
57 $n_last = Time::HiRes::time; 165 $n_last = Time::HiRes::time;
58 my $d = $n_last - $n_start; 166 my $d = $n_last - $n_start;
59 printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ", 167 printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
60 $n_dirs, $n_dirs / $d, 168 $n_dirs, $n_dirs / $d,
61 $n_files, $n_files / $d, 169 $n_files, $n_files / $d,
62 $n_stats, $n_stats / $d 170 $n_stats, $n_stats / $d;
63 if $opt_progress;
64 } 171 }
65 172
66 aioreq_pri -1; 173 aioreq_pri -1;
67 ++$n_dirs; 174 ++$n_dirs;
68 aio_scandir $path, 8, sub { 175 aio_scandir $path, 8, sub {
69 my ($dirs, $files) = @_ 176 my ($dirs, $files) = @_
70 or warn "$path: $!\n"; 177 or return warn "$path: $!\n";
71 178
72 printfn "", [$path] unless $opt_nodirs; 179 printfn "", [$path] unless $opt_nodirs;
73 printfn $path, $files unless $opt_nofiles; 180 printfn $path, $files unless $opt_nofiles;
74 181
75 $n_files += @$files; 182 $n_files += @$files;
81 aio_lstat [$wd, $_] for @$files; 188 aio_lstat [$wd, $_] for @$files;
82 $n_stats += @$files; 189 $n_stats += @$files;
83 }; 190 };
84 } 191 }
85 192
86 &scan ("$path$_") for @$dirs; 193 if ($opt_sync) {
194 aio_wd $path, sub {
195 my $wd = shift;
196
197 aio_pathsync [$wd, $_] for @$files;
198 aio_pathsync $wd;
199 };
200 }
201
202 push @todo, "$path$_"
203 for sort { $b cmp $a } @$dirs;
87 }; 204 };
88} 205}
89 206
90IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly 207IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly
91IO::AIO::min_parallel 20; 208IO::AIO::min_parallel 20;
92 209
93for my $seed (@ARGV) { 210@todo = reverse @ARGV;
211
212while () {
213 if (@todo) {
214 my $seed = pop @todo;
94 $seed =~ s/\/+$//; 215 $seed =~ s/\/+$//;
95 aio_lstat "$seed/.", sub { 216 aio_lstat "$seed/.", sub {
96 if ($_[0]) { 217 if ($_[0]) {
97 print STDERR "$seed: $!\n"; 218 print STDERR "$seed: $!\n";
98 } elsif (-d _) { 219 } elsif (-d _) {
99 scan $seed; 220 scan $seed;
100 } else { 221 } else {
101 printfn "", $seed, "/"; 222 printfn "", $seed, "/";
223 }
102 } 224 };
225 } else {
226 IO::AIO::poll_wait;
103 }; 227 }
104}
105 228
106IO::AIO::flush; 229 last unless IO::AIO::nreqs;
107 230
231 IO::AIO::poll_cb;
232}
233

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines