ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/IO-AIO/bin/treescan
Revision: 1.21
Committed: Wed Dec 30 07:45:33 2020 UTC (3 years, 2 months ago) by root
Branch: MAIN
CVS Tags: rel-4_81, rel-4_80, rel-4_78, rel-4_79, rel-4_75, rel-4_76, rel-4_77, HEAD
Changes since 1.20: +0 -2 lines
Log Message:
4.75

File Contents

# Content
1 #!/opt/bin/perl
2
3 # inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>
4 # about 40% faster than the original version (on my fs and raid :)
5
6 =head1 NAME
7
8 treescan - scan directory trees, list dirs/files, stat, sync, grep
9
10 =head1 SYNOPSIS
11
12 treescan [OPTION...] [PATH...]
13
14 -q, --quiet do not print list of files/directories
15 -0, --print0 use null character instead of newline to separate names
16 -s, --stat call stat on every entry, to get stat data into cache
17 -d, --dirs only list dirs
18 -f, --files only list files
19 -p, --progress regularly print progress to stderr
20 --sync open/fsync/close every entry
21 -g, --grep=RE only list files that match the given perl RegEx
22
23 =head1 DESCRIPTION
24
25 The F<treescan> command scans directories and their contents
26 recursively. By default it lists all files and directories (with trailing
27 C</>), but it can optionally do various other things.
28
29 If no paths are given, F<treescan> will use C<.>, the current directory.
30
31 =head2 OPTIONS
32
33 =over 4
34
35 =item -q, --quiet
36
37 By default, F<treescan> prints the full paths of all directories or files
38 it finds. This option disables printing of filenames completely. This is
39 useful if you want to run F<treescan> solely for its side effects, such as
40 pulling C<stat> data into memory.
41
42 =item -0, --print0
43
44 Instead of using newlines, use null characters after each filename. This
45 is useful to avoid quoting problems when piping the result into other
46 programs (for example, GNU F<grep>, F<xargs> and so on all have options to
47 deal with this).
48
49 =item -s, --stat
50
51 Normally, F<treescan> will use heuristics to avoid most C<stat> calls,
52 which is what makes it so fast. This option forces it to C<stat> every file.
53
54 This is only useful for the side effect of pulling the C<stat> data into
55 the cache. If your disk cache is big enough, it will be filled with
56 file meta data after F<treescan> is done, which can speed up subsequent
57 commands considerably. Often, you can run F<treescan> in parallel with
58 other directory-scanning programs to speed them up.
59
60 =item -d, --dirs
61
62 Only lists directories, not file paths. This is useful if you quickly want
63 a list of directories and their subdirectories.
64
65 =item -f, --files
66
67 Only list files, not directories. This is useful if you want to operate on
68 all files in a hierarchy, and the directories would only get in the way.
69
70 =item -p, --progress
71
72 Regularly print some progress information to standard error. This is
73 useful to get some progress information on long running tasks. Since
74 the progress is printed to standard error, you can pipe the output of
75 F<treescan> into other programs as usual.
76
77 =item --sync
78
79 The C<--sync> option can be used to make sure all the files/dirs in a tree
80 are sync'ed to disk. For example this could be useful after unpacking an
81 archive, to make sure the files hit the disk before deleting the archive
82 file itself.
83
84 =item -g, --grep=RE
85
86 This applies a perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed
87 and will only print matching paths.
88
89 The regular expression uses an C</s> (single line) modifier by default, so
90 newlines are matched by C<.>.
91
92 =back
93
94 =head1 AUTHOR
95
96 Marc Lehmann <schmorp@schmorp.de>
97 http://home.schmorp.de/
98
99 =cut
100
101 use common::sense;
102 use Getopt::Long;
103 use Time::HiRes ();
104 use IO::AIO;
105
# Reported by --version (Getopt::Long "auto_version" reads $main::VERSION).
our $VERSION = $IO::AIO::VERSION;

# bundling:       allow combined short options such as -sq
# no_ignore_case: option names are case sensitive
# require_order:  stop option processing at the first non-option argument
Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");

my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help,
    $opt_nofiles, $opt_grep, $opt_progress, $opt_sync);

# Note the inverted storage: --dirs means "no files" and --files means
# "no dirs", which is how the rest of the program tests them.
GetOptions
   "quiet|q"    => \$opt_silent,
   "print0|0"   => \$opt_print0,
   "stat|s"     => \$opt_stat,
   "dirs|d"     => \$opt_nofiles,
   "files|f"    => \$opt_nodirs,
   "grep|g=s"   => \$opt_grep,
   "progress|p" => \$opt_progress,
   "sync"       => \$opt_sync,
   "help"       => \$opt_help,
   # trailing newline keeps perl from appending "at ... line N." to the hint
   or die "Usage: try $0 --help\n";

if ($opt_help) {
   require Pod::Usage;

   Pod::Usage::pod2usage (
      -verbose => 1,
      -exitval => 0,
   );
}

# without any path arguments, scan the current directory
@ARGV = "." unless @ARGV;

my @todo; # list of dirs/files still left to scan

# Compile the --grep pattern once. Test for definedness/length rather than
# truth, so that a pattern such as "0" is not silently ignored. An empty
# pattern keeps its old meaning of "no filtering".
if (defined $opt_grep) {
   $opt_grep = length ($opt_grep) ? qr{$opt_grep}s : undef;
}

# counters and timestamps used by the --progress display
my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);
my ($n_last, $n_start) = (Time::HiRes::time) x 2;
142
# Print a batch of names to the currently selected output handle.
#
#   $prefix - string put in front of every name (e.g. the directory path)
#   $files  - array reference of names; a plain string is accepted as a
#             one-element list
#   $suffix - optional string appended to every name
#
# Each name is printed as "$prefix$name$suffix", followed by a NUL
# character with --print0 or a newline otherwise. With --grep, only names
# whose prefixed form matches the pattern are printed. With --quiet,
# nothing is printed at all, regardless of --print0.
#
# The caller's list is never modified, so filtering with --grep affects
# only what is printed, not what the caller counts, stats or syncs.
sub printfn {
   my ($prefix, $files, $suffix) = @_;

   # --quiet disables printing completely
   return if $opt_silent;

   $suffix = "" unless defined $suffix;

   # copy the names so that the grep below cannot clobber the caller's array
   my @names = ref ($files) ? @$files : ($files);

   @names = grep "$prefix$_" =~ $opt_grep, @names
      if $opt_grep;

   my $terminator = $opt_print0 ? "\0" : "\n";

   print map "$prefix$_$suffix$terminator", @names;
}
156
# Queue an asynchronous scan of one directory.
#
#   $_[0] - directory path without trailing slash
#
# Handles pending AIO callbacks, optionally refreshes the --progress line
# on STDERR (at most about once per second), then submits an aio_scandir
# request. When that request completes, the directory and its files are
# listed (subject to --dirs/--files), lstat and pathsync requests are
# issued for --stat and --sync, and the subdirectories are pushed onto
# @todo in reverse-sorted order.
sub scan {
   my $path = "$_[0]/";

   IO::AIO::poll_cb ();

   if ($opt_progress and Time::HiRes::time () > $n_last + 1) {
      $n_last = Time::HiRes::time ();
      my $elapsed = $n_last - $n_start;

      # each counter is followed by its average rate per second
      printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
         map { ($_, $_ / $elapsed) } $n_dirs, $n_files, $n_stats;
   }

   aioreq_pri (-1);
   ++$n_dirs;
   aio_scandir ($path, 8, sub {
      # no arguments means the scandir request failed
      return warn "$path: $!\n"
         unless @_;

      my ($subdirs, $entries) = @_;

      $opt_nodirs  or printfn ("", [$path]);
      $opt_nofiles or printfn ($path, $entries);

      $n_files += @$entries;

      if ($opt_stat) {
         aio_wd ($path, sub {
            my ($wd) = @_;

            for my $name (@$entries) {
               aio_lstat ([$wd, $name]);
            }

            $n_stats += @$entries;
         });
      }

      if ($opt_sync) {
         aio_wd ($path, sub {
            my ($wd) = @_;

            for my $name (@$entries) {
               aio_pathsync ([$wd, $name]);
            }

            # finally sync the directory itself
            aio_pathsync ($wd);
         });
      }

      # @todo is popped from the end, hence the descending sort
      push @todo, map "$path$_", sort { $b cmp $a } @$subdirs;
   });
}
206
IO::AIO::max_outstanding (100); # two fds per directory, so limit accordingly
IO::AIO::min_parallel (20);

# @todo is used as a stack (entries are popped from the end), so reverse
# the arguments to process them in command-line order.
@todo = reverse @ARGV;

# Main loop: start one lstat per pending path, or wait for results when
# nothing is pending; stop once no AIO requests are outstanding.
while () {
   if (@todo) {
      my $seed = pop @todo;
      $seed =~ s/\/+$//; # strip trailing slashes, "/." is appended below

      aio_lstat ("$seed/.", sub {
         if ($_[0]) {
            # non-zero status: the lstat failed
            print STDERR "$seed: $!\n";
         } elsif (-d _) {
            scan ($seed);
         } else {
            # printfn expects an array reference of names, not a bare string
            printfn ("", [$seed], "/");
         }
      });
   } else {
      IO::AIO::poll_wait ();
   }

   last unless IO::AIO::nreqs ();

   IO::AIO::poll_cb ();
}
233