ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/IO-AIO/bin/treescan
Revision: 1.21
Committed: Wed Dec 30 07:45:33 2020 UTC (3 years, 3 months ago) by root
Branch: MAIN
CVS Tags: rel-4_81, rel-4_80, rel-4_78, rel-4_79, rel-4_75, rel-4_76, rel-4_77, HEAD
Changes since 1.20: +0 -2 lines
Log Message:
4.75

File Contents

# User Rev Content
1 root 1.1 #!/opt/bin/perl
2    
3     # inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>
4     # about 40% faster than the original version (on my fs and raid :)
5    
6 root 1.17 =head1 NAME
7    
8     treescan - scan directory trees, list dirs/files, stat, sync, grep
9    
10     =head1 SYNOPSIS
11    
12     treescan [OPTION...] [PATH...]
13    
14     -q, --quiet do not print list of files/directories
15     -0, --print0 use null character instead of newline to separate names
16     -s, --stat call stat on every entry, to get stat data into cache
17     -d, --dirs only list dirs
18     -f, --files only list files
19     -p, --progress regularly print progress to stderr
20     --sync open/fsync/close every entry
21 root 1.19 -g, --grep=RE only list files that match the given perl RegEx
22 root 1.17
23     =head1 DESCRIPTION
24    
25     The F<treescan> command scans directories and their contents
26     recursively. By default it lists all files and directories (with trailing
27     C</>), but it can optionally do various other things.
28    
29     If no paths are given, F<treescan> will use C<.>, the current directory.
30    
31     =head2 OPTIONS
32    
33     =over 4
34    
35     =item -q, --quiet
36    
37     By default, F<treescan> prints the full paths of all directories or files
38     it finds. This option disables printing of filenames completely. This is
39     useful if you want to run F<treescan> solely for its side effects, such as
40     pulling C<stat> data into memory.
41    
42     =item -0, --print0
43    
44     Instead of using newlines, use null characters after each filename. This
45     is useful to avoid quoting problems when piping the result into other
46     programs (for example, GNU F<grep>, F<xargs> and so on all have options to
47     deal with this).
48    
49     =item -s, --stat
50    
51     Normally, F<treescan> will use heuristics to avoid most C<stat> calls,
52     which is what makes it so fast. This option forces it to C<stat> every file.
53    
54     This is only useful for the side effect of pulling the C<stat> data into
55 root 1.18 the cache. If your disk cache is big enough, it will be filled with
56     file meta data after F<treescan> is done, which can speed up subsequent
57     commands considerably. Often, you can run F<treescan> in parallel with
58     other directory-scanning programs to speed them up.
59 root 1.17
60     =item -d, --dirs
61    
62     Only list directories, not file paths. This is useful if you quickly want
63     a list of directories and their subdirectories.
64    
65     =item -f, --files
66    
67 root 1.18 Only list files, not directories. This is useful if you want to operate on
68     all files in a hierarchy, and the directories would only get in the way.
69 root 1.17
70     =item -p, --progress
71    
72     Regularly print some progress information to standard error. This is
73     useful to get some progress information on long running tasks. Since
74     the progress is printed to standard error, you can pipe the output of
75     F<treescan> into other programs as usual.
76    
77     =item --sync
78    
79     The C<--sync> option can be used to make sure all the files/dirs in a tree
80     are sync'ed to disk. For example this could be useful after unpacking an
81     archive, to make sure the files hit the disk before deleting the archive
82     file itself.
83    
84     =item -g, --grep=RE
85    
86     This applies a perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed
87     and will only print matching paths.
88    
89     The regular expression uses an C</s> (single line) modifier by default, so
90     newlines are matched by C<.>.
91    
92     =back
93    
94     =head1 AUTHOR
95    
96     Marc Lehmann <schmorp@schmorp.de>
97     http://home.schmorp.de/
98    
99     =cut
100    
101 root 1.15 use common::sense;
102 root 1.1 use Getopt::Long;
103 root 1.10 use Time::HiRes ();
104 root 1.1 use IO::AIO;
105    
our $VERSION = $IO::AIO::VERSION;

Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");

# Command line switches. Note the inverted naming of two of them:
# --dirs sets $opt_nofiles and --files sets $opt_nodirs, because each
# switch works by suppressing the *other* kind of entry.
my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help,
    $opt_nofiles, $opt_grep, $opt_progress, $opt_sync);

GetOptions
   "quiet|q"    => \$opt_silent,
   "print0|0"   => \$opt_print0,
   "stat|s"     => \$opt_stat,
   "dirs|d"     => \$opt_nofiles,
   "files|f"    => \$opt_nodirs,
   "grep|g=s"   => \$opt_grep,
   "progress|p" => \$opt_progress,
   "sync"       => \$opt_sync,
   "help"       => \$opt_help,
   # trailing newline keeps perl from appending "at ... line N." to the hint
   or die "Usage: try $0 --help\n";

if ($opt_help) {
   require Pod::Usage;

   Pod::Usage::pod2usage (
      -verbose => 1,
      -exitval => 0,
   );
}

# without explicit paths, scan the current directory
@ARGV = "." unless @ARGV;

my @todo; # list of dirs/files still left to scan

# Compile the --grep pattern once, with /s so "." also matches newlines.
# Test for a non-empty string rather than for truth, so that a pattern
# such as "0" (false as a perl string) is not silently ignored. An empty
# pattern is left alone and keeps meaning "no filtering".
if (defined $opt_grep and length $opt_grep) {
   $opt_grep = qr{$opt_grep}s;
}

# progress counters, and the times of the last progress line / program start
my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);
my ($n_last, $n_start) = (Time::HiRes::time ()) x 2;
142 root 1.10
# Print a batch of names, honouring --grep, --quiet and --print0.
#
#   $prefix - string prepended to every name (a directory path, or "")
#   $files  - array reference of names; when --grep is active the array is
#             filtered IN PLACE, so the caller afterwards only sees the
#             names whose full path ("$prefix$name") matched
#   $suffix - optional string appended to every name (defaults to "")
#
# Each name is terminated by a NUL byte with --print0, by a newline
# otherwise. With --quiet nothing is printed at all.
sub printfn {
   my ($prefix, $files, $suffix) = @_;

   $suffix = "" unless defined $suffix;

   # filtering happens even when output is suppressed, because callers
   # keep using the (now reduced) list after this call
   if ($opt_grep) {
      @$files = grep "$prefix$_" =~ $opt_grep, @$files;
   }

   # --quiet wins over --print0: it disables printing of names completely
   return if $opt_silent;

   my $terminator = $opt_print0 ? "\0" : "\n";

   print map "$prefix$_$suffix$terminator", @$files;
}
156    
# Queue an asynchronous scan of one directory.
#
#   $path - directory to scan, without trailing slash (one is appended here)
#
# Pending AIO callbacks are polled first and, with --progress, a status
# line is written to STDERR once more than a second has passed since the
# previous one. When the scandir request completes, the directory and its
# files are printed (subject to --files/--dirs), optional lstat and sync
# requests are queued for the files, and the subdirectories are pushed
# onto @todo for later scanning.
sub scan {
   my ($dir) = @_;

   $dir .= "/";

   IO::AIO::poll_cb ();

   my $report_due = $opt_progress && $n_last + 1 < Time::HiRes::time ();

   if ($report_due) {
      $n_last = Time::HiRes::time ();
      my $elapsed = $n_last - $n_start;

      # each counter is followed by its per-second rate
      printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
         map { ($_, $_ / $elapsed) } $n_dirs, $n_files, $n_stats;
   }

   ++$n_dirs;

   # the priority set here applies to the scandir request issued next
   aioreq_pri (-1);
   aio_scandir ($dir, 8, sub {
      # an empty argument list signals failure, with the reason in $!
      unless (@_) {
         warn "$dir: $!\n";
         return;
      }

      my ($subdirs, $entries) = @_;

      $opt_nodirs  or printfn ("", [$dir]);
      $opt_nofiles or printfn ($dir, $entries);

      # counted after printfn, which may have reduced the list (--grep)
      $n_files += @$entries;

      if ($opt_stat) {
         aio_wd ($dir, sub {
            my ($wd) = @_;

            foreach my $name (@$entries) {
               aio_lstat ([$wd, $name]);
            }

            $n_stats += @$entries;
         });
      }

      if ($opt_sync) {
         aio_wd ($dir, sub {
            my ($wd) = @_;

            foreach my $name (@$entries) {
               aio_pathsync ([$wd, $name]);
            }

            aio_pathsync ($wd);
         });
      }

      # pushed in descending order, so popping @todo yields ascending names
      push @todo, map { "$dir$_" } sort { $b cmp $a } @$subdirs;
   });
}
206    
IO::AIO::max_outstanding (100); # two fds per directory, so limit accordingly
IO::AIO::min_parallel (20);

# @todo is consumed from its end, so reverse the arguments to process
# them in the order given on the command line
@todo = reverse @ARGV;

# Main loop: start one lstat per pending path, or block until some
# request has finished when nothing is pending, and stop once no
# requests are outstanding.
while () {
   if (@todo) {
      my $seed = pop @todo;
      $seed =~ s/\/+$//; # strip trailing slashes, "/." is appended below

      aio_lstat ("$seed/.", sub {
         if ($_[0]) {
            # non-zero status: the lstat failed, reason is in $!
            print STDERR "$seed: $!\n";
         } elsif (-d _) {
            scan ($seed);
         } else {
            # printfn expects an array reference of names; passing the
            # bare string made it print nothing at all
            printfn ("", [$seed], "/");
         }
      });
   } else {
      IO::AIO::poll_wait ();
   }

   last unless IO::AIO::nreqs ();

   IO::AIO::poll_cb ();
}
233