ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/IO-AIO/bin/treescan
Revision: 1.19
Committed: Fri Aug 23 07:20:21 2019 UTC (4 years, 8 months ago) by root
Branch: MAIN
CVS Tags: rel-4_73
Changes since 1.18: +1 -1 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #!/opt/bin/perl
2    
3     # inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>
4     # about 40% faster than the original version (on my fs and raid :)
5    
6 root 1.17 =head1 NAME
7    
8     treescan - scan directory trees, list dirs/files, stat, sync, grep
9    
10     =head1 SYNOPSIS
11    
12     treescan [OPTION...] [PATH...]
13    
14     -q, --quiet do not print list of files/directories
15     -0, --print0 use null character instead of newline to separate names
16     -s, --stat call stat on every entry, to get stat data into cache
17     -d, --dirs only list dirs
18     -f, --files only list files
19     -p, --progress regularly print progress to stderr
20     --sync open/fsync/close every entry
21 root 1.19 -g, --grep=RE only list files that match the given perl RegEx
22 root 1.17
23     =head1 DESCRIPTION
24    
25     The F<treescan> command scans directories and their contents
26     recursively. By default it lists all files and directories (with trailing
27     C</>), but it can optionally do various other things.
28    
29     If no paths are given, F<treescan> will use C<.>, the current directory.
30    
31     =head2 OPTIONS
32    
33     =over 4
34    
35     =item -q, --quiet
36    
37     By default, F<treescan> prints the full paths of all directories or files
38     it finds. This option disables printing of filenames completely. This is
39     useful if you want to run F<treescan> solely for its side effects, such as
40     pulling C<stat> data into memory.
41    
42     =item -0, --print0
43    
44     Instead of using newlines, use null characters after each filename. This
45     is useful to avoid quoting problems when piping the result into other
46     programs (for example, GNU F<grep>, F<xargs> and so on all have options to
47     deal with this).
48    
49     =item -s, --stat
50    
51     Normally, F<treescan> will use heuristics to avoid most C<stat> calls,
52     which is what makes it so fast. This option forces it to C<stat> every file.
53    
54     This is only useful for the side effect of pulling the C<stat> data into
55 root 1.18 the cache. If your disk cache is big enough, it will be filled with
56     file meta data after F<treescan> is done, which can speed up subsequent
57     commands considerably. Often, you can run F<treescan> in parallel with
58     other directory-scanning programs to speed them up.
59 root 1.17
60     =item -d, --dirs
61    
62     Only list directories, not file paths. This is useful if you quickly want
63     a list of directories and their subdirectories.
64    
65     =item -f, --files
66    
67 root 1.18 Only list files, not directories. This is useful if you want to operate on
68     all files in a hierarchy, and the directories would only get in the way.
69 root 1.17
70     =item -p, --progress
71    
72     Regularly print some progress information to standard error. This is
73     useful to get some progress information on long running tasks. Since
74     the progress is printed to standard error, you can pipe the output of
75     F<treescan> into other programs as usual.
76    
77     =item --sync
78    
79     The C<--sync> option can be used to make sure all the files/dirs in a tree
80     are sync'ed to disk. For example this could be useful after unpacking an
81     archive, to make sure the files hit the disk before deleting the archive
82     file itself.
83    
84     =item -g, --grep=RE
85    
86     This applies a perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed
87     and will only print matching paths.
88    
89     The regular expression uses an C</s> (single line) modifier by default, so
90     newlines are matched by C<.>.
91    
92     =back
93    
94     =head1 AUTHOR
95    
96     Marc Lehmann <schmorp@schmorp.de>
97     http://home.schmorp.de/
98    
99     =cut
100    
101 root 1.15 use common::sense;
102 root 1.1 use Getopt::Long;
103 root 1.10 use Time::HiRes ();
104 root 1.1 use IO::AIO;
105    
# Report the version of the IO::AIO distribution this script ships with.
our $VERSION = $IO::AIO::VERSION;

# Allow bundled short options (-qs), keep option names case-sensitive and
# stop option processing at the first non-option argument.
Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");

my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help,
    $opt_nofiles, $opt_grep, $opt_progress, $opt_sync);

# Note the deliberate inversion: --dirs ("only list dirs") is stored as
# "no files", and --files ("only list files") as "no dirs".
GetOptions (
   "quiet|q"    => \$opt_silent,
   "print0|0"   => \$opt_print0,
   "stat|s"     => \$opt_stat,
   "dirs|d"     => \$opt_nofiles,
   "files|f"    => \$opt_nodirs,
   "grep|g=s"   => \$opt_grep,
   "progress|p" => \$opt_progress,
   "sync"       => \$opt_sync,
   "help"       => \$opt_help,
) or die "Usage: try $0 --help";

if ($opt_help) {
   require Pod::Usage;

   Pod::Usage::pod2usage (
      -verbose => 1,
      -exitval => 0,
   );
}

# Without explicit paths, scan the current directory.
@ARGV = "." unless @ARGV;

# Compile the filter once. Test for definedness rather than truth, so that
# a pattern such as "0" is not silently ignored. A compiled regex object is
# always true, which is what the later "if ($opt_grep)" checks rely on.
$opt_grep = qr{$opt_grep}s
   if defined $opt_grep;

# Progress counters, and the timestamps of the last progress report and of
# program start (both initialised to "now").
my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);
my ($n_last, $n_start) = (Time::HiRes::time) x 2;
140 root 1.10
# Print a batch of names, each formatted as "$prefix$name$suffix" and
# followed by a terminator ("\0" with --print0, "\n" otherwise).
#
#   $prefix - string prepended to every name (usually the directory path)
#   $files  - array reference of names; a plain scalar is accepted as a
#             single name
#   $suffix - optional string appended to every name
#
# Honours the file-level options $opt_silent, $opt_grep and $opt_print0.
# The --grep filter is matched against "$prefix$name" (without the suffix).
# The caller's array is never modified, so filtering only affects output.
sub printfn {
   my ($prefix, $files, $suffix) = @_;

   # --quiet disables printing completely, also in combination with --print0
   return if $opt_silent;

   $suffix //= "";

   # work on a private copy so the caller's list stays intact
   my @names = ref ($files) ? @$files : ($files);

   @names = grep "$prefix$_" =~ $opt_grep, @names
      if $opt_grep;

   my $terminator = $opt_print0 ? "\0" : "\n";

   print map "$prefix$_$suffix$terminator", @names;
}
154    
# Queue an asynchronous scan of the directory $_[0]. When the directory
# listing arrives, the callback prints the directory and its files (subject
# to the --dirs/--files options), optionally queues lstat and/or pathsync
# requests for every file, and then calls scan for each subdirectory.
# Also updates the progress counters and, with --progress, prints a status
# line to STDERR once more than a second has passed since the previous one.
sub scan {
   my ($dir) = @_;

   $dir .= "/";

   # give finished requests a chance to run their callbacks
   IO::AIO::poll_cb ();

   if ($opt_progress && Time::HiRes::time () > $n_last + 1) {
      $n_last = Time::HiRes::time ();
      my $elapsed = $n_last - $n_start;

      # each counter is followed by its per-second rate
      printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
         map { ($_, $_ / $elapsed) } $n_dirs, $n_files, $n_stats;
   }

   aioreq_pri (-1);
   $n_dirs++;

   aio_scandir ($dir, 8, sub {
      # the callback receives no arguments when the scan failed
      unless (@_) {
         return warn "$dir: $!\n";
      }

      my ($subdirs, $names) = @_;

      printfn ("", [$dir])    unless $opt_nodirs;
      printfn ($dir, $names)  unless $opt_nofiles;

      $n_files += @$names;

      if ($opt_stat) {
         aio_wd ($dir, sub {
            my ($wd) = @_;

            aio_lstat ([$wd, $_]) for @$names;
            $n_stats += @$names;
         });
      }

      if ($opt_sync) {
         aio_wd ($dir, sub {
            my ($wd) = @_;

            # sync every entry first, then the directory itself
            aio_pathsync ([$wd, $_]) for @$names;
            aio_pathsync ($wd);
         });
      }

      scan ("$dir$_") for @$subdirs;
   });
}
203    
IO::AIO::max_outstanding (100); # two fds per directory, so limit accordingly
IO::AIO::min_parallel (20);

# Start one scan per path given on the command line.
for my $seed (@ARGV) {
   # strip trailing slashes; scan appends exactly one itself
   $seed =~ s/\/+$//;

   aio_lstat ("$seed/.", sub {
      if ($_[0]) {
         # non-zero result: the lstat failed, $! holds the reason
         print STDERR "$seed: $!\n";
      } elsif (-d _) {
         scan ($seed);
      } else {
         # printfn expects an array reference of names, not a bare string
         printfn ("", [$seed], "/");
      }
   });
}

# Process requests until all of them (including those queued from
# callbacks) have completed.
IO::AIO::flush ();
221