ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/IO-AIO/bin/treescan
Revision: 1.18
Committed: Sat Dec 3 16:33:45 2016 UTC (7 years, 5 months ago) by root
Branch: MAIN
CVS Tags: rel-4_4, rel-4_5, rel-4_6, rel-4_7, rel-4_52, rel-4_53, rel-4_51, rel-4_54, rel-4_71, rel-4_72
Changes since 1.17: +6 -6 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 #!/opt/bin/perl
2
3 # inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>
4 # about 40% faster than the original version (on my fs and raid :)
5
6 =head1 NAME
7
8 treescan - scan directory trees, list dirs/files, stat, sync, grep
9
10 =head1 SYNOPSIS
11
12 treescan [OPTION...] [PATH...]
13
14 -q, --quiet do not print list of files/directories
15 -0, --print0 use null character instead of newline to separate names
16 -s, --stat call stat on every entry, to get stat data into cache
17 -d, --dirs only list dirs
18 -f, --files only list files
19 -p, --progress regularly print progress to stderr
20 --sync open/fsync/close every entry
21 -g, --grep=RE only list files that match the given perl RegEx
22
23 =head1 DESCRIPTION
24
25 The F<treescan> command scans directories and their contents
26 recursively. By default it lists all files and directories (with trailing
27 C</>), but it can optionally do various other things.
28
29 If no paths are given, F<treescan> will use C<.>, the current directory.
30
31 =head2 OPTIONS
32
33 =over 4
34
35 =item -q, --quiet
36
37 By default, F<treescan> prints the full paths of all directories or files
38 it finds. This option disables printing of filenames completely. This is
39 useful if you want to run F<treescan> solely for its side effects, such as
40 pulling C<stat> data into memory.
41
42 =item -0, --print0
43
44 Instead of using newlines, use null characters after each filename. This
45 is useful to avoid quoting problems when piping the result into other
46 programs (for example, GNU F<grep>, F<xargs> and so on all have options to
47 deal with this).
48
49 =item -s, --stat
50
51 Normally, F<treescan> will use heuristics to avoid most C<stat> calls,
52 which is what makes it so fast. This option forces it to C<stat> every file.
53
54 This is only useful for the side effect of pulling the C<stat> data into
55 the cache. If your disk cache is big enough, it will be filled with
56 file meta data after F<treescan> is done, which can speed up subsequent
57 commands considerably. Often, you can run F<treescan> in parallel with
58 other directory-scanning programs to speed them up.
59
60 =item -d, --dirs
61
62 Only list directories, not file paths. This is useful if you quickly want
63 a list of directories and their subdirectories.
64
65 =item -f, --files
66
67 Only list files, not directories. This is useful if you want to operate on
68 all files in a hierarchy, and the directories would only get in the way.
69
70 =item -p, --progress
71
72 Regularly print some progress information to standard error. This is
73 useful to get some progress information on long running tasks. Since
74 the progress is printed to standard error, you can pipe the output of
75 F<treescan> into other programs as usual.
76
77 =item --sync
78
79 The C<--sync> option can be used to make sure all the files/dirs in a tree
80 are sync'ed to disk. For example this could be useful after unpacking an
81 archive, to make sure the files hit the disk before deleting the archive
82 file itself.
83
84 =item -g, --grep=RE
85
86 This applies a perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed
87 and will only print matching paths.
88
89 The regular expression uses an C</s> (single line) modifier by default, so
90 newlines are matched by C<.>.
91
92 =back
93
94 =head1 AUTHOR
95
96 Marc Lehmann <schmorp@schmorp.de>
97 http://home.schmorp.de/
98
99 =cut
100
101 use common::sense;
102 use Getopt::Long;
103 use Time::HiRes ();
104 use IO::AIO;
105
# The script is versioned together with the IO::AIO module it ships with.
our $VERSION = $IO::AIO::VERSION;

# Option parsing: single-letter options may be bundled (-q0s), option names
# are case sensitive, parsing stops at the first non-option argument, and
# Getopt::Long supplies --help/--version handling.
Getopt::Long::Configure (
   "bundling",
   "no_ignore_case",
   "require_order",
   "auto_help",
   "auto_version",
);

# One flag per command line switch. Note the inverted naming for the two
# listing filters: --dirs sets $opt_nofiles and --files sets $opt_nodirs.
my (
   $opt_silent,
   $opt_print0,
   $opt_stat,
   $opt_nodirs,
   $opt_help,
   $opt_nofiles,
   $opt_grep,
   $opt_progress,
   $opt_sync,
);

GetOptions (
   "quiet|q"    => \$opt_silent,
   "print0|0"   => \$opt_print0,
   "stat|s"     => \$opt_stat,
   "dirs|d"     => \$opt_nofiles,
   "files|f"    => \$opt_nodirs,
   "grep|g=s"   => \$opt_grep,
   "progress|p" => \$opt_progress,
   "sync"       => \$opt_sync,
   "help"       => \$opt_help,
) or die "Usage: try $0 --help";

# --help: render the SYNOPSIS and OPTIONS sections of the embedded POD and
# exit successfully. Pod::Usage is only loaded when it is actually needed.
if ($opt_help) {
   require Pod::Usage;

   Pod::Usage::pod2usage (-exitval => 0, -verbose => 1);
}
133
# Without any path arguments, scan the current directory.
@ARGV = "." unless @ARGV;

# Compile the --grep pattern once, with /s so that "." also matches newlines
# embedded in file names.
#
# The test is on definedness and length rather than on truth, so that the
# perfectly valid pattern "0" is not silently dropped. An empty pattern is
# deliberately left uncompiled (and therefore false, i.e. "no filtering"):
# it would match everything anyway, and matching against an empty pattern
# can trigger perl's "reuse the last successful pattern" special case.
$opt_grep = qr{$opt_grep}s
   if defined $opt_grep && length $opt_grep;

# Running totals, shown by the --progress display.
my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);

# $n_last is the time of the most recent progress line, $n_start the time
# the scan began; both start out as "now".
my ($n_last, $n_start) = (Time::HiRes::time) x 2;
140
# printfn $prefix, \@names [, $suffix]
#
# Print each entry of @names as "$prefix$name$suffix", terminated by a NUL
# character when --print0 is in effect and by a newline otherwise.
#
#   $prefix - string prepended to every name (typically the directory path)
#   $files  - array reference holding the names to print
#   $suffix - optional string appended to every name (defaults to "")
#
# When --grep is in effect, only entries whose "$prefix$name" (without the
# suffix) matches the pattern are printed. The caller's array is never
# modified, so filtering affects the listing only and not what the caller
# does with the names afterwards.
#
# With --quiet nothing is printed at all, regardless of --print0.
sub printfn {
   my ($prefix, $files, $suffix) = @_;

   # --quiet disables the listing completely; skip the filtering work too
   return if $opt_silent;

   $suffix = "" unless defined $suffix;

   # work on a private copy so the grep filter cannot leak into the caller
   my @names = @$files;
   @names = grep { "$prefix$_" =~ $opt_grep } @names
      if $opt_grep;

   my $terminator = $opt_print0 ? "\0" : "\n";

   print map "$prefix$_$suffix$terminator", @names;
}
154
# scan $path
#
# Queue an asynchronous scan of the directory $path (given without trailing
# slash). Once the directory listing arrives, the directory and its
# non-directory entries are handed to printfn (subject to --files/--dirs),
# optional lstat (--stat) and pathsync (--sync) requests are queued for the
# entries, and scan is called again for every subdirectory.
#
# Updates the global counters $n_dirs, $n_files and $n_stats, and prints a
# progress line to STDERR at most about once per second when --progress is
# in effect.
sub scan {
   my ($path) = @_;

   # all paths built below are relative to "$path/"
   $path = "$path/";

   # give already-finished requests a chance to run their callbacks
   IO::AIO::poll_cb ();

   if ($opt_progress) {
      if ($n_last + 1 < Time::HiRes::time ()) {
         $n_last = Time::HiRes::time ();
         my $elapsed = $n_last - $n_start;

         # each counter is followed by its average rate per second
         printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
            map +($_, $_ / $elapsed), $n_dirs, $n_files, $n_stats;
      }
   }

   # the priority set here applies to the aio_scandir request queued below
   aioreq_pri (-1);
   ++$n_dirs;

   aio_scandir $path, 8, sub {
      # the callback is invoked without arguments when the scan failed
      unless (@_) {
         warn "$path: $!\n";
         return;
      }

      my ($subdirs, $entries) = @_;

      unless ($opt_nodirs) {
         printfn ("", [$path]);
      }

      unless ($opt_nofiles) {
         printfn ($path, $entries);
      }

      $n_files += @$entries;

      if ($opt_stat) {
         # resolve the directory once, then lstat every entry relative to it
         aio_wd $path, sub {
            my ($dir_wd) = @_;

            for my $name (@$entries) {
               aio_lstat [$dir_wd, $name];
            }

            $n_stats += @$entries;
         };
      }

      if ($opt_sync) {
         # sync every entry, and finally the directory itself
         aio_wd $path, sub {
            my ($dir_wd) = @_;

            for my $name (@$entries) {
               aio_pathsync [$dir_wd, $name];
            }

            aio_pathsync $dir_wd;
         };
      }

      # descend into the subdirectories
      for my $name (@$subdirs) {
         scan ("$path$name");
      }
   };
}
203
# Request limits for the whole run.
IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly
IO::AIO::min_parallel 20;

# Start one scan per path given on the command line.
for my $seed (@ARGV) {
   # strip trailing slashes, scan appends exactly one itself
   $seed =~ s/\/+$//;

   # lstat "$seed/." rather than $seed itself, then decide how to handle
   # the seed from the result
   aio_lstat "$seed/.", sub {
      if ($_[0]) {
         # non-zero status: the lstat failed, report and skip this seed
         print STDERR "$seed: $!\n";
      } elsif (-d _) {
         scan $seed;
      } else {
         # not a directory: just list the seed itself. printfn expects an
         # array reference of names, so the single name must be wrapped.
         printfn "", [$seed], "/";
      }
   };
}

# Process requests and run callbacks until everything queued has completed.
IO::AIO::flush;
221