#!/opt/bin/perl

# inspired by treescan by Jamie Lokier
# about 40% faster than the original version (on my fs and raid :)

=head1 NAME

treescan - scan directory trees, list dirs/files, stat, sync, grep

=head1 SYNOPSIS

   treescan [OPTION...] [PATH...]

      -q, --quiet      do not print list of files/directories
      -0, --print0     use null character instead of newline to separate names
      -s, --stat       call stat on every entry, to get stat data into cache
      -d, --dirs       only list dirs
      -f, --files      only list files
      -p, --progress   regularly print progress to stderr
          --sync       open/fsync/close every entry
      -g, --grep=RE    only list files that match the given perl RegEx

=head1 DESCRIPTION

The F<treescan> command scans directories and their contents
recursively. By default it lists all files and directories (with trailing
C</>), but it can optionally do various other things.

If no paths are given, F<treescan> will use C<.>, the current directory.

=head2 OPTIONS

=over 4

=item -q, --quiet

By default, F<treescan> prints the full paths of all directories or files
it finds. This option disables printing of filenames completely. This is
useful if you want to run F<treescan> solely for its side effects, such as
pulling C<stat> data into memory.

=item -0, --print0

Instead of using newlines, use null characters after each filename. This
is useful to avoid quoting problems when piping the result into other
programs (for example, GNU F<grep>, F<xargs> and so on all have options to
deal with this).

=item -s, --stat

Normally, F<treescan> will use heuristics to avoid most C<stat> calls,
which is what makes it so fast. This option forces it to C<stat> every file.

This is only useful for the side effect of pulling the C<stat> data into
the cache. If your disk cache is big enough, it will be filled with
file meta data after F<treescan> is done, which can speed up subsequent
commands considerably. Often, you can run F<treescan> in parallel with
other directory-scanning programs to speed them up.

=item -d, --dirs

Only list directories, not file paths. This is useful if you quickly want
a list of directories and their subdirectories.

=item -f, --files

Only list files, not directories. This is useful if you want to operate on
all files in a hierarchy, and the directories would only get in the way.

=item -p, --progress

Regularly print some progress information to standard error. This is
useful to get some progress information on long running tasks. Since
the progress is printed to standard error, you can pipe the output of
F<treescan> into other programs as usual.

=item --sync

The C<--sync> option can be used to make sure all the files/dirs in a tree
are sync'ed to disk. For example this could be useful after unpacking an
archive, to make sure the files hit the disk before deleting the archive
file itself.

=item -g, --grep=RE

This applies a perl regular expression (see the L<perlre> manpage) to all
paths that would normally be printed and will only print matching paths.

The regular expression uses an C</s> (single line) modifier by default, so
newlines are matched by C<.>.

=back

=head1 AUTHOR

   Marc Lehmann
   http://home.schmorp.de/

=cut

use common::sense;
use Getopt::Long;
use Time::HiRes ();
use IO::AIO;

# The script is versioned together with the IO::AIO module it uses.
our $VERSION = $IO::AIO::VERSION;

Getopt::Long::Configure(
    "bundling", "no_ignore_case", "require_order", "auto_help", "auto_version",
);

# Option flags, filled in by GetOptions below. Note the inverted naming:
# --dirs ("only dirs") is stored as "no files", --files as "no dirs".
my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help,
    $opt_nofiles, $opt_grep, $opt_progress, $opt_sync);

GetOptions(
    "quiet|q"    => \$opt_silent,
    "print0|0"   => \$opt_print0,
    "stat|s"     => \$opt_stat,
    "dirs|d"     => \$opt_nofiles,
    "files|f"    => \$opt_nodirs,
    "grep|g=s"   => \$opt_grep,
    "progress|p" => \$opt_progress,
    "sync"       => \$opt_sync,
    "help"       => \$opt_help,
) or die "Usage: try $0 --help\n"; # trailing newline keeps "at FILE line N." out of the message

if ($opt_help) {
    # Pod::Usage is loaded lazily: it is only needed for --help.
    require Pod::Usage;

    Pod::Usage::pod2usage(
        -verbose => 1,
        -exitval => 0,
    );
}

@ARGV = "."   # default path; the "unless @ARGV;" modifier of this statement follows on the next line
unless @ARGV; # completes the `@ARGV = "."` statement begun on the previous line

my @todo; # list of dirs/files still left to scan (used as a stack)

# Compile the --grep pattern once, with the /s modifier. Definedness and
# length are tested instead of truth so that the pattern "0" is honoured;
# an absent or empty pattern leaves $opt_grep undefined (no filtering).
$opt_grep = (defined $opt_grep && length $opt_grep)
    ? qr{$opt_grep}s
    : undef;

# Counters for --progress, plus the time of the last report and of startup.
my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);
my ($n_last, $n_start) = (Time::HiRes::time) x 2;

# printfn $prefix, \@names [, $suffix]
#
# Prints "$prefix$name$suffix" for every name in @names, each terminated
# by NUL when --print0 is in effect and by a newline otherwise.
# Prints nothing at all under --quiet. When --grep is active, only entries
# whose "$prefix$name" matches the pattern are printed.
# The caller's array is never modified.
sub printfn {
    my ($prefix, $files, $suffix) = @_;

    # --quiet disables listing completely, also in combination with --print0.
    return if $opt_silent;

    $suffix //= "";

    # Filter a private copy: the caller keeps using its list for the file
    # count and for --stat/--sync, which --grep must not affect.
    my @names = @$files;
    @names = grep "$prefix$_" =~ $opt_grep, @names
        if defined $opt_grep;

    my $eol = $opt_print0 ? "\0" : "\n";

    print map "$prefix$_$suffix$eol", @names;
}

# scan $dir
#
# Queues an asynchronous scan of directory $dir (given without trailing
# slash). When the scan completes, the directory and its non-directory
# entries are listed (subject to --dirs/--files), the entries are
# optionally lstat'ed (--stat) and/or synced (--sync), and all
# subdirectories are pushed onto @todo for the main loop to pick up.
sub scan {
    my ($path) = @_;

    $path .= "/";

    # Run callbacks of already finished requests before queueing more work.
    IO::AIO::poll_cb;

    # Print a progress line at most about once per second.
    if ($opt_progress and $n_last + 1 < Time::HiRes::time) {
        $n_last = Time::HiRes::time;
        my $d = $n_last - $n_start; # elapsed seconds, > 1 because of the test above
        printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
            $n_dirs,  $n_dirs  / $d,
            $n_files, $n_files / $d,
            $n_stats, $n_stats / $d;
    }

    aioreq_pri -1; # priority for the aio_scandir request issued next
    ++$n_dirs;
    aio_scandir $path, 8, sub {
        # An empty argument list signals failure; $! holds the reason.
        my ($dirs, $files) = @_
            or return warn "$path: $!\n";

        printfn "", [$path] unless $opt_nodirs;
        printfn $path, $files unless $opt_nofiles;

        $n_files += @$files;

        if ($opt_stat) {
            aio_wd $path, sub {
                # aio_wd passes undef when the directory cannot be resolved.
                my $wd = shift
                    or return warn "$path: $!\n";

                # The results are not used; the calls only pull stat data into the cache.
                aio_lstat [$wd, $_] for @$files;
                $n_stats += @$files;
            };
        }

        if ($opt_sync) {
            aio_wd $path, sub {
                my $wd = shift
                    or return warn "$path: $!\n";

                aio_pathsync [$wd, $_] for @$files;
                aio_pathsync $wd; # finally the directory itself
            };
        }

        # @todo is popped from the end, so push in reverse order to have
        # subdirectories visited in ascending name order.
        push @todo, "$path$_" for sort { $b cmp $a } @$dirs;
    };
}

IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly
IO::AIO::min_parallel 20;

@todo = reverse @ARGV; # reversed so that pop yields the paths in command line order

# Main loop: start one pending path per iteration, otherwise wait for
# outstanding requests; stop once nothing is in flight any more.
while () {
    if (@todo) {
        my $seed = pop @todo;
        $seed =~ s{/+\z}{}; # strip trailing slashes; \z so a trailing newline in a name is left alone

        # lstat of "$seed/." succeeds only if $seed resolves to a directory.
        aio_lstat "$seed/.", sub {
            if ($_[0]) {
                print STDERR "$seed: $!\n";
            } elsif (-d _) {
                scan $seed;
            } else {
                # Not a directory: list it as a plain entry. printfn needs
                # an array reference, and non-directories get no trailing "/".
                printfn "", [$seed], "";
            }
        };
    } else {
        IO::AIO::poll_wait;
    }

    last unless IO::AIO::nreqs;
    IO::AIO::poll_cb;
}