[ViewVC] Diff of: cvs/IO-AIO/bin/treescan

Comparing IO-AIO/bin/treescan (file contents):
Revision 1.5 by root, Sun Jan 9 05:10:28 2011 UTC vs.
Revision 1.21 by root, Wed Dec 30 07:45:33 2020 UTC

…		…
1	#!/opt/bin/perl	1	#!/opt/bin/perl
2		2
3	# inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>	3	# inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>
4	# about 40% faster than the original version (on my fs and raid :)	4	# about 40% faster than the original version (on my fs and raid :)
5		5
6	use strict;	6	=head1 NAME
		7
		8	treescan - scan directory trees, list dirs/files, stat, sync, grep
		9
		10	=head1 SYNOPSIS
		11
		12	treescan [OPTION...] [PATH...]
		13
		14	-q, --quiet do not print list of files/directories
		15	-0, --print0 use null character instead of newline to separate names
		16	-s, --stat call stat on every entry, to get stat data into cache
		17	-d, --dirs only list dirs
		18	-f, --files only list files
		19	-p, --progress regularly print progress to stderr
		20	--sync open/fsync/close every entry
		21	-g, --grep=RE only list files that match the given perl RegEx
		22
		23	=head1 DESCRIPTION
		24
		25	The F<treescan> command scans directories and their contents
		26	recursively. By default it lists all files and directories (with trailing
		27	C</>), but it can optionally do various other things.
		28
		29	If no paths are given, F<treescan> will use C<.>, the current directory.
		30
		31	=head2 OPTIONS
		32
		33	=over 4
		34
		35	=item -q, --quiet
		36
		37	By default, F<treescan> prints the full paths of all directories or files
		38	it finds. This option disables printing of filenames completely. This is
		39	useful if you want to run F<treescan> solely for its side effects, such as
		40	pulling C<stat> data into memory.
		41
		42	=item -0, --print0
		43
		44	Instead of using newlines, use null characters after each filename. This
		45	is useful to avoid quoting problems when piping the result into other
		46	programs (for example, GNU F<grep>, F<xargs> and so on all have options to
		47	deal with this).
		48
		49	=item -s, --stat
		50
		51	Normally, F<treescan> will use heuristics to avoid most C<stat> calls,
		52	which is what makes it so fast. This option forces it to C<stat> every file.
		53
		54	This is only useful for the side effect of pulling the C<stat> data into
		55	the cache. If your disk cache is big enough, it will be filled with
		56	file meta data after F<treescan> is done, which can speed up subsequent
		57	commands considerably. Often, you can run F<treescan> in parallel with
		58	other directory-scanning programs to speed them up.
		59
		60	=item -d, --dirs
		61
		62	Only lists directories, not file paths. This is useful if you quickly want
		63	a list of directories and their subdirectories.
		64
		65	=item -f, --files
		66
		67	Only list files, not directories. This is useful if you want to operate on
		68	all files in a hierarchy, and the directories would ony get in the way.
		69
		70	=item -p, --progress
		71
		72	Regularly print some progress information to standard error. This is
		73	useful to get some progress information on long running tasks. Since
		74	the progress is printed to standard error, you can pipe the output of
		75	F<treescan> into other programs as usual.
		76
		77	=item --sync
		78
		79	The C<--sync> option can be used to make sure all the files/dirs in a tree
		80	are sync'ed to disk. For example this could be useful after unpacking an
		81	archive, to make sure the files hit the disk before deleting the archive
		82	file itself.
		83
		84	=item -g, --grep=RE
		85
		86	This applies a perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed
		87	and will only print matching paths.
		88
		89	The regular expression uses an C</s> (single line) modifier by default, so
		90	newlines are matched by C<.>.
		91
		92	=back
		93
		94	=head1 AUTHOR
		95
		96	Marc Lehmann <schmorp@schmorp.de>
		97	http://home.schmorp.de/
		98
		99	=cut
		100
		101	use common::sense;
7	use Getopt::Long;	102	use Getopt::Long;
		103	use Time::HiRes ();
8	use IO::AIO;	104	use IO::AIO;
9		105
10	our $VERSION = $IO::AIO::VERSION;	106	our $VERSION = $IO::AIO::VERSION;
11		107
12	Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");	108	Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");
13		109
14	my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_nofiles, $opt_grep);	110	my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help,
		111	$opt_nofiles, $opt_grep, $opt_progress, $opt_sync);
15		112
16	GetOptions	113	GetOptions
17	"quiet\|q" => \$opt_silent,	114	"quiet\|q" => \$opt_silent,
18	"print0\|0" => \$opt_print0,	115	"print0\|0" => \$opt_print0,
19	"stat\|s" => \$opt_stat,	116	"stat\|s" => \$opt_stat,
20	"dirs\|d" => \$opt_nofiles,	117	"dirs\|d" => \$opt_nofiles,
21	"files\|f" => \$opt_nodirs,	118	"files\|f" => \$opt_nodirs,
22	"grep\|g=s" => \$opt_grep,	119	"grep\|g=s" => \$opt_grep,
		120	"progress\|p" => \$opt_progress,
		121	"sync" => \$opt_sync,
		122	"help" => \$opt_help,
23	or die "Usage: try $0 --help";	123	or die "Usage: try $0 --help";
24		124
		125	if ($opt_help) {
		126	require Pod::Usage;
		127
		128	Pod::Usage::pod2usage (
		129	-verbose => 1,
		130	-exitval => 0,
		131	);
		132	}
		133
25	@ARGV = "." unless @ARGV;	134	@ARGV = "." unless @ARGV;
26		135
		136	my @todo; # list of dirs/files still left to scan
		137
27	$opt_grep &&= qr{$opt_grep}s;	138	$opt_grep &&= qr{$opt_grep}s;
		139
		140	my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);
		141	my ($n_last, $n_start) = (Time::HiRes::time) x 2;
28		142
29	sub printfn {	143	sub printfn {
30	my ($prefix, $files, $suffix) = @_;	144	my ($prefix, $files, $suffix) = @_;
31		145
32	if ($opt_grep) {	146	if ($opt_grep) {
43	sub scan {	157	sub scan {
44	my ($path) = @_;	158	my ($path) = @_;
45		159
46	$path .= "/";	160	$path .= "/";
47		161
		162	IO::AIO::poll_cb;
		163
		164	if ($opt_progress and $n_last + 1 < Time::HiRes::time) {
		165	$n_last = Time::HiRes::time;
		166	my $d = $n_last - $n_start;
		167	printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
		168	$n_dirs, $n_dirs / $d,
		169	$n_files, $n_files / $d,
		170	$n_stats, $n_stats / $d;
		171	}
		172
48	aioreq_pri -1;	173	aioreq_pri -1;
		174	++$n_dirs;
49	aio_scandir $path, 8, sub {	175	aio_scandir $path, 8, sub {
50	my ($dirs, $files) = @_;	176	my ($dirs, $files) = @_
		177	or return warn "$path: $!\n";
51		178
52	printfn "", [$path] unless $opt_nodirs;	179	printfn "", [$path] unless $opt_nodirs;
53	printfn $path, $files unless $opt_nofiles;	180	printfn $path, $files unless $opt_nofiles;
54		181
		182	$n_files += @$files;
		183
55	if ($opt_stat) {	184	if ($opt_stat) {
		185	aio_wd $path, sub {
		186	my $wd = shift;
		187
56	aio_lstat "$path$_" for @$files;	188	aio_lstat [$wd, $_] for @$files;
		189	$n_stats += @$files;
		190	};
57	}	191	}
58		192
59	&scan ("$path$_") for @$dirs;	193	if ($opt_sync) {
		194	aio_wd $path, sub {
		195	my $wd = shift;
		196
		197	aio_pathsync [$wd, $_] for @$files;
		198	aio_pathsync $wd;
		199	};
		200	}
		201
		202	push @todo, "$path$_"
		203	for sort { $b cmp $a } @$dirs;
60	};	204	};
61	}	205	}
62		206
63	IO::AIO::max_outstanding 64;	207	IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly
64	IO::AIO::min_parallel 32;	208	IO::AIO::min_parallel 20;
65		209
66	for my $seed (@ARGV) {	210	@todo = reverse @ARGV;
		211
		212	while () {
		213	if (@todo) {
		214	my $seed = pop @todo;
67	$seed =~ s/\/+$//;	215	$seed =~ s/\/+$//;
68	aio_lstat "$seed/.", sub {	216	aio_lstat "$seed/.", sub {
69	if ($_[0]) {	217	if ($_[0]) {
70	print STDERR "$seed: $!\n";	218	print STDERR "$seed: $!\n";
71	} elsif (-d _) {	219	} elsif (-d _) {
72	scan $seed;	220	scan $seed;
73	} else {	221	} else {
74	printfn "", $seed, "/";	222	printfn "", $seed, "/";
		223	}
75	}	224	};
		225	} else {
		226	IO::AIO::poll_wait;
76	};	227	}
77	}
78		228
79	IO::AIO::flush;	229	last unless IO::AIO::nreqs;
80		230
		231	IO::AIO::poll_cb;
		232	}
		233

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing IO-AIO/bin/treescan (file contents): Revision 1.5 by root, Sun Jan 9 05:10:28 2011 UTC vs. Revision 1.21 by root, Wed Dec 30 07:45:33 2020 UTC

Diff Legend

Comparing IO-AIO/bin/treescan (file contents):
Revision 1.5 by root, Sun Jan 9 05:10:28 2011 UTC vs.
Revision 1.21 by root, Wed Dec 30 07:45:33 2020 UTC