[ViewVC] Diff of: cvs/IO-AIO/bin/treescan

Comparing IO-AIO/bin/treescan (file contents):
Revision 1.11 by root, Tue Oct 4 18:22:43 2011 UTC vs.
Revision 1.18 by root, Sat Dec 3 16:33:45 2016 UTC

…		…
1	#!/opt/bin/perl	1	#!/opt/bin/perl
2		2
3	# inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>	3	# inspired by treescan by Jamie Lokier <jamie@imbolc.ucc.ie>
4	# about 40% faster than the original version (on my fs and raid :)	4	# about 40% faster than the original version (on my fs and raid :)
5		5
6	use strict;	6	=head1 NAME
		7
		8	treescan - scan directory trees, list dirs/files, stat, sync, grep
		9
		10	=head1 SYNOPSIS
		11
		12	treescan [OPTION...] [PATH...]
		13
		14	-q, --quiet do not print list of files/directories
		15	-0, --print0 use null character instead of newline to separate names
		16	-s, --stat call stat on every entry, to get stat data into cache
		17	-d, --dirs only list dirs
		18	-f, --files only list files
		19	-p, --progress regularly print progress to stderr
		20	--sync open/fsync/close every entry
		21	-g, --grep=RE only list files that match the given perl RegEx
		22
		23	=head1 DESCRIPTION
		24
		25	The F<treescan> command scans directories and their contents
		26	recursively. By default it lists all files and directories (with trailing
		27	C</>), but it can optionally do various other things.
		28
		29	If no paths are given, F<treescan> will use C<.>, the current directory.
		30
		31	=head2 OPTIONS
		32
		33	=over 4
		34
		35	=item -q, --quiet
		36
		37	By default, F<treescan> prints the full paths of all directories or files
		38	it finds. This option disables printing of filenames completely. This is
		39	useful if you want to run F<treescan> solely for its side effects, such as
		40	pulling C<stat> data into memory.
		41
		42	=item -0, --print0
		43
		44	Instead of using newlines, use null characters after each filename. This
		45	is useful to avoid quoting problems when piping the result into other
		46	programs (for example, GNU F<grep>, F<xargs> and so on all have options to
		47	deal with this).
		48
		49	=item -s, --stat
		50
		51	Normally, F<treescan> will use heuristics to avoid most C<stat> calls,
		52	which is what makes it so fast. This option forces it to C<stat> every file.
		53
		54	This is only useful for the side effect of pulling the C<stat> data into
		55	the cache. If your disk cache is big enough, it will be filled with
		56	file meta data after F<treescan> is done, which can speed up subsequent
		57	commands considerably. Often, you can run F<treescan> in parallel with
		58	other directory-scanning programs to speed them up.
		59
		60	=item -d, --dirs
		61
		62	Only lists directories, not file paths. This is useful if you quickly want
		63	a list of directories and their subdirectories.
		64
		65	=item -f, --files
		66
		67	Only list files, not directories. This is useful if you want to operate on
		68	all files in a hierarchy, and the directories would only get in the way.
		69
		70	=item -p, --progress
		71
		72	Regularly print some progress information to standard error. This is
		73	useful to get some progress information on long running tasks. Since
		74	the progress is printed to standard error, you can pipe the output of
		75	F<treescan> into other programs as usual.
		76
		77	=item --sync
		78
		79	The C<--sync> option can be used to make sure all the files/dirs in a tree
		80	are sync'ed to disk. For example this could be useful after unpacking an
		81	archive, to make sure the files hit the disk before deleting the archive
		82	file itself.
		83
		84	=item -g, --grep=RE
		85
		86	This applies a perl regular expression (see the L<perlre> manpage) to all paths that would normally be printed
		87	and will only print matching paths.
		88
		89	The regular expression uses an C</s> (single line) modifier by default, so
		90	newlines are matched by C<.>.
		91
		92	=back
		93
		94	=head1 AUTHOR
		95
		96	Marc Lehmann <schmorp@schmorp.de>
		97	http://home.schmorp.de/
		98
		99	=cut
		100
		101	use common::sense;
7	use Getopt::Long;	102	use Getopt::Long;
8	use Time::HiRes ();	103	use Time::HiRes ();
9	use IO::AIO;	104	use IO::AIO;
10		105
11	our $VERSION = $IO::AIO::VERSION;	106	our $VERSION = $IO::AIO::VERSION;
12		107
13	Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");	108	Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order", "auto_help", "auto_version");
14		109
15	my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs,	110	my ($opt_silent, $opt_print0, $opt_stat, $opt_nodirs, $opt_help,
16	$opt_nofiles, $opt_grep, $opt_progress);	111	$opt_nofiles, $opt_grep, $opt_progress, $opt_sync);
17		112
18	GetOptions	113	GetOptions
19	"quiet\|q" => \$opt_silent,	114	"quiet\|q" => \$opt_silent,
20	"print0\|0" => \$opt_print0,	115	"print0\|0" => \$opt_print0,
21	"stat\|s" => \$opt_stat,	116	"stat\|s" => \$opt_stat,
22	"dirs\|d" => \$opt_nofiles,	117	"dirs\|d" => \$opt_nofiles,
23	"files\|f" => \$opt_nodirs,	118	"files\|f" => \$opt_nodirs,
24	"grep\|g=s" => \$opt_grep,	119	"grep\|g=s" => \$opt_grep,
25	"progress\|p" => \$opt_progress,	120	"progress\|p" => \$opt_progress,
		121	"sync" => \$opt_sync,
		122	"help" => \$opt_help,
26	or die "Usage: try $0 --help";	123	or die "Usage: try $0 --help";
27		124
		125	if ($opt_help) {
		126	require Pod::Usage;
		127
		128	Pod::Usage::pod2usage (
		129	-verbose => 1,
		130	-exitval => 0,
		131	);
		132	}
		133
28	@ARGV = "." unless @ARGV;	134	@ARGV = "." unless @ARGV;
29		135
30	$opt_grep &&= qr{$opt_grep}s;	136	$opt_grep &&= qr{$opt_grep}s;
31		137
32	my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);	138	my ($n_dirs, $n_files, $n_stats) = (0, 0, 0);
33	my $n_last;
34	my $n_start = Time::HiRes::time;	139	my ($n_last, $n_start) = (Time::HiRes::time) x 2;
35		140
36	sub printfn {	141	sub printfn {
37	my ($prefix, $files, $suffix) = @_;	142	my ($prefix, $files, $suffix) = @_;
38		143
39	if ($opt_grep) {	144	if ($opt_grep) {
54		159
55	IO::AIO::poll_cb;	160	IO::AIO::poll_cb;
56		161
57	if ($opt_progress and $n_last + 1 < Time::HiRes::time) {	162	if ($opt_progress and $n_last + 1 < Time::HiRes::time) {
58	$n_last = Time::HiRes::time;	163	$n_last = Time::HiRes::time;
59	printf STDERR "%d dirs %d files %d stats %g stats/s \r", $n_dirs, $n_files, $n_stats, $n_stats / ($n_last - $n_start)	164	my $d = $n_last - $n_start;
60	if $opt_progress;	165	printf STDERR "\r%d dirs (%g/s) %d files (%g/s) %d stats (%g/s) ",
		166	$n_dirs, $n_dirs / $d,
		167	$n_files, $n_files / $d,
		168	$n_stats, $n_stats / $d;
61	}	169	}
62		170
63	aioreq_pri -1;	171	aioreq_pri -1;
64	++$n_dirs;	172	++$n_dirs;
65	aio_scandir $path, 8, sub {	173	aio_scandir $path, 8, sub {
66	my ($dirs, $files) = @_	174	my ($dirs, $files) = @_
67	or warn "$path: $!\n";	175	or return warn "$path: $!\n";
68		176
69	printfn "", [$path] unless $opt_nodirs;	177	printfn "", [$path] unless $opt_nodirs;
70	printfn $path, $files unless $opt_nofiles;	178	printfn $path, $files unless $opt_nofiles;
71		179
72	$n_files += @$files;	180	$n_files += @$files;
…		…
78	aio_lstat [$wd, $_] for @$files;	186	aio_lstat [$wd, $_] for @$files;
79	$n_stats += @$files;	187	$n_stats += @$files;
80	};	188	};
81	}	189	}
82		190
		191	if ($opt_sync) {
		192	aio_wd $path, sub {
		193	my $wd = shift;
		194
		195	aio_pathsync [$wd, $_] for @$files;
		196	aio_pathsync $wd;
		197	};
		198	}
		199
83	&scan ("$path$_") for @$dirs;	200	&scan ("$path$_") for @$dirs;
84	};	201	};
85	}	202	}
86		203
87	IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly	204	IO::AIO::max_outstanding 100; # two fds per directory, so limit accordingly
88	IO::AIO::min_parallel 20;	205	IO::AIO::min_parallel 20;
89		206
90	for my $seed (@ARGV) {	207	for my $seed (@ARGV) {
91	$seed =~ s/\/+$//;	208	$seed =~ s/\/+$//;
92	++$n_stats;
93	aio_lstat "$seed/.", sub {	209	aio_lstat "$seed/.", sub {
94	if ($_[0]) {	210	if ($_[0]) {
95	print STDERR "$seed: $!\n";	211	print STDERR "$seed: $!\n";
96	} elsif (-d _) {	212	} elsif (-d _) {
97	scan $seed;	213	scan $seed;

Diff Legend

–  Removed lines
+  Added lines
<  Changed lines
>  Changed lines

Comparing IO-AIO/bin/treescan (file contents): Revision 1.11 by root, Tue Oct 4 18:22:43 2011 UTC vs. Revision 1.18 by root, Sat Dec 3 16:33:45 2016 UTC

Diff Legend

Comparing IO-AIO/bin/treescan (file contents):
Revision 1.11 by root, Tue Oct 4 18:22:43 2011 UTC vs.
Revision 1.18 by root, Sat Dec 3 16:33:45 2016 UTC