--- IO-AIO/AIO.pm 2005/08/28 11:05:50 1.39 +++ IO-AIO/AIO.pm 2005/08/30 15:45:10 1.40 @@ -68,7 +68,7 @@ $VERSION = 1.6; @EXPORT = qw(aio_sendfile aio_read aio_write aio_open aio_close aio_stat - aio_lstat aio_unlink aio_rmdir aio_readdir aio_symlink + aio_lstat aio_unlink aio_rmdir aio_readdir aio_scandir aio_symlink aio_fsync aio_fdatasync aio_readahead); @EXPORT_OK = qw(poll_fileno poll_cb min_parallel max_parallel max_outstanding nreqs); @@ -107,7 +107,7 @@ =over 4 -=item aio_open $pathname, $flags, $mode, $callback +=item aio_open $pathname, $flags, $mode, $callback->($fh) Asynchronously open or create a file and call the callback with a newly created filehandle for the file. @@ -134,7 +134,7 @@ } }; -=item aio_close $fh, $callback +=item aio_close $fh, $callback->($status) Asynchronously close a file and call the callback with the result code. I<WARNING:> although accepted, you should not pass in a perl @@ -145,9 +145,9 @@ This is supposed to be a bug in the API, so that might change. It's therefore best to avoid this function. -=item aio_read $fh,$offset,$length, $data,$dataoffset,$callback +=item aio_read $fh,$offset,$length, $data,$dataoffset, $callback->($retval) -=item aio_write $fh,$offset,$length, $data,$dataoffset,$callback +=item aio_write $fh,$offset,$length, $data,$dataoffset, $callback->($retval) Reads or writes C<length> bytes from the specified C<fh> and C<offset> into the scalar given by C<data> and offset C<dataoffset> and calls the @@ -166,7 +166,7 @@ print "read $_[0] bytes: <$buffer>\n"; }; -=item aio_sendfile $out_fh, $in_fh, $in_offset, $length, $callback +=item aio_sendfile $out_fh, $in_fh, $in_offset, $length, $callback->($retval) Tries to copy C<$length> bytes from C<$in_fh> to C<$out_fh>. It starts reading at byte offset C<$in_offset>, and starts writing at the current @@ -189,7 +189,7 @@ value equals C<$length> one can assume that C<$length> bytes have been read. 
-=item aio_readahead $fh,$offset,$length, $callback +=item aio_readahead $fh,$offset,$length, $callback->($retval) C<aio_readahead> populates the page cache with data from a file so that subsequent reads from that file will not block on disk I/O. The C<$offset> @@ -203,9 +203,9 @@ If that syscall doesn't exist (likely if your OS isn't Linux) it will be emulated by simply reading the data, which would have a similar effect. -=item aio_stat $fh_or_path, $callback +=item aio_stat $fh_or_path, $callback->($status) -=item aio_lstat $fh, $callback +=item aio_lstat $fh, $callback->($status) Works like perl's C<stat> or C<lstat> in void context. The callback will be called after the stat and the results will be available using C<stat _> @@ -225,17 +225,17 @@ print "size is ", -s _, "\n"; }; -=item aio_unlink $pathname, $callback +=item aio_unlink $pathname, $callback->($status) Asynchronously unlink (delete) a file and call the callback with the result code. -=item aio_rmdir $pathname, $callback +=item aio_rmdir $pathname, $callback->($status) Asynchronously rmdir (delete) a directory and call the callback with the result code. -=item aio_readdir $pathname $callback +=item aio_readdir $pathname, $callback->($entries) Unlike the POSIX call of the same name, C<aio_readdir> reads an entire directory (i.e. opendir + readdir + closedir). The entries will not be @@ -244,12 +244,149 @@ The callback a single argument which is either C<undef> or an array-ref with the filenames. -=item aio_fsync $fh, $callback +=item aio_scandir $path, $maxreq, $callback->($dirs, $nondirs) + +Scans a directory (similar to C<aio_readdir>) and tries to separate the +entries of directory C<$path> into two sets of names, ones you can recurse +into (directories), and ones you cannot recurse into (everything else). + +C<aio_scandir> is a composite request that consists of many +aio-primitives. C<$maxreq> specifies the maximum number of outstanding +aio requests that this function generates. If it is C<< <= 0 >>, then a +suitable default will be chosen (currently 8). 
+ +On error, the callback is called without arguments, otherwise it receives +two array-refs with path-relative entry names. + +Example: + + aio_scandir $dir, 0, sub { + my ($dirs, $nondirs) = @_; + print "real directories: @$dirs\n"; + print "everything else: @$nondirs\n"; + }; + +Implementation notes. + +The C<aio_readdir> cannot be avoided, but C<stat()>'ing every entry can. + +After reading the directory, the modification time, size etc. of the +directory before and after the readdir is checked, and if they match, the +link count will be used to decide how many entries are directories (if +>= 2). Otherwise, no knowledge of the number of subdirectories will be +assumed. + +Then entries will be sorted into likely directories (everything without a +non-initial dot) and likely non-directories (everything else). Then every +entry + C</.> will be C<stat>'ed, likely directories first. This is often +faster because filesystems might detect the type of the entry without +reading the inode data (e.g. ext2's filetype feature). If that succeeds, +it assumes that the entry is a directory or a symlink to directory (which +will be checked separately). + +If the known number of directories has been reached, the rest of the +entries are assumed to be non-directories. 
+
+=cut
+
+sub aio_scandir($$$) {
+   # composite request: stat the dir, read it, stat it again, then probe the
+   # entries; $cb gets (\@dirs, \@nondirs) exactly once, or no args on error
+   my ($path, $maxreq, $cb) = @_;
+   $maxreq = 8 if $maxreq <= 0;
+
+   # stat once
+   aio_stat $path, sub {
+      return $cb->() if $_[0]; # stop here, or $cb would fire a second time
+      my $hash1 = join ":", (stat _)[0,1,3,7,9];
+
+      # read the directory entries
+      aio_readdir $path, sub {
+         my $entries = shift or return $cb->();
+
+         # stat the dir another time
+         aio_stat $path, sub {
+            my $hash2 = join ":", (stat _)[0,1,3,7,9];
+
+            my $ndirs;
+
+            # take the slow route if anything looks fishy
+            if ($hash1 ne $hash2) {
+               $ndirs = -1; # negative: the countdown below never reaches zero
+            } else {
+               # if nlink == 2, we are finished (and must not scan any further)
+               # on non-posix-fs's, we rely on nlink < 2
+               $ndirs = (stat _)[3] - 2
+                  or return $cb->([], $entries);
+            }
+
+            # sort into likely dirs and likely nondirs
+            # dirs == files without ".", short entries first
+            $entries = [map $_->[0],
+                           sort { $b->[1] cmp $a->[1] }
+                              map [$_, sprintf "%s%04d", (/.\./ ? "1" : "0"), length],
+                                 @$entries];
+
+            my (@dirs, @nondirs);
+            my ($statcb, $schedcb);
+            my $nreq = 0; # number of requests currently in flight
+
+            $schedcb = sub {
+               if (@$entries) {
+                  if ($nreq < $maxreq) {
+                     my $ent = pop @$entries; # best candidates are sorted last
+                     $nreq++;
+                     aio_stat "$path/$ent/.", sub { $statcb->($_[0], $ent) };
+                  }
+               } elsif (!$nreq) {
+                  # finished: break the closure cycle, then report exactly once
+                  undef $statcb;
+                  undef $schedcb;
+                  $cb->(\@dirs, \@nondirs);
+                  undef $cb;
+               }
+            };
+            $statcb = sub {
+               my ($status, $entry) = @_;
+
+               if ($status < 0) {
+                  $nreq--;
+                  push @nondirs, $entry;
+                  $schedcb->();
+               } else {
+                  # need to check for real directory
+                  aio_lstat "$path/$entry", sub {
+                     $nreq--;
+
+                     if (-d _) {
+                        push @dirs, $entry;
+
+                        if (!--$ndirs) {
+                           push @nondirs, @$entries;
+                           $entries = [];
+                        }
+                     } else {
+                        push @nondirs, $entry;
+                     }
+
+                     $schedcb->();
+                  }
+               }
+            };
+
+            # run at least once, so an empty entry list still reaches "finished"
+            do { $schedcb->() } while $schedcb && @$entries && $nreq < $maxreq;
+         };
+      };
+   };
+}
+
+=item aio_fsync $fh, $callback->($status)
 
 Asynchronously call fsync on the given filehandle and call the callback
 with the fsync result code.
 
-=item aio_fdatasync $fh, $callback +=item aio_fdatasync $fh, $callback->($status) Asynchronously call fdatasync on the given filehandle and call the callback with the fdatasync result code.