--- IO-AIO/AIO.pm 2007/01/07 21:36:58 1.100 +++ IO-AIO/AIO.pm 2007/12/02 20:54:33 1.119 @@ -28,10 +28,13 @@ my $grp = aio_group sub { print "all stats done\n" }; add $grp aio_stat "..." for ...; - # AnyEvent integration + # AnyEvent integration (EV, Event, Glib, Tk, urxvt, pureperl...) open my $fh, "<&=" . IO::AIO::poll_fileno or die "$!"; my $w = AnyEvent->io (fh => $fh, poll => 'r', cb => sub { IO::AIO::poll_cb }); + # EV integration + my $w = EV::io IO::AIO::poll_fileno, EV::READ, \&IO::AIO::poll_cb; + # Event integration Event->io (fd => IO::AIO::poll_fileno, poll => 'r', @@ -64,12 +67,11 @@ on a RAID volume or over NFS when you do a number of stat operations concurrently. -While most of this works on all types of file descriptors (for example -sockets), using these functions on file descriptors that support -nonblocking operation (again, sockets, pipes etc.) is very inefficient or -might not work (aio_read fails on sockets/pipes/fifos). Use an event loop -for that (such as the L module): IO::AIO will naturally fit -into such an event loop itself. +While most of this works on all types of file descriptors (for +example sockets), using these functions on file descriptors that +support nonblocking operation (again, sockets, pipes etc.) is very +inefficient. Use an event loop for that (such as the L +module): IO::AIO will naturally fit into such an event loop itself. In this version, a number of threads are started that execute your requests and signal their completion. You don't need thread support @@ -81,10 +83,10 @@ aio_write, so the remaining functionality would have to be implemented using threads anyway. -Although the module will work with in the presence of other (Perl-) -threads, it is currently not reentrant in any way, so use appropriate -locking yourself, always call C from within the same thread, or -never call C (or other C functions) recursively. +Although the module will work in the presence of other (Perl-) threads, +it is currently not reentrant in any way, so use appropriate locking +yourself, always call C from within the same thread, or never +call C (or other C functions) recursively. =head2 EXAMPLE @@ -186,18 +188,21 @@ package IO::AIO; +use Carp (); + no warnings; use strict 'vars'; use base 'Exporter'; BEGIN { - our $VERSION = '2.32'; + our $VERSION = '2.6'; our @AIO_REQ = qw(aio_sendfile aio_read aio_write aio_open aio_close aio_stat aio_lstat aio_unlink aio_rmdir aio_readdir aio_scandir aio_symlink - aio_readlink aio_fsync aio_fdatasync aio_readahead aio_rename aio_link - aio_move aio_copy aio_group aio_nop aio_mknod aio_load aio_rmtree); + aio_readlink aio_sync aio_fsync aio_fdatasync aio_readahead aio_rename aio_link + aio_move aio_copy aio_group aio_nop aio_mknod aio_load aio_rmtree aio_mkdir + aio_chown aio_chmod aio_utime aio_truncate); our @EXPORT = (@AIO_REQ, qw(aioreq_pri aioreq_nice aio_block)); our @EXPORT_OK = qw(poll_fileno poll_cb poll_wait flush min_parallel max_parallel max_idle @@ -273,11 +278,13 @@ }; }; + =item aioreq_nice $pri_adjust Similar to C, but subtracts the given value from the current priority, so the effect is cumulative. + =item aio_open $pathname, $flags, $mode, $callback->($fh) Asynchronously open or create a file and call the callback with a newly @@ -292,7 +299,9 @@ Likewise, C<$mode> specifies the mode of the newly created file, if it didn't exist and C has been given, just like perl's C, except that it is mandatory (i.e. use C<0> if you don't create new files, -and C<0666> or C<0777> if you do). +and C<0666> or C<0777> if you do). Note that the C<$mode> will be modified +by the umask in effect then the request is being executed, so better never +change the umask. Example: @@ -305,29 +314,94 @@ } }; + =item aio_close $fh, $callback->($status) Asynchronously close a file and call the callback with the result -code. I although accepted, you should not pass in a perl -filehandle here, as perl will likely close the file descriptor another -time when the filehandle is destroyed. Normally, you can safely call perls -C or just let filehandles go out of scope. +code. + +Unfortunately, you can't do this to perl. Perl I very strongly on +closing the file descriptor associated with the filehandle itself. Here is +what aio_close will try: + + 1. dup()licate the fd + 2. asynchronously close() the duplicated fd + 3. dup()licate the fd once more + 4. let perl close() the filehandle + 5. asynchronously close the duplicated fd + +The idea is that the first close() flushes stuff to disk that closing an +fd will flush, so when perl closes the fd, nothing much will need to be +flushed. The second async. close() will then flush stuff to disk that +closing the last fd to the file will flush. + +Just FYI, SuSv3 has this to say on close: + + All outstanding record locks owned by the process on the file + associated with the file descriptor shall be removed. + + If fildes refers to a socket, close() shall cause the socket to be + destroyed. ... close() shall block for up to the current linger + interval until all data is transmitted. + [this actually sounds like a specification bug, but who knows] + +And at least Linux additionally actually flushes stuff on every close, +even when the file itself is still open. + +Sounds enourmously inefficient and complicated? Yes... please show me how +to nuke perl's fd out of existence... + +=cut + +sub aio_close($;$) { + aio_block { + my ($fh, $cb) = @_; + + my $pri = aioreq_pri; + my $grp = aio_group $cb; + + my $fd = fileno $fh; + + defined $fd or Carp::croak "aio_close called with fd-less filehandle"; + + # if the dups fail we will simply get EBADF + my $fd2 = _dup $fd; + aioreq_pri $pri; + add $grp _aio_close $fd2, sub { + my $fd2 = _dup $fd; + close $fh; + aioreq_pri $pri; + add $grp _aio_close $fd2, sub { + $grp->result ($_[0]); + }; + }; + + $grp + } +} -This is supposed to be a bug in the API, so that might change. It's -therefore best to avoid this function. =item aio_read $fh,$offset,$length, $data,$dataoffset, $callback->($retval) =item aio_write $fh,$offset,$length, $data,$dataoffset, $callback->($retval) -Reads or writes C bytes from the specified C and C -into the scalar given by C and offset C and calls the +Reads or writes C<$length> bytes from the specified C<$fh> and C<$offset> +into the scalar given by C<$data> and offset C<$dataoffset> and calls the callback without the actual number of bytes read (or -1 on error, just like the syscall). +If C<$offset> is undefined, then the current file descriptor offset will +be used (and updated), otherwise the file descriptor offset will not be +changed by these calls. + +If C<$length> is undefined in C, use the remaining length of C<$data>. + +If C<$dataoffset> is less than zero, it will be counted from the end of +C<$data>. + The C<$data> scalar I be modified in any way while the request -is outstanding. Modifying it can result in segfaults or WW3 (if the -necessary/optional hardware is installed). +is outstanding. Modifying it can result in segfaults or World War III (if +the necessary/optional hardware is installed). Example: Read 15 bytes at offset 7 into scalar C<$buffer>, starting at offset C<0> within the scalar: @@ -337,6 +411,7 @@ print "read $_[0] bytes: <$buffer>\n"; }; + =item aio_sendfile $out_fh, $in_fh, $in_offset, $length, $callback->($retval) Tries to copy C<$length> bytes from C<$in_fh> to C<$out_fh>. It starts @@ -360,6 +435,7 @@ value equals C<$length> one can assume that C<$length> bytes have been read. + =item aio_readahead $fh,$offset,$length, $callback->($retval) C populates the page cache with data from a file so that @@ -374,6 +450,7 @@ If that syscall doesn't exist (likely if your OS isn't Linux) it will be emulated by simply reading the data, which would have a similar effect. + =item aio_stat $fh_or_path, $callback->($status) =item aio_lstat $fh, $callback->($status) @@ -396,11 +473,54 @@ print "size is ", -s _, "\n"; }; + +=item aio_utime $fh_or_path, $atime, $mtime, $callback->($status) + +Works like perl's C function (including the special case of $atime +and $mtime being undef). Fractional times are supported if the underlying +syscalls support them. + +When called with a pathname, uses utimes(2) if available, otherwise +utime(2). If called on a file descriptor, uses futimes(2) if available, +otherwise returns ENOSYS, so this is not portable. + +Examples: + + # set atime and mtime to current time (basically touch(1)): + aio_utime "path", undef, undef; + # set atime to current time and mtime to beginning of the epoch: + aio_utime "path", time, undef; # undef==0 + + +=item aio_chown $fh_or_path, $uid, $gid, $callback->($status) + +Works like perl's C function, except that C for either $uid +or $gid is being interpreted as "do not change" (but -1 can also be used). + +Examples: + + # same as "chown root path" in the shell: + aio_chown "path", 0, -1; + # same as above: + aio_chown "path", 0, undef; + + +=item aio_truncate $fh_or_path, $offset, $callback->($status) + +Works like truncate(2) or ftruncate(2). + + +=item aio_chmod $fh_or_path, $mode, $callback->($status) + +Works like perl's C function. + + =item aio_unlink $pathname, $callback->($status) Asynchronously unlink (delete) a file and call the callback with the result code. + =item aio_mknod $path, $mode, $dev, $callback->($status) [EXPERIMENTAL] @@ -411,32 +531,45 @@ aio_mknod $path, IO::AIO::S_IFIFO | $mode, 0, sub { ... + =item aio_link $srcpath, $dstpath, $callback->($status) Asynchronously create a new link to the existing object at C<$srcpath> at the path C<$dstpath> and call the callback with the result code. + =item aio_symlink $srcpath, $dstpath, $callback->($status) Asynchronously create a new symbolic link to the existing object at C<$srcpath> at the path C<$dstpath> and call the callback with the result code. + =item aio_readlink $path, $callback->($link) Asynchronously read the symlink specified by C<$path> and pass it to the callback. If an error occurs, nothing or undef gets passed to the callback. + =item aio_rename $srcpath, $dstpath, $callback->($status) Asynchronously rename the object at C<$srcpath> to C<$dstpath>, just as rename(2) and call the callback with the result code. + +=item aio_mkdir $pathname, $mode, $callback->($status) + +Asynchronously mkdir (create) a directory and call the callback with +the result code. C<$mode> will be modified by the umask at the time the +request is executed, so do not change your umask. + + =item aio_rmdir $pathname, $callback->($status) Asynchronously rmdir (delete) a directory and call the callback with the result code. + =item aio_readdir $pathname, $callback->($entries) Unlike the POSIX call of the same name, C reads an entire @@ -446,6 +579,7 @@ The callback a single argument which is either C or an array-ref with the filenames. + =item aio_load $path, $data, $callback->($status) This is a composite request that tries to fully load the given file into @@ -463,7 +597,7 @@ aioreq_pri $pri; add $grp aio_open $path, O_RDONLY, 0, sub { - my ($fh) = @_ + my $fh = shift or return $grp->result (-1); aioreq_pri $pri; @@ -761,6 +895,10 @@ } } +=item aio_sync $callback->($status) + +Asynchronously call sync and call the callback when finished. + =item aio_fsync $fh, $callback->($status) Asynchronously call fsync on the given filehandle and call the callback @@ -1072,8 +1210,12 @@ IO::AIO::poll_wait, IO::AIO::poll_cb while IO::AIO::nreqs; +=back + =head3 CONTROLLING THE NUMBER OF THREADS +=over + =item IO::AIO::min_parallel $nthreads Set the minimum number of AIO threads to C<$nthreads>. The current @@ -1130,7 +1272,7 @@ use an C together with a feed callback. Sets the maximum number of outstanding requests to C<$nreqs>. If you -to queue up more than this number of requests, the next call to the +do queue up more than this number of requests, the next call to the C (and C and other functions calling C) function will block until the limit is no longer exceeded. @@ -1141,8 +1283,12 @@ C is mainly useful in simple scripts (with low values) or as a stop gap to shield against fatal memory overflow (with large values). +=back + =head3 STATISTICAL INFORMATION +=over + =item IO::AIO::nreqs Returns the number of requests currently in the ready, execute or pending @@ -1167,22 +1313,6 @@ =cut -# support function to convert a fd into a perl filehandle -sub _fd2fh { - return undef if $_[0] < 0; - - # try to generate nice filehandles - my $sym = "IO::AIO::fd#$_[0]"; - local *$sym; - - open *$sym, "+<&=$_[0]" # usually works under any unix - or open *$sym, "<&=$_[0]" # cygwin needs this - or open *$sym, ">&=$_[0]" # or this - or return undef; - - *$sym -} - min_parallel 8; END { flush } @@ -1215,7 +1345,7 @@ scalars and other data passed into aio requests will also be locked and will consume memory till the request has entered the done state. -This is now awfully much, so queuing lots of requests is not usually a +This is not awfully much, so queuing lots of requests is not usually a problem. Per-thread usage: