--- IO-AIO/AIO.pm 2006/11/08 01:57:42 1.92 +++ IO-AIO/AIO.pm 2008/10/22 18:15:36 1.142 @@ -7,7 +7,8 @@ use IO::AIO; aio_open "/etc/passwd", O_RDONLY, 0, sub { - my ($fh) = @_; + my $fh = shift + or die "/etc/passwd: $!"; ... }; @@ -27,9 +28,11 @@ my $grp = aio_group sub { print "all stats done\n" }; add $grp aio_stat "..." for ...; - # AnyEvent integration - open my $fh, "<&=" . IO::AIO::poll_fileno or die "$!"; - my $w = AnyEvent->io (fh => $fh, poll => 'r', cb => sub { IO::AIO::poll_cb }); + # AnyEvent integration (EV, Event, Glib, Tk, POE, urxvt, pureperl...) + use AnyEvent::AIO; + + # EV integration + my $w = EV::io IO::AIO::poll_fileno, EV::READ, \&IO::AIO::poll_cb; # Event integration Event->io (fd => IO::AIO::poll_fileno, @@ -63,12 +66,11 @@ on a RAID volume or over NFS when you do a number of stat operations concurrently. -While most of this works on all types of file descriptors (for example -sockets), using these functions on file descriptors that support -nonblocking operation (again, sockets, pipes etc.) is very inefficient or -might not work (aio_read fails on sockets/pipes/fifos). Use an event loop -for that (such as the L module): IO::AIO will naturally fit -into such an event loop itself. +While most of this works on all types of file descriptors (for +example sockets), using these functions on file descriptors that +support nonblocking operation (again, sockets, pipes etc.) is very +inefficient. Use an event loop for that (such as the L +module): IO::AIO will naturally fit into such an event loop itself. In this version, a number of threads are started that execute your requests and signal their completion. You don't need thread support @@ -80,10 +82,10 @@ aio_write, so the remaining functionality would have to be implemented using threads anyway. 
-Although the module will work with in the presence of other (Perl-) -threads, it is currently not reentrant in any way, so use appropriate -locking yourself, always call C from within the same thread, or -never call C (or other C functions) recursively. +Although the module will work in the presence of other (Perl-) threads, +it is currently not reentrant in any way, so use appropriate locking +yourself, always call C from within the same thread, or never +call C (or other C functions) recursively. =head2 EXAMPLE @@ -101,7 +103,7 @@ # queue the request to open /etc/passwd aio_open "/etc/passwd", O_RDONLY, 0, sub { - my $fh = $_[0] + my $fh = shift or die "error while opening: $!"; # stat'ing filehandles is generally non-blocking @@ -185,18 +187,24 @@ package IO::AIO; +use Carp (); + no warnings; use strict 'vars'; use base 'Exporter'; BEGIN { - our $VERSION = '2.2'; + our $VERSION = '3.16'; + + our @AIO_REQ = qw(aio_sendfile aio_read aio_write aio_open aio_close + aio_stat aio_lstat aio_unlink aio_rmdir aio_readdir + aio_scandir aio_symlink aio_readlink aio_sync aio_fsync + aio_fdatasync aio_sync_file_range aio_pathsync aio_readahead + aio_rename aio_link aio_move aio_copy aio_group + aio_nop aio_mknod aio_load aio_rmtree aio_mkdir aio_chown + aio_chmod aio_utime aio_truncate); - our @AIO_REQ = qw(aio_sendfile aio_read aio_write aio_open aio_close aio_stat - aio_lstat aio_unlink aio_rmdir aio_readdir aio_scandir aio_symlink - aio_readlink aio_fsync aio_fdatasync aio_readahead aio_rename aio_link - aio_move aio_copy aio_group aio_nop aio_mknod); our @EXPORT = (@AIO_REQ, qw(aioreq_pri aioreq_nice)); our @EXPORT_OK = qw(poll_fileno poll_cb poll_wait flush min_parallel max_parallel max_idle @@ -218,7 +226,7 @@ and they all accept an additional (and optional) C<$callback> argument which must be a code reference. This code reference will get called with the syscall return code (e.g. 
most syscalls return C<-1> on error, unlike -perl, which usually delivers "false") as it's sole argument when the given +perl, which usually delivers "false") as its sole argument after the given syscall has been executed asynchronously. All functions expecting a filehandle keep a copy of the filehandle @@ -242,7 +250,7 @@ use something else to ensure your scalar has the correct contents. This works, btw. independent of the internal UTF-8 bit, which IO::AIO -handles correctly wether it is set or not. +handles correctly whether it is set or not. =over 4 @@ -272,11 +280,13 @@ }; }; + =item aioreq_nice $pri_adjust Similar to C, but subtracts the given value from the current priority, so the effect is cumulative. + =item aio_open $pathname, $flags, $mode, $callback->($fh) Asynchronously open or create a file and call the callback with a newly @@ -291,7 +301,9 @@ Likewise, C<$mode> specifies the mode of the newly created file, if it didn't exist and C has been given, just like perl's C, except that it is mandatory (i.e. use C<0> if you don't create new files, -and C<0666> or C<0777> if you do). +and C<0666> or C<0777> if you do). Note that the C<$mode> will be modified +by the umask in effect when the request is being executed, so better never +change the umask. Example: @@ -304,29 +316,45 @@ } }; + =item aio_close $fh, $callback->($status) Asynchronously close a file and call the callback with the result -code. I although accepted, you should not pass in a perl -filehandle here, as perl will likely close the file descriptor another -time when the filehandle is destroyed. Normally, you can safely call perls -C or just let filehandles go out of scope. +code. + +Unfortunately, you can't do this to perl. Perl I<insists> very strongly on +closing the file descriptor associated with the filehandle itself. -This is supposed to be a bug in the API, so that might change. It's -therefore best to avoid this function. 
+Therefore, C will not close the filehandle - instead it will +use dup2 to overwrite the file descriptor with the write-end of a pipe +(the pipe fd will be created on demand and will be cached). + +Or in other words: the file descriptor will be closed, but it will not be +free for reuse until the perl filehandle is closed. + +=cut =item aio_read $fh,$offset,$length, $data,$dataoffset, $callback->($retval) =item aio_write $fh,$offset,$length, $data,$dataoffset, $callback->($retval) -Reads or writes C bytes from the specified C and C -into the scalar given by C and offset C and calls the +Reads or writes C<$length> bytes from the specified C<$fh> and C<$offset> +into the scalar given by C<$data> and offset C<$dataoffset> and calls the callback without the actual number of bytes read (or -1 on error, just like the syscall). +If C<$offset> is undefined, then the current file descriptor offset will +be used (and updated), otherwise the file descriptor offset will not be +changed by these calls. + +If C<$length> is undefined in C, use the remaining length of C<$data>. + +If C<$dataoffset> is less than zero, it will be counted from the end of +C<$data>. + The C<$data> scalar I be modified in any way while the request -is outstanding. Modifying it can result in segfaults or WW3 (if the -necessary/optional hardware is installed). +is outstanding. Modifying it can result in segfaults or World War III (if +the necessary/optional hardware is installed). Example: Read 15 bytes at offset 7 into scalar C<$buffer>, starting at offset C<0> within the scalar: @@ -336,6 +364,7 @@ print "read $_[0] bytes: <$buffer>\n"; }; + =item aio_sendfile $out_fh, $in_fh, $in_offset, $length, $callback->($retval) Tries to copy C<$length> bytes from C<$in_fh> to C<$out_fh>. It starts @@ -359,6 +388,7 @@ value equals C<$length> one can assume that C<$length> bytes have been read. 
+ =item aio_readahead $fh,$offset,$length, $callback->($retval) C populates the page cache with data from a file so that @@ -373,6 +403,7 @@ If that syscall doesn't exist (likely if your OS isn't Linux) it will be emulated by simply reading the data, which would have a similar effect. + =item aio_stat $fh_or_path, $callback->($status) =item aio_lstat $fh, $callback->($status) @@ -395,11 +426,54 @@ print "size is ", -s _, "\n"; }; + +=item aio_utime $fh_or_path, $atime, $mtime, $callback->($status) + +Works like perl's C function (including the special case of $atime +and $mtime being undef). Fractional times are supported if the underlying +syscalls support them. + +When called with a pathname, uses utimes(2) if available, otherwise +utime(2). If called on a file descriptor, uses futimes(2) if available, +otherwise returns ENOSYS, so this is not portable. + +Examples: + + # set atime and mtime to current time (basically touch(1)): + aio_utime "path", undef, undef; + # set atime to current time and mtime to beginning of the epoch: + aio_utime "path", time, undef; # undef==0 + + +=item aio_chown $fh_or_path, $uid, $gid, $callback->($status) + +Works like perl's C function, except that C for either $uid +or $gid is being interpreted as "do not change" (but -1 can also be used). + +Examples: + + # same as "chown root path" in the shell: + aio_chown "path", 0, -1; + # same as above: + aio_chown "path", 0, undef; + + +=item aio_truncate $fh_or_path, $offset, $callback->($status) + +Works like truncate(2) or ftruncate(2). + + +=item aio_chmod $fh_or_path, $mode, $callback->($status) + +Works like perl's C function. + + =item aio_unlink $pathname, $callback->($status) Asynchronously unlink (delete) a file and call the callback with the result code. + =item aio_mknod $path, $mode, $dev, $callback->($status) [EXPERIMENTAL] @@ -410,32 +484,45 @@ aio_mknod $path, IO::AIO::S_IFIFO | $mode, 0, sub { ... 
+ =item aio_link $srcpath, $dstpath, $callback->($status) Asynchronously create a new link to the existing object at C<$srcpath> at the path C<$dstpath> and call the callback with the result code. + =item aio_symlink $srcpath, $dstpath, $callback->($status) Asynchronously create a new symbolic link to the existing object at C<$srcpath> at the path C<$dstpath> and call the callback with the result code. + =item aio_readlink $path, $callback->($link) Asynchronously read the symlink specified by C<$path> and pass it to the callback. If an error occurs, nothing or undef gets passed to the callback. + =item aio_rename $srcpath, $dstpath, $callback->($status) Asynchronously rename the object at C<$srcpath> to C<$dstpath>, just as rename(2) and call the callback with the result code. + +=item aio_mkdir $pathname, $mode, $callback->($status) + +Asynchronously mkdir (create) a directory and call the callback with +the result code. C<$mode> will be modified by the umask at the time the +request is executed, so do not change your umask. + + =item aio_rmdir $pathname, $callback->($status) Asynchronously rmdir (delete) a directory and call the callback with the result code. + =item aio_readdir $pathname, $callback->($entries) Unlike the POSIX call of the same name, C reads an entire @@ -445,13 +532,42 @@ The callback a single argument which is either C or an array-ref with the filenames. + +=item aio_load $path, $data, $callback->($status) + +This is a composite request that tries to fully load the given file into +memory. Status is the same as with aio_read. 
+ +=cut + +sub aio_load($$;$) { + my ($path, undef, $cb) = @_; + my $data = \$_[1]; + + my $pri = aioreq_pri; + my $grp = aio_group $cb; + + aioreq_pri $pri; + add $grp aio_open $path, O_RDONLY, 0, sub { + my $fh = shift + or return $grp->result (-1); + + aioreq_pri $pri; + add $grp aio_read $fh, 0, (-s $fh), $$data, 0, sub { + $grp->result ($_[0]); + }; + }; + + $grp +} + =item aio_copy $srcpath, $dstpath, $callback->($status) Try to copy the I (directories not supported as either source or destination) from C<$srcpath> to C<$dstpath> and call the callback with the C<0> (error) or C<-1> ok. -This is a composite request that it creates the destination file with +This is a composite request that creates the destination file with mode 0200 and copies the contents of the source file into it using C, followed by restoring atime, mtime, access mode and uid/gid, in that order. @@ -486,7 +602,9 @@ utime $stat[8], $stat[9], $dst; chmod $stat[2] & 07777, $dst_fh; chown $stat[4], $stat[5], $dst_fh; - close $dst_fh; + + aioreq_pri $pri; + add $grp aio_close $dst_fh; } else { $grp->result (-1); close $src_fh; @@ -515,9 +633,9 @@ destination) from C<$srcpath> to C<$dstpath> and call the callback with the C<0> (error) or C<-1> ok. -This is a composite request that tries to rename(2) the file first. If -rename files with C, it copies the file with C and, if -that is successful, unlinking the C<$srcpath>. +This is a composite request that tries to rename(2) the file first; if +rename fails with C, it copies the file with C and, if +that is successful, unlinks the C<$srcpath>. =cut @@ -601,7 +719,7 @@ =cut -sub aio_scandir($$$) { +sub aio_scandir($$;$) { my ($path, $maxreq, $cb) = @_; my $pri = aioreq_pri; @@ -687,6 +805,45 @@ $grp } +=item aio_rmtree $path, $callback->($status) + +Delete a directory tree starting (and including) C<$path>, return the +status of the final C only. 
This is a composite request that +uses C<aio_scandir> to recurse into and rmdir directories, and unlink +everything else. + +=cut + +sub aio_rmtree; +sub aio_rmtree($;$) { + my ($path, $cb) = @_; + + my $pri = aioreq_pri; + my $grp = aio_group $cb; + + aioreq_pri $pri; + add $grp aio_scandir $path, 0, sub { + my ($dirs, $nondirs) = @_; + + my $dirgrp = aio_group sub { + add $grp aio_rmdir $path, sub { + $grp->result ($_[0]); + }; + }; + + (aioreq_pri $pri), add $dirgrp aio_rmtree "$path/$_" for @$dirs; + (aioreq_pri $pri), add $dirgrp aio_unlink "$path/$_" for @$nondirs; + + add $grp $dirgrp; + }; + + $grp +} + +=item aio_sync $callback->($status) + +Asynchronously call sync and call the callback when finished. + =item aio_fsync $fh, $callback->($status) Asynchronously call fsync on the given filehandle and call the callback @@ -700,6 +857,56 @@ If this call isn't available because your OS lacks it or it couldn't be detected, it will be emulated by calling C instead. +=item aio_sync_file_range $fh, $offset, $nbytes, $flags, $callback->($status) + +Sync the data portion of the file specified by C<$offset> and C<$nbytes> +to disk (but NOT the metadata), by calling the Linux-specific +sync_file_range call. If sync_file_range is not available or it returns +ENOSYS, then fdatasync or fsync is being substituted. + +C<$flags> can be a combination of C<IO::AIO::SYNC_FILE_RANGE_WAIT_BEFORE>, +C<IO::AIO::SYNC_FILE_RANGE_WRITE> and +C<IO::AIO::SYNC_FILE_RANGE_WAIT_AFTER>: refer to the sync_file_range +manpage for details. + +=item aio_pathsync $path, $callback->($status) + +This request tries to open, fsync and close the given path. This is a +composite request intended to sync directories after directory operations +(e.g. rename). This might not work on all operating systems or have any +specific effect, but usually it makes sure that directory changes get +written to disc. It works for anything that can be opened for read-only, +not just directories. + +Passes C<0> when everything went ok, and C<-1> on error. 
+ +=cut + +sub aio_pathsync($;$) { + my ($path, $cb) = @_; + + my $pri = aioreq_pri; + my $grp = aio_group $cb; + + aioreq_pri $pri; + add $grp aio_open $path, O_RDONLY, 0, sub { + my ($fh) = @_; + if ($fh) { + aioreq_pri $pri; + add $grp aio_fsync $fh, sub { + $grp->result ($_[0]); + + aioreq_pri $pri; + add $grp aio_close $fh; + }; + } else { + $grp->result (-1); + } + }; + + $grp +} + =item aio_group $callback->(...) This is a very special aio request: Instead of doing something, it is a @@ -820,10 +1027,11 @@ C state, they will also finish. Otherwise they will continue to exist. -That means after creating a group you have some time to add requests. And -in the callbacks of those requests, you can add further requests to the -group. And only when all those requests have finished will the the group -itself finish. +That means after creating a group you have some time to add requests +(precisely before the callback has been invoked, which is only done within +the C<poll_cb>). And in the callbacks of those requests, you can add +further requests to the group. And only when all those requests have +finished will the group itself finish. =over 4 @@ -845,7 +1053,7 @@ =item $grp->result (...) Set the result value(s) that will be passed to the group callback when all -subrequests have finished and set thre groups errno to the current value +subrequests have finished and set the group's errno to the current value of errno (just like calling C without an error number). By default, no argument will be passed and errno is zero. @@ -866,9 +1074,9 @@ Sets a feeder/generator on this group: every group can have an attached generator that generates requests if idle. The idea behind this is that, although you could just queue as many requests as you want in a group, -this might starve other requests for a potentially long time. For -example, C might generate hundreds of thousands C -requests, delaying any later requests for a long time. 
+this might starve other requests for a potentially long time. For example, +C might generate hundreds of thousands C requests, +delaying any later requests for a long time. To avoid this, and allow incremental generation of requests, you can instead a group and set a feeder on it that generates those requests. The @@ -882,7 +1090,8 @@ If the feed does not queue more requests when called, it will be automatically removed from the group. -If the feed limit is C<0>, it will be set to C<2> automatically. +If the feed limit is C<0> when this method is called, it will be set to +C<2> automatically. Example: @@ -904,6 +1113,9 @@ Setting the limit to C<0> will pause the feeding process. +The default value for the limit is C<0>, but note that setting a feeder +automatically bumps it up to C<2>. + =back =head2 SUPPORT FUNCTIONS @@ -924,12 +1136,14 @@ =item IO::AIO::poll_cb Process some outstanding events on the result pipe. You have to call this -regularly. Returns the number of events processed. Returns immediately -when no events are outstanding. The amount of events processed depends on -the settings of C and C. +regularly. Returns C<0> if all events could be processed, or C<-1> if it +returned earlier for whatever reason. Returns immediately when no events +are outstanding. The amount of events processed depends on the settings of +C and C. If not all requests were processed for whatever reason, the filehandle -will still be ready when C returns. +will still be ready when C returns, so normally you don't have to +do anything special to have it called later. Example: Install an Event watcher that automatically calls IO::AIO::poll_cb with high priority: @@ -973,9 +1187,10 @@ =item IO::AIO::poll_wait -If there are any outstanding requests, wait till the result filehandle -becomes ready for reading (simply does a C on the filehandle. This is useful if you want to +synchronously wait for some requests to finish). See C for an example. 
@@ -987,7 +1202,6 @@ equivalent to: IO::AIO::poll_wait, IO::AIO::poll_cb - if IO::AIO::nreqs; =item IO::AIO::flush @@ -998,8 +1212,12 @@ IO::AIO::poll_wait, IO::AIO::poll_cb while IO::AIO::nreqs; +=back + =head3 CONTROLLING THE NUMBER OF THREADS +=over + =item IO::AIO::min_parallel $nthreads Set the minimum number of AIO threads to C<$nthreads>. The current @@ -1049,14 +1267,14 @@ creation is fast. If thread creation is very slow on your system you might want to use larger values. -=item $oldmaxreqs = IO::AIO::max_outstanding $maxreqs +=item IO::AIO::max_outstanding $maxreqs This is a very bad function to use in interactive programs because it blocks, and a bad way to reduce concurrency because it is inexact: Better use an C together with a feed callback. Sets the maximum number of outstanding requests to C<$nreqs>. If you -to queue up more than this number of requests, the next call to the +do queue up more than this number of requests, the next call to the C (and C and other functions calling C) function will block until the limit is no longer exceeded. @@ -1064,11 +1282,15 @@ number of outstanding requests. You can still queue as many requests as you want. Therefore, -C is mainly useful in simple scripts (with low values) or +C is mainly useful in simple scripts (with low values) or as a stop gap to shield against fatal memory overflow (with large values). 
+=back + =head3 STATISTICAL INFORMATION +=over + =item IO::AIO::nreqs Returns the number of requests currently in the ready, execute or pending @@ -1093,28 +1315,9 @@ =cut -# support function to convert a fd into a perl filehandle -sub _fd2fh { - return undef if $_[0] < 0; - - # try to generate nice filehandles - my $sym = "IO::AIO::fd#$_[0]"; - local *$sym; - - open *$sym, "+<&=$_[0]" # usually works under any unix - or open *$sym, "<&=$_[0]" # cygwin needs this - or open *$sym, ">&=$_[0]" # or this - or return undef; - - *$sym -} - min_parallel 8; -END { - min_parallel 1; - flush; -}; +END { flush } 1; @@ -1144,7 +1347,7 @@ scalars and other data passed into aio requests will also be locked and will consume memory till the request has entered the done state. -This is now awfully much, so queuing lots of requests is not usually a +This is not awfully much, so queuing lots of requests is not usually a problem. Per-thread usage: @@ -1159,7 +1362,8 @@ =head1 SEE ALSO -L. +L<AnyEvent::AIO> for easy integration into event loops, L<Coro::AIO> for a +more natural syntax. =head1 AUTHOR