--- IO-AIO/AIO.pm 2011/03/27 10:26:08 1.189 +++ IO-AIO/AIO.pm 2011/07/24 03:32:51 1.206 @@ -170,12 +170,13 @@ use base 'Exporter'; BEGIN { - our $VERSION = '3.8'; + our $VERSION = '4.0'; our @AIO_REQ = qw(aio_sendfile aio_read aio_write aio_open aio_close aio_stat aio_lstat aio_unlink aio_rmdir aio_readdir aio_readdirx - aio_scandir aio_symlink aio_readlink aio_sync aio_fsync - aio_fdatasync aio_sync_file_range aio_pathsync aio_readahead + aio_scandir aio_symlink aio_readlink aio_realpath aio_sync + aio_fsync aio_syncfs aio_fdatasync aio_sync_file_range aio_fallocate + aio_pathsync aio_readahead aio_rename aio_link aio_move aio_copy aio_group aio_nop aio_mknod aio_load aio_rmtree aio_mkdir aio_chown aio_chmod aio_utime aio_truncate @@ -224,6 +225,7 @@ aio_link $srcpath, $dstpath, $callback->($status) aio_symlink $srcpath, $dstpath, $callback->($status) aio_readlink $path, $callback->($link) + aio_realpath $path, $callback->($link) aio_rename $srcpath, $dstpath, $callback->($status) aio_mkdir $pathname, $mode, $callback->($status) aio_rmdir $pathname, $callback->($status) @@ -237,6 +239,7 @@ aio_scandir $path, $maxreq, $callback->($dirs, $nondirs) aio_rmtree $path, $callback->($status) aio_sync $callback->($status) + aio_syncfs $fh, $callback->($status) aio_fsync $fh, $callback->($status) aio_fdatasync $fh, $callback->($status) aio_sync_file_range $fh, $offset, $nbytes, $flags, $callback->($status) @@ -370,6 +373,15 @@ } }; +In addition to all the common open modes/flags (C, C, +C, C, C, C and C), the +following POSIX and non-POSIX constants are available (missing ones on +your system are, as usual, C<0>): + +C, C, C, C, C, C, +C, C, C, C, C, +C, C and C. + =item aio_close $fh, $callback->($status) @@ -429,32 +441,40 @@ reading at byte offset C<$in_offset>, and starts writing at the current file offset of C<$out_fh>. Because of that, it is not safe to issue more than one C per C<$out_fh>, as they will interfere with each -other. +other. 
The same C<$in_fh> works fine though, as this function does not +move or use the file offset of C<$in_fh>. Please note that C can read more bytes from C<$in_fh> than -are written, and there is no way to find out how many bytes have been read -from C alone, as C only provides the number of -bytes written to C<$out_fh>. Only if the result value equals C<$length> -one can assume that C<$length> bytes have been read. +are written, and there is no way to find out how many more bytes have been +read from C alone, as C only provides the +number of bytes written to C<$out_fh>. Only if the result value equals +C<$length> one can assume that C<$length> bytes have been read. Unlike with other C functions, it makes a lot of sense to use C on non-blocking sockets, as long as one end (typically the C<$in_fh>) is a file - the file I/O will then be asynchronous, while -the socket I/O will be non-blocking. Note, however, that you can run into -a trap where C reads some data with readahead, then fails -to write all data, and when the socket is ready the next time, the data -in the cache is already lost, forcing C to again hit the -disk. Explicit C + C let's you control resource usage -much better. - -This call tries to make use of a native C syscall to provide -zero-copy operation. For this to work, C<$out_fh> should refer to a -socket, and C<$in_fh> should refer to an mmap'able file. +the socket I/O will be non-blocking. Note, however, that you can run +into a trap where C reads some data with readahead, then +fails to write all data, and when the socket is ready the next time, the +data in the cache is already lost, forcing C to again hit +the disk. Explicit C + C lets you better control +resource usage. + +This call tries to make use of a native C-like syscall to +provide zero-copy operation. For this to work, C<$out_fh> should refer to +a socket, and C<$in_fh> should refer to an mmap'able file. 
If a native sendfile cannot be found or it fails with C, -C, C, C, C or C, -it will be emulated, so you can call C on any type of -filehandle regardless of the limitations of the operating system. +C, C, C, C, C or +C, it will be emulated, so you can call C on any +type of filehandle regardless of the limitations of the operating system. + +As native sendfile syscalls (as practically any non-POSIX interface hacked +together in a hurry to improve benchmark numbers) tend to be rather buggy +on many systems, this implementation tries to work around some known bugs +in Linux and FreeBSD kernels (probably others, too), but that might fail, +so you really really should check the return value of C - +fewer bytes than expected might have been transferred. =item aio_readahead $fh,$offset,$length, $callback->($retval) @@ -628,6 +648,16 @@ callback. +=item aio_realpath $path, $callback->($path) + +Asynchronously make the path absolute and resolve any symlinks in +C<$path>. The resulting path only consists of directories (Same as +L<Cwd::realpath>). + +This request can be used to get the absolute path of the current working +directory by passing it a path of F<.> (a single dot). + + =item aio_rename $srcpath, $dstpath, $callback->($status) Asynchronously rename the object at C<$srcpath> to C<$dstpath>, just as @@ -670,8 +700,8 @@ =item IO::AIO::READDIR_DENTS -When this flag is off, then the callback gets an arrayref with of names -only (as with C), otherwise it gets an arrayref with +When this flag is off, then the callback gets an arrayref consisting of +names only (as with C), otherwise it gets an arrayref with C<[$name, $type, $inode]> arrayrefs, each describing a single directory entry in more detail. @@ -694,13 +724,13 @@ =item IO::AIO::READDIR_DIRS_FIRST When this flag is set, then the names will be returned in an order where -likely directories come first. 
This is useful when you need to quickly -find directories, or you want to find all directories while avoiding to -stat() each entry. +likely directories come first, in optimal stat order. This is useful when +you need to quickly find directories, or you want to find all directories +while avoiding to stat() each entry. If the system returns type information in readdir, then this is used -to find directories directly. Otherwise, likely directories are files -beginning with ".", or otherwise files with no dots, of which files with +to find directories directly. Otherwise, likely directories are names +beginning with ".", or otherwise names with no dots, of which names with short names are tried first. =item IO::AIO::READDIR_STAT_ORDER @@ -855,7 +885,7 @@ add $grp aio_copy $src, $dst, sub { $grp->result ($_[0]); - if (!$_[0]) { + unless ($_[0]) { aioreq_pri $pri; add $grp aio_unlink $src; } @@ -1057,6 +1087,13 @@ If this call isn't available because your OS lacks it or it couldn't be detected, it will be emulated by calling C instead. +=item aio_syncfs $fh, $callback->($status) + +Asynchronously call the syncfs syscall to sync the filesystem associated +to the given filehandle and call the callback with the syncfs result +code. If syncfs is not available, calls sync(), but returns C<-1> and sets +errno to C nevertheless. + =item aio_sync_file_range $fh, $offset, $nbytes, $flags, $callback->($status) Sync the data portion of the file specified by C<$offset> and C<$length> @@ -1408,16 +1445,24 @@ =item IO::AIO::poll_cb -Process some outstanding events on the result pipe. You have to call this -regularly. Returns C<0> if all events could be processed, or C<-1> if it -returned earlier for whatever reason. Returns immediately when no events -are outstanding. The amount of events processed depends on the settings of -C and C. +Process some outstanding events on the result pipe. You have to call +this regularly. 
Returns C<0> if all events could be processed (or there +were no events to process), or C<-1> if it returned earlier for whatever +reason. Returns immediately when no events are outstanding. The amount of +events processed depends on the settings of C and +C. If not all requests were processed for whatever reason, the filehandle will still be ready when C returns, so normally you don't have to do anything special to have it called later. +Apart from calling C when the event filehandle becomes +ready, it can be beneficial to call this function from loops which submit +a lot of requests, to make sure the results get processed when they become +available and not just when the loop is finished and the event loop takes +over again. This function returns very fast when there are no outstanding +requests. + Example: Install an Event watcher that automatically calls IO::AIO::poll_cb with high priority (more examples can be found in the SYNOPSIS section, at the top of this document): @@ -1549,21 +1594,38 @@ =item IO::AIO::max_outstanding $maxreqs +Sets the maximum number of outstanding requests to C<$maxreqs>. If +you do queue up more than this number of requests, the next call to +C (and other functions calling C, such as +C or C) will block until the limit is no +longer exceeded. + +In other words, this setting does not enforce a queue limit, but can be +used to make poll functions block if the limit is exceeded. + This is a very bad function to use in interactive programs because it blocks, and a bad way to reduce concurrency because it is inexact: Better use an C together with a feed callback. -Sets the maximum number of outstanding requests to C<$nreqs>. If you -do queue up more than this number of requests, the next call to the -C (and C and other functions calling C) -function will block until the limit is no longer exceeded. - -The default value is very large, so there is no practical limit on the -number of outstanding requests. 
- -You can still queue as many requests as you want. Therefore, -C is mainly useful in simple scripts (with low values) or -as a stop gap to shield against fatal memory overflow (with large values). +Its main use is in scripts without an event loop - when you want to stat +a lot of files, you can write something like this: + + IO::AIO::max_outstanding 32; + + for my $path (...) { + aio_stat $path , ...; + IO::AIO::poll_cb; + } + + IO::AIO::flush; + +The call to C inside the loop will normally return instantly, but +as soon as more than C<32> requests are in-flight, it will block until +some requests have been handled. This keeps the loop from pushing a large +number of C requests onto the queue. + +The default value for C is very large, so there is no +practical limit on the number of outstanding requests. =back @@ -1749,19 +1811,36 @@ =head2 FORK BEHAVIOUR -This module should do "the right thing" when the process using it forks: +Usage of pthreads in a program changes the semantics of fork +considerably. Specifically, only async-safe functions can be called after +fork. Perl doesn't know about this, so in general, you cannot call fork +with defined behaviour in perl if pthreads are involved. IO::AIO uses +pthreads, so this applies, but many other extensions and (for inexplicable +reasons) perl itself often is linked against pthreads, so this limitation +applies to quite a lot of perls. + +This module no longer tries to fight your OS, or POSIX. That means IO::AIO +only works in the process that loaded it. Forking is fully supported, but +using IO::AIO in the child is not. + +You might get around by not I<using> IO::AIO before (or after) +forking. You could also try to call the L<IO::AIO::reinit> function in the +child: -Before the fork, IO::AIO enters a quiescent state where no requests -can be added in other threads and no results will be processed. 
After -the fork the parent simply leaves the quiescent state and continues -request/result processing, while the child frees the request/result queue -(so that the requests started before the fork will only be handled in the -parent). Threads will be started on demand until the limit set in the -parent process has been reached again. - -In short: the parent will, after a short pause, continue as if fork had -not been called, while the child will act as if IO::AIO has not been used -yet. +=over 4 + +=item IO::AIO::reinit + +Abandons all current requests and I/O threads and simply reinitialises all +data structures. This is not an operation supported by any standards, but +happens to work on GNU/Linux and some newer BSD systems. + +The only reasonable use for this function is to call it after forking, if +C<IO::AIO> was used in the parent. Calling it while IO::AIO is active in +the process will result in undefined behaviour. Calling it at any time +will also result in undefined (by POSIX) behaviour. + +=back =head2 MEMORY USAGE