--- IO-AIO/AIO.pm 2011/02/15 03:21:41 1.188 +++ IO-AIO/AIO.pm 2011/07/25 16:50:33 1.207 @@ -170,12 +170,13 @@ use base 'Exporter'; BEGIN { - our $VERSION = '3.72'; + our $VERSION = '4.0'; our @AIO_REQ = qw(aio_sendfile aio_read aio_write aio_open aio_close aio_stat aio_lstat aio_unlink aio_rmdir aio_readdir aio_readdirx - aio_scandir aio_symlink aio_readlink aio_sync aio_fsync - aio_fdatasync aio_sync_file_range aio_pathsync aio_readahead + aio_scandir aio_symlink aio_readlink aio_realpath aio_sync + aio_fsync aio_syncfs aio_fdatasync aio_sync_file_range aio_fallocate + aio_pathsync aio_readahead aio_rename aio_link aio_move aio_copy aio_group aio_nop aio_mknod aio_load aio_rmtree aio_mkdir aio_chown aio_chmod aio_utime aio_truncate @@ -224,6 +225,7 @@ aio_link $srcpath, $dstpath, $callback->($status) aio_symlink $srcpath, $dstpath, $callback->($status) aio_readlink $path, $callback->($link) + aio_realpath $path, $callback->($path) aio_rename $srcpath, $dstpath, $callback->($status) aio_mkdir $pathname, $mode, $callback->($status) aio_rmdir $pathname, $callback->($status) @@ -237,6 +239,7 @@ aio_scandir $path, $maxreq, $callback->($dirs, $nondirs) aio_rmtree $path, $callback->($status) aio_sync $callback->($status) + aio_syncfs $fh, $callback->($status) aio_fsync $fh, $callback->($status) aio_fdatasync $fh, $callback->($status) aio_sync_file_range $fh, $offset, $nbytes, $flags, $callback->($status) @@ -370,6 +373,15 @@ } }; +In addition to all the common open modes/flags (C, C, +C, C, C, C and C), the +following POSIX and non-POSIX constants are available (missing ones on +your system are, as usual, C<0>): + +C, C, C, C, C, C, +C, C, C, C, C, +C, C and C. + =item aio_close $fh, $callback->($status) @@ -429,32 +441,40 @@ reading at byte offset C<$in_offset>, and starts writing at the current file offset of C<$out_fh>. Because of that, it is not safe to issue more than one C per C<$out_fh>, as they will interfere with each -other. +other.
The same C<$in_fh> works fine though, as this function does not +move or use the file offset of C<$in_fh>. Please note that C can read more bytes from C<$in_fh> than -are written, and there is no way to find out how many bytes have been read -from C alone, as C only provides the number of -bytes written to C<$out_fh>. Only if the result value equals C<$length> -one can assume that C<$length> bytes have been read. +are written, and there is no way to find out how many more bytes have been +read from C alone, as C only provides the +number of bytes written to C<$out_fh>. Only if the result value equals +C<$length> one can assume that C<$length> bytes have been read. Unlike with other C functions, it makes a lot of sense to use C on non-blocking sockets, as long as one end (typically the C<$in_fh>) is a file - the file I/O will then be asynchronous, while -the socket I/O will be non-blocking. Note, however, that you can run into -a trap where C reads some data with readahead, then fails -to write all data, and when the socket is ready the next time, the data -in the cache is already lost, forcing C to again hit the -disk. Explicit C + C let's you control resource usage -much better. - -This call tries to make use of a native C syscall to provide -zero-copy operation. For this to work, C<$out_fh> should refer to a -socket, and C<$in_fh> should refer to an mmap'able file. +the socket I/O will be non-blocking. Note, however, that you can run +into a trap where C reads some data with readahead, then +fails to write all data, and when the socket is ready the next time, the +data in the cache is already lost, forcing C to again hit +the disk. Explicit C + C lets you better control +resource usage. + +This call tries to make use of a native C-like syscall to +provide zero-copy operation. For this to work, C<$out_fh> should refer to +a socket, and C<$in_fh> should refer to an mmap'able file.
If a native sendfile cannot be found or it fails with C, -C, C, C, C or C, -it will be emulated, so you can call C on any type of -filehandle regardless of the limitations of the operating system. +C, C, C, C, C or +C, it will be emulated, so you can call C on any +type of filehandle regardless of the limitations of the operating system. + +As native sendfile syscalls (as practically any non-POSIX interface hacked +together in a hurry to improve benchmark numbers) tend to be rather buggy +on many systems, this implementation tries to work around some known bugs +in Linux and FreeBSD kernels (probably others, too), but that might fail, +so you really really should check the return value of C - +fewer bytes than expected might have been transferred. =item aio_readahead $fh,$offset,$length, $callback->($retval) @@ -628,6 +648,16 @@ callback. +=item aio_realpath $path, $callback->($path) + +Asynchronously make the path absolute and resolve any symlinks in +C<$path>. The resulting path only consists of directories (Same as +L). + +This request can be used to get the absolute path of the current working +directory by passing it a path of F<.> (a single dot). + + =item aio_rename $srcpath, $dstpath, $callback->($status) Asynchronously rename the object at C<$srcpath> to C<$dstpath>, just as @@ -659,8 +689,8 @@ =item aio_readdirx $pathname, $flags, $callback->($entries, $flags) -Quite similar to C, but the C<$flags> argument allows to tune -behaviour and output format. In case of an error, C<$entries> will be +Quite similar to C, but the C<$flags> argument allows one to +tune behaviour and output format. In case of an error, C<$entries> will be C.
The flags are a combination of the following constants, ORed together (the @@ -670,8 +700,8 @@ =item IO::AIO::READDIR_DENTS -When this flag is off, then the callback gets an arrayref with of names -only (as with C), otherwise it gets an arrayref with +When this flag is off, then the callback gets an arrayref consisting of +names only (as with C), otherwise it gets an arrayref with C<[$name, $type, $inode]> arrayrefs, each describing a single directory entry in more detail. @@ -694,13 +724,13 @@ =item IO::AIO::READDIR_DIRS_FIRST When this flag is set, then the names will be returned in an order where -likely directories come first. This is useful when you need to quickly -find directories, or you want to find all directories while avoiding to -stat() each entry. +likely directories come first, in optimal stat order. This is useful when +you need to quickly find directories, or you want to find all directories +while avoiding to stat() each entry. If the system returns type information in readdir, then this is used -to find directories directly. Otherwise, likely directories are files -beginning with ".", or otherwise files with no dots, of which files with +to find directories directly. Otherwise, likely directories are names +beginning with ".", or otherwise names with no dots, of which names with short names are tried first. =item IO::AIO::READDIR_STAT_ORDER @@ -717,7 +747,7 @@ This flag should not be set when calling C. Instead, it is being set by C, when any of the C<$type>'s found were -C. The absense of this flag therefore indicates that all +C. The absence of this flag therefore indicates that all C<$type>'s are known, which can be used to speed up some algorithms. =back @@ -855,7 +885,7 @@ add $grp aio_copy $src, $dst, sub { $grp->result ($_[0]); - if (!$_[0]) { + unless ($_[0]) { aioreq_pri $pri; add $grp aio_unlink $src; } @@ -909,7 +939,7 @@ entry plus an appended C will be C'ed, likely directories first, in order of their inode numbers. 
If that succeeds, it assumes that the entry is a directory or a symlink to directory (which will be checked -seperately). This is often faster than stat'ing the entry itself because +separately). This is often faster than stat'ing the entry itself because filesystems might detect the type of the entry without reading the inode data (e.g. ext2fs filetype feature), even on systems that cannot return the filetype information on readdir. @@ -1057,6 +1087,13 @@ If this call isn't available because your OS lacks it or it couldn't be detected, it will be emulated by calling C instead. +=item aio_syncfs $fh, $callback->($status) + +Asynchronously call the syncfs syscall to sync the filesystem associated +to the given filehandle and call the callback with the syncfs result +code. If syncfs is not available, calls sync(), but returns C<-1> and sets +errno to C nevertheless. + =item aio_sync_file_range $fh, $offset, $nbytes, $flags, $callback->($status) Sync the data portion of the file specified by C<$offset> and C<$length> @@ -1408,16 +1445,24 @@ =item IO::AIO::poll_cb -Process some outstanding events on the result pipe. You have to call this -regularly. Returns C<0> if all events could be processed, or C<-1> if it -returned earlier for whatever reason. Returns immediately when no events -are outstanding. The amount of events processed depends on the settings of -C and C. +Process some outstanding events on the result pipe. You have to call +this regularly. Returns C<0> if all events could be processed (or there +were no events to process), or C<-1> if it returned earlier for whatever +reason. Returns immediately when no events are outstanding. The amount of +events processed depends on the settings of C and +C. If not all requests were processed for whatever reason, the filehandle will still be ready when C returns, so normally you don't have to do anything special to have it called later. 
+Apart from calling C when the event filehandle becomes +ready, it can be beneficial to call this function from loops which submit +a lot of requests, to make sure the results get processed when they become +available and not just when the loop is finished and the event loop takes +over again. This function returns very fast when there are no outstanding +requests. + Example: Install an Event watcher that automatically calls IO::AIO::poll_cb with high priority (more examples can be found in the SYNOPSIS section, at the top of this document): @@ -1549,21 +1594,38 @@ =item IO::AIO::max_outstanding $maxreqs +Sets the maximum number of outstanding requests to C<$maxreqs>. If +you do queue up more than this number of requests, the next call to +C (and other functions calling C, such as +C or C) will block until the limit is no +longer exceeded. + +In other words, this setting does not enforce a queue limit, but can be +used to make poll functions block if the limit is exceeded. + This is a very bad function to use in interactive programs because it blocks, and a bad way to reduce concurrency because it is inexact: Better use an C together with a feed callback. -Sets the maximum number of outstanding requests to C<$nreqs>. If you -do queue up more than this number of requests, the next call to the -C (and C and other functions calling C) -function will block until the limit is no longer exceeded. - -The default value is very large, so there is no practical limit on the -number of outstanding requests. - -You can still queue as many requests as you want. Therefore, -C is mainly useful in simple scripts (with low values) or -as a stop gap to shield against fatal memory overflow (with large values). +Its main use is in scripts without an event loop - when you want to stat +a lot of files, you can write something like this: + + IO::AIO::max_outstanding 32; + + for my $path (...)
{ + aio_stat $path , ...; + IO::AIO::poll_cb; + } + + IO::AIO::flush; + +The call to C<poll_cb> inside the loop will normally return instantly, but +as soon as more than C<32> requests are in-flight, it will block until +some requests have been handled. This keeps the loop from pushing a large +number of C<aio_stat> requests onto the queue. + +The default value for C<max_outstanding> is very large, so there is no +practical limit on the number of outstanding requests. =back @@ -1613,7 +1675,7 @@ Simply calls the C function (see its manpage for details). The following advice constants are -avaiable: C, C, +available: C, C, C, C, C, C. @@ -1624,7 +1686,7 @@ Simply calls the C function (see its manpage for details). The following advice constants are -avaiable: C, C, +available: C, C, C, C, C. On systems that do not implement C, this function returns @@ -1634,7 +1696,7 @@ Simply calls the C function on the preferably AIO::mmap'ed $scalar (see its manpage for details). The following protect -constants are avaiable: C, C, +constants are available: C, C, C, C. On systems that do not implement C, this function returns @@ -1749,19 +1811,36 @@ =head2 FORK BEHAVIOUR -This module should do "the right thing" when the process using it forks: +Usage of pthreads in a program changes the semantics of fork +considerably. Specifically, only async-safe functions can be called after +fork. Perl doesn't know about this, so in general, you cannot call fork +with defined behaviour in perl if pthreads are involved. IO::AIO uses +pthreads, so this applies, but many other extensions and (for inexplicable +reasons) perl itself often is linked against pthreads, so this limitation +applies to quite a lot of perls. + +This module no longer tries to fight your OS, or POSIX. That means IO::AIO +only works in the process that loaded it. Forking is fully supported, but +using IO::AIO in the child is not. + +You might get around by not I<using> IO::AIO before (or after) +forking.
You could also try to call the L<IO::AIO::reinit> function in the +child: -Before the fork, IO::AIO enters a quiescent state where no requests -can be added in other threads and no results will be processed. After -the fork the parent simply leaves the quiescent state and continues -request/result processing, while the child frees the request/result queue -(so that the requests started before the fork will only be handled in the -parent). Threads will be started on demand until the limit set in the -parent process has been reached again. - -In short: the parent will, after a short pause, continue as if fork had -not been called, while the child will act as if IO::AIO has not been used -yet. +=over 4 + +=item IO::AIO::reinit + +Abandons all current requests and I/O threads and simply reinitialises all +data structures. This is not an operation supported by any standards, but +happens to work on GNU/Linux and some newer BSD systems. + +The only reasonable use for this function is to call it after forking, if +C<IO::AIO> was used in the parent. Calling it while IO::AIO is active in +the process will result in undefined behaviour. Calling it at any time +will also result in undefined (by POSIX) behaviour. + +=back =head2 MEMORY USAGE