--- IO-AIO/AIO.pm 2005/07/12 11:29:40 1.20 +++ IO-AIO/AIO.pm 2005/08/28 11:05:50 1.39 @@ -24,7 +24,7 @@ # Glib/Gtk2 add_watch Glib::IO IO::AIO::poll_fileno, - in => sub { IO::AIO::poll_cb, 1 }; + in => sub { IO::AIO::poll_cb; 1 }; # Tk Tk::Event::IO->fileevent (IO::AIO::poll_fileno, "", @@ -50,22 +50,28 @@ remaining functionality would have to be implemented using threads anyway. Although the module will work in the presence of other threads, it is -currently not reentrant, so use appropriate locking yourself. +currently not reentrant, so use appropriate locking yourself, always call +C<poll_cb> from within the same thread, or never call C<poll_cb> (or other +C<aio_> functions) recursively. =cut package IO::AIO; +no warnings; + use base 'Exporter'; use Fcntl (); BEGIN { - $VERSION = 0.5; + $VERSION = 1.6; - @EXPORT = qw(aio_read aio_write aio_open aio_close aio_stat aio_lstat aio_unlink + @EXPORT = qw(aio_sendfile aio_read aio_write aio_open aio_close aio_stat + aio_lstat aio_unlink aio_rmdir aio_readdir aio_symlink aio_fsync aio_fdatasync aio_readahead); - @EXPORT_OK = qw(poll_fileno poll_cb min_parallel max_parallel max_outstanding nreqs); + @EXPORT_OK = qw(poll_fileno poll_cb min_parallel max_parallel + max_outstanding nreqs); require XSLoader; XSLoader::load IO::AIO, $VERSION; @@ -83,12 +89,21 @@ perl, which usually delivers "false") as its sole argument when the given syscall has been executed asynchronously. -All functions that expect a filehandle will also accept a file descriptor. +All functions expecting a filehandle keep a copy of the filehandle +internally until the request has finished. -The filenames you pass to these routines I<must> be absolute. The reason -for this is that at the time the request is being executed, the current -working directory could have changed. Alternatively, you can make sure -that you never change the current working directory. +The pathnames you pass to these routines I<must> be absolute and +encoded in byte form.
The reason for the former is that at the time the +request is being executed, the current working directory could have +changed. Alternatively, you can make sure that you never change the +current working directory. + +To encode pathnames to byte form, make sure you either: a) +always pass in filenames you got from outside (command line, readdir +etc.), b) are ASCII or ISO 8859-1, c) use the Encode module and encode +your pathnames to the locale (or other) encoding in effect in the user +environment, d) use Glib::filename_from_unicode on unicode filenames or e) +use something else. =over 4 @@ -139,6 +154,10 @@ callback with the actual number of bytes read (or -1 on error, just like the syscall). +The C<$data> scalar I<MUST NOT> be modified in any way while the request +is outstanding. Modifying it can result in segfaults or WW3 (if the +necessary/optional hardware is installed). + Example: Read 15 bytes at offset 7 into scalar C<$buffer>, starting at offset C<0> within the scalar: @@ -147,11 +166,30 @@ print "read $_[0] bytes: <$buffer>\n"; }; -=item aio_readahead $fh,$offset,$length, $callback +=item aio_sendfile $out_fh, $in_fh, $in_offset, $length, $callback -Asynchronously reads the specified byte range into the page cache, using -the C<readahead> syscall. If that syscall doesn't exist (likely if your OS -isn't Linux) the status will be C<-1> and C<$!> is set to C<ENOSYS>. +Tries to copy C<$length> bytes from C<$in_fh> to C<$out_fh>. It starts +reading at byte offset C<$in_offset>, and starts writing at the current +file offset of C<$out_fh>. Because of that, it is not safe to issue more +than one C<aio_sendfile> per C<$out_fh>, as they will interfere with each +other. + +This call tries to make use of a native C<sendfile> syscall to provide +zero-copy operation. For this to work, C<$out_fh> should refer to a +socket, and C<$in_fh> should refer to an mmap'able file.
+ +If the native sendfile call fails or is not implemented, it will be +emulated, so you can call C<aio_sendfile> on any type of filehandle +regardless of the limitations of the operating system. + +Please note, however, that C<aio_sendfile> can read more bytes from +C<$in_fh> than are written, and there is no way to find out how many +bytes have been read from C<aio_sendfile> alone, as C<aio_sendfile> only +provides the number of bytes written to C<$out_fh>. Only if the result +value equals C<$length> one can assume that C<$length> bytes have been +read. + +=item aio_readahead $fh,$offset,$length, $callback C<aio_readahead> populates the page cache with data from a file so that subsequent reads from that file will not block on disk I/O. The C<$offset> @@ -162,6 +200,9 @@ (off-set+length). C<aio_readahead> does not read beyond the end of the file. The current file offset of the file is left unchanged. +If that syscall doesn't exist (likely if your OS isn't Linux) it will be +emulated by simply reading the data, which would have a similar effect. + =item aio_stat $fh_or_path, $callback =item aio_lstat $fh, $callback @@ -189,6 +230,20 @@ Asynchronously unlink (delete) a file and call the callback with the result code. +=item aio_rmdir $pathname, $callback + +Asynchronously rmdir (delete) a directory and call the callback with the +result code. + +=item aio_readdir $pathname, $callback + +Unlike the POSIX call of the same name, C<aio_readdir> reads an entire +directory (i.e. opendir + readdir + closedir). The entries will not be +sorted, and will B<not> include the C<.> and C<..> entries. + +The callback is passed a single argument which is either C<undef> or an array-ref +with the filenames. + =item aio_fsync $fh, $callback Asynchronously call fsync on the given filehandle and call the callback @@ -197,8 +252,10 @@ =item aio_fdatasync $fh, $callback Asynchronously call fdatasync on the given filehandle and call the -callback with the fdatasync result code. Might set C<$!> to C<ENOSYS> if -C<fdatasync> is not available. +callback with the fdatasync result code.
+ +If this call isn't available because your OS lacks it or it couldn't be +detected, it will be emulated by calling C<fsync> instead. =back @@ -266,24 +323,29 @@ =item IO::AIO::min_parallel $nthreads -Set the minimum number of AIO threads to C<$nthreads>. The default is -C<1>, which means a single asynchronous operation can be done at one time +Set the minimum number of AIO threads to C<$nthreads>. The current default +is C<4>, which means four asynchronous operations can be done at one time (the number of outstanding operations, however, is unlimited). +IO::AIO starts threads only on demand, when an AIO request is queued and +no free thread exists. + It is recommended to keep the number of threads low, as some Linux kernel versions will scale negatively with the number of threads (higher parallelity => MUCH higher latency). With current Linux 2.6 versions, 4-32 threads should be fine. -Under normal circumstances you don't need to call this function, as this -module automatically starts some threads (the exact number might change, -and is currently 4). +Under most circumstances you don't need to call this function, as the +module selects a default that is suitable for low to moderate load. =item IO::AIO::max_parallel $nthreads -Sets the maximum number of AIO threads to C<$nthreads>. If more than -the specified number of threads are currently running, kill them. This -function blocks until the limit is reached. +Sets the maximum number of AIO threads to C<$nthreads>. If more than the +specified number of threads are currently running, this function kills +them. This function blocks until the limit is reached. + +While C<$nthreads> is zero, aio requests get queued but not executed +until the number of threads has been increased again. This module automatically runs C<max_parallel 0> at program end, to ensure that all threads are killed and that there are no outstanding requests. @@ -297,7 +359,7 @@ some requests have been handled.
The default is very large, so normally there is no practical limit. If you -queue up many requests in a loop it it often improves speed if you set +queue up many requests in a loop it often improves speed if you set this to a relatively low number, such as C<100>. Under normal circumstances you don't need to call this function. @@ -310,12 +372,16 @@ sub _fd2fh { return undef if $_[0] < 0; - # try to be perl5.6-compatible - local *AIO_FH; - open AIO_FH, "+<&=$_[0]" + # try to generate nice filehandles + my $sym = "IO::AIO::fd#$_[0]"; + local *$sym; + + open *$sym, "+<&=$_[0]" # usually works under any unix + or open *$sym, "<&=$_[0]" # cygwin needs this + or open *$sym, ">&=$_[0]" # or this or return undef; - *AIO_FH + *$sym } min_parallel 4; @@ -326,6 +392,16 @@ 1; +=head2 FORK BEHAVIOUR + +Before the fork, IO::AIO enters a quiescent state where no requests +can be added in other threads and no results will be processed. After +the fork the parent simply leaves the quiescent state and continues +request/result processing, while the child clears the request/result +queue (so the requests started before the fork will only be handled in +the parent). Threads will be started on demand until the limit set in the +parent process has been reached again. + =head1 SEE ALSO L, L.