--- IO-AIO/AIO.pm 2010/01/07 20:25:57 1.173 +++ IO-AIO/AIO.pm 2011/06/29 12:46:36 1.199 @@ -6,7 +6,7 @@ use IO::AIO; - aio_open "/etc/passwd", O_RDONLY, 0, sub { + aio_open "/etc/passwd", IO::AIO::O_RDONLY, 0, sub { my $fh = shift or die "/etc/passwd: $!"; ... @@ -28,29 +28,6 @@ my $grp = aio_group sub { print "all stats done\n" }; add $grp aio_stat "..." for ...; - # AnyEvent integration (EV, Event, Glib, Tk, POE, urxvt, pureperl...) - use AnyEvent::AIO; - - # EV integration - my $aio_w = EV::io IO::AIO::poll_fileno, EV::READ, \&IO::AIO::poll_cb; - - # Event integration - Event->io (fd => IO::AIO::poll_fileno, - poll => 'r', - cb => \&IO::AIO::poll_cb); - - # Glib/Gtk2 integration - add_watch Glib::IO IO::AIO::poll_fileno, - in => sub { IO::AIO::poll_cb; 1 }; - - # Tk integration - Tk::Event::IO->fileevent (IO::AIO::poll_fileno, "", - readable => \&IO::AIO::poll_cb); - - # Danga::Socket integration - Danga::Socket->AddOtherFds (IO::AIO::poll_fileno => - \&IO::AIO::poll_cb); - =head1 DESCRIPTION This module implements asynchronous I/O using whatever means your @@ -101,7 +78,7 @@ my $aio_w = EV::io IO::AIO::poll_fileno, EV::READ, \&IO::AIO::poll_cb; # queue the request to open /etc/passwd - aio_open "/etc/passwd", O_RDONLY, 0, sub { + aio_open "/etc/passwd", IO::AIO::O_RDONLY, 0, sub { my $fh = shift or die "error while opening: $!"; @@ -193,7 +170,7 @@ use base 'Exporter'; BEGIN { - our $VERSION = '3.5'; + our $VERSION = '3.92'; our @AIO_REQ = qw(aio_sendfile aio_read aio_write aio_open aio_close aio_stat aio_lstat aio_unlink aio_rmdir aio_readdir aio_readdirx @@ -202,14 +179,16 @@ aio_rename aio_link aio_move aio_copy aio_group aio_nop aio_mknod aio_load aio_rmtree aio_mkdir aio_chown aio_chmod aio_utime aio_truncate - aio_msync aio_mtouch aio_statvfs); + aio_msync aio_mtouch aio_mlock aio_mlockall + aio_statvfs); our @EXPORT = (@AIO_REQ, qw(aioreq_pri aioreq_nice)); our @EXPORT_OK = qw(poll_fileno poll_cb poll_wait flush - min_parallel max_parallel max_idle + 
min_parallel max_parallel max_idle idle_timeout nreqs nready npending nthreads max_poll_time max_poll_reqs - sendfile fadvise); + sendfile fadvise madvise + mmap munmap munlock munlockall); push @AIO_REQ, qw(aio_busy); # not exported @@ -221,6 +200,79 @@ =head1 FUNCTIONS +=head2 QUICK OVERVIEW + +This section simply lists the prototypes of the most important functions +for quick reference. See the following sections for function-by-function +documentation. + + aio_open $pathname, $flags, $mode, $callback->($fh) + aio_close $fh, $callback->($status) + aio_read $fh,$offset,$length, $data,$dataoffset, $callback->($retval) + aio_write $fh,$offset,$length, $data,$dataoffset, $callback->($retval) + aio_sendfile $out_fh, $in_fh, $in_offset, $length, $callback->($retval) + aio_readahead $fh,$offset,$length, $callback->($retval) + aio_stat $fh_or_path, $callback->($status) + aio_lstat $fh, $callback->($status) + aio_statvfs $fh_or_path, $callback->($statvfs) + aio_utime $fh_or_path, $atime, $mtime, $callback->($status) + aio_chown $fh_or_path, $uid, $gid, $callback->($status) + aio_truncate $fh_or_path, $offset, $callback->($status) + aio_chmod $fh_or_path, $mode, $callback->($status) + aio_unlink $pathname, $callback->($status) + aio_mknod $path, $mode, $dev, $callback->($status) + aio_link $srcpath, $dstpath, $callback->($status) + aio_symlink $srcpath, $dstpath, $callback->($status) + aio_readlink $path, $callback->($link) + aio_rename $srcpath, $dstpath, $callback->($status) + aio_mkdir $pathname, $mode, $callback->($status) + aio_rmdir $pathname, $callback->($status) + aio_readdir $pathname, $callback->($entries) + aio_readdirx $pathname, $flags, $callback->($entries, $flags) + IO::AIO::READDIR_DENTS IO::AIO::READDIR_DIRS_FIRST + IO::AIO::READDIR_STAT_ORDER IO::AIO::READDIR_FOUND_UNKNOWN + aio_load $path, $data, $callback->($status) + aio_copy $srcpath, $dstpath, $callback->($status) + aio_move $srcpath, $dstpath, $callback->($status) + aio_scandir $path, $maxreq, 
$callback->($dirs, $nondirs) + aio_rmtree $path, $callback->($status) + aio_sync $callback->($status) + aio_fsync $fh, $callback->($status) + aio_fdatasync $fh, $callback->($status) + aio_sync_file_range $fh, $offset, $nbytes, $flags, $callback->($status) + aio_pathsync $path, $callback->($status) + aio_msync $scalar, $offset = 0, $length = undef, flags = 0, $callback->($status) + aio_mtouch $scalar, $offset = 0, $length = undef, flags = 0, $callback->($status) + aio_mlock $scalar, $offset = 0, $length = undef, $callback->($status) + aio_mlockall $flags, $callback->($status) + aio_group $callback->(...) + aio_nop $callback->() + + $prev_pri = aioreq_pri [$pri] + aioreq_nice $pri_adjust + + IO::AIO::poll_wait + IO::AIO::poll_cb + IO::AIO::poll + IO::AIO::flush + IO::AIO::max_poll_reqs $nreqs + IO::AIO::max_poll_time $seconds + IO::AIO::min_parallel $nthreads + IO::AIO::max_parallel $nthreads + IO::AIO::max_idle $nthreads + IO::AIO::idle_timeout $seconds + IO::AIO::max_outstanding $maxreqs + IO::AIO::nreqs + IO::AIO::nready + IO::AIO::npending + + IO::AIO::sendfile $ofh, $ifh, $offset, $count + IO::AIO::fadvise $fh, $offset, $len, $advice + IO::AIO::madvise $scalar, $offset, $length, $advice + IO::AIO::mprotect $scalar, $offset, $length, $protect + IO::AIO::munlock $scalar, $offset = 0, $length = undef + IO::AIO::munlockall + =head2 AIO REQUEST FUNCTIONS All the C calls are more or less thin wrappers around the syscall @@ -309,7 +361,7 @@ Example: - aio_open "/etc/passwd", O_RDONLY, 0, sub { + aio_open "/etc/passwd", IO::AIO::O_RDONLY, 0, sub { if ($_[0]) { print "open successful, fh is $_[0]\n"; ... @@ -318,6 +370,15 @@ } }; +In addition to all the common open modes/flags (C, C, +C, C, C, C and C), the +following POSIX and non-POSIX constants are available (missing ones on +your system are, as usual, C<0>): + +C, C, C, C, C, C, +C, C, C, C, C, +C, C and C. 
+ =item aio_close $fh, $callback->($status) @@ -377,23 +438,40 @@ reading at byte offset C<$in_offset>, and starts writing at the current file offset of C<$out_fh>. Because of that, it is not safe to issue more than one C per C<$out_fh>, as they will interfere with each -other. +other. The same C<$in_fh> works fine though, as this function does not +move or use the file offset of C<$in_fh>. -This call tries to make use of a native C syscall to provide -zero-copy operation. For this to work, C<$out_fh> should refer to a -socket, and C<$in_fh> should refer to mmap'able file. +Please note that C can read more bytes from C<$in_fh> than +are written, and there is no way to find out how many more bytes have been +read from C alone, as C only provides the +number of bytes written to C<$out_fh>. Only if the result value equals +C<$length> one can assume that C<$length> bytes have been read. + +Unlike with other C functions, it makes a lot of sense to use +C on non-blocking sockets, as long as one end (typically +the C<$in_fh>) is a file - the file I/O will then be asynchronous, while +the socket I/O will be non-blocking. Note, however, that you can run +into a trap where C reads some data with readahead, then +fails to write all data, and when the socket is ready the next time, the +data in the cache is already lost, forcing C to again hit +the disk. Explicit C + C lets you better control +resource usage. + +This call tries to make use of a native C-like syscall to +provide zero-copy operation. For this to work, C<$out_fh> should refer to +a socket, and C<$in_fh> should refer to an mmap'able file. If a native sendfile cannot be found or it fails with C, -C, C, C, C or C, -it will be emulated, so you can call C on any type of -filehandle regardless of the limitations of the operating system. 
- -Please note, however, that C can read more bytes from -C<$in_fh> than are written, and there is no way to find out how many -bytes have been read from C alone, as C only -provides the number of bytes written to C<$out_fh>. Only if the result -value equals C<$length> one can assume that C<$length> bytes have been -read. +C, C, C, C, C, C or +C, it will be emulated, so you can call C on any +type of filehandle regardless of the limitations of the operating system. + +As native sendfile syscalls (as practically any non-POSIX interface hacked +together in a hurry to improve benchmark numbers) tend to be rather buggy +on many systems, this implementation tries to work around some known bugs +in Linux and FreeBSD kernels (probably others, too), but that might fail, +so you really really should check the return value of C - +fewer bytes than expected might have been transferred. =item aio_readahead $fh,$offset,$length, $callback->($retval) @@ -426,6 +504,15 @@ error when stat'ing a large file, the results will be silently truncated unless perl itself is compiled with large file support. +To help interpret the mode and dev/rdev stat values, IO::AIO offers the +following constants and functions (if not implemented, the constants will +be C<0> and the functions will either C or fall back on traditional +behaviour). + +C, C, C, C, C, C, +C, C, C, C, +C, C. + Example: Print the length of F: aio_stat "/etc/passwd", sub { @@ -434,7 +521,7 @@ }; -=item aio_statvfs $fh_or_path, $callback->($statvfs) +=item aio_statvfs $fh_or_path, $callback->($statvfs) Works like the POSIX C or C syscalls, depending on whether a file handle or path was passed. @@ -536,6 +623,8 @@ aio_mknod $path, IO::AIO::S_IFIFO | $mode, 0, sub { ... +See C for info about some potentially helpful extra constants +and functions. 
=item aio_link $srcpath, $dstpath, $callback->($status) @@ -598,8 +687,8 @@ =item IO::AIO::READDIR_DENTS -When this flag is off, then the callback gets an arrayref with of names -only (as with C), otherwise it gets an arrayref with +When this flag is off, then the callback gets an arrayref consisting of +names only (as with C), otherwise it gets an arrayref with C<[$name, $type, $inode]> arrayrefs, each describing a single directory entry in more detail. @@ -622,13 +711,13 @@ =item IO::AIO::READDIR_DIRS_FIRST When this flag is set, then the names will be returned in an order where -likely directories come first. This is useful when you need to quickly -find directories, or you want to find all directories while avoiding to -stat() each entry. +likely directories come first, in optimal stat order. This is useful when +you need to quickly find directories, or you want to find all directories +while avoiding to stat() each entry. If the system returns type information in readdir, then this is used -to find directories directly. Otherwise, likely directories are files -beginning with ".", or otherwise files with no dots, of which files with +to find directories directly. Otherwise, likely directories are names +beginning with ".", or otherwise names with no dots, of which names with short names are tried first. =item IO::AIO::READDIR_STAT_ORDER @@ -783,7 +872,7 @@ add $grp aio_copy $src, $dst, sub { $grp->result ($_[0]); - if (!$_[0]) { + unless ($_[0]) { aioreq_pri $pri; add $grp aio_unlink $src; } @@ -1041,9 +1130,10 @@ =item aio_msync $scalar, $offset = 0, $length = undef, flags = 0, $callback->($status) This is a rather advanced IO::AIO call, which only works on mmap(2)ed -scalars (see the L or L modules for details on this, note -that the scalar must only be modified in-place while an aio operation is -pending on it). 
+scalars (see the C function, although it also works on data +scalars managed by the L or L modules, note that the +scalar must only be modified in-place while an aio operation is pending on +it). It calls the C function of your OS, if available, with the memory area starting at C<$offset> in the string and ending C<$length> bytes @@ -1064,6 +1154,45 @@ C, which modifies the memory page s(by reading and writing an octet from it, which dirties the page). +=item aio_mlock $scalar, $offset = 0, $length = undef, $callback->($status) + +This is a rather advanced IO::AIO call, which works best on mmap(2)ed +scalars. + +It reads in all the pages of the underlying storage into memory (if any) +and locks them, so they are not getting swapped/paged out or removed. + +If C<$length> is undefined, then the scalar will be locked till the end. + +On systems that do not implement C, this function returns C<-1> +and sets errno to C. + +Note that the corresponding C is synchronous and is +documented under L. + +Example: open a file, mmap and mlock it - both will be undone when +C<$data> gets destroyed. + + open my $fh, "<", $path or die "$path: $!"; + my $data; + IO::AIO::mmap $data, -s $fh, IO::AIO::PROT_READ, IO::AIO::MAP_SHARED, $fh; + aio_mlock $data; # mlock in background + +=item aio_mlockall $flags, $callback->($status) + +Calls the C function with the given C<$flags> (a combination of +C and C). + +On systems that do not implement C, this function returns C<-1> +and sets errno to C. + +Note that the corresponding C is synchronous and is +documented under L. + +Example: asynchronously lock all current and future pages into memory. + + aio_mlockall IO::AIO::MCL_FUTURE; + =item aio_group $callback->(...) This is a very special aio request: Instead of doing something, it is a @@ -1296,16 +1425,24 @@ =item IO::AIO::poll_cb -Process some outstanding events on the result pipe. You have to call this -regularly. 
Returns C<0> if all events could be processed, or C<-1> if it -returned earlier for whatever reason. Returns immediately when no events -are outstanding. The amount of events processed depends on the settings of -C and C. +Process some outstanding events on the result pipe. You have to call +this regularly. Returns C<0> if all events could be processed (or there +were no events to process), or C<-1> if it returned earlier for whatever +reason. Returns immediately when no events are outstanding. The amount of +events processed depends on the settings of C and +C. If not all requests were processed for whatever reason, the filehandle will still be ready when C returns, so normally you don't have to do anything special to have it called later. +Apart from calling C when the event filehandle becomes +ready, it can be beneficial to call this function from loops which submit +a lot of requests, to make sure the results get processed when they become +available and not just when the loop is finished and the event loop takes +over again. This function returns very fast when there are no outstanding +requests. + Example: Install an Event watcher that automatically calls IO::AIO::poll_cb with high priority (more examples can be found in the SYNOPSIS section, at the top of this document): @@ -1314,6 +1451,33 @@ poll => 'r', async => 1, cb => \&IO::AIO::poll_cb); +=item IO::AIO::poll_wait + +If there are any outstanding requests and none of them in the result +phase, wait till the result filehandle becomes ready for reading (simply +does a C on the filehandle. This is useful if you want to -synchronously wait for some requests to finish). - -See C for an example. - -=item IO::AIO::poll - -Waits until some requests have been handled. - -Returns the number of requests processed, but is otherwise strictly -equivalent to: - - IO::AIO::poll_wait, IO::AIO::poll_cb - -=item IO::AIO::flush - -Wait till all outstanding AIO requests have been handled. 
- -Strictly equivalent to: - - IO::AIO::poll_wait, IO::AIO::poll_cb - while IO::AIO::nreqs; - =back =head3 CONTROLLING THE NUMBER OF THREADS @@ -1416,10 +1553,11 @@ =item IO::AIO::max_idle $nthreads -Limit the number of threads (default: 4) that are allowed to idle (i.e., -threads that did not get a request to process within 10 seconds). That -means if a thread becomes idle while C<$nthreads> other threads are also -idle, it will free its resources and exit. +Limit the number of threads (default: 4) that are allowed to idle +(i.e., threads that did not get a request to process within the idle +timeout (default: 10 seconds)). That means if a thread becomes idle while +C<$nthreads> other threads are also idle, it will free its resources and +exit. This is useful when you allow a large number of threads (e.g. 100 or 1000) to allow for extremely high load situations, but want to free resources @@ -1429,23 +1567,45 @@ creation is fast. If thread creation is very slow on your system you might want to use larger values. +=item IO::AIO::idle_timeout $seconds + +Sets the minimum idle timeout (default 10) after which worker threads are +allowed to exit. See C. + =item IO::AIO::max_outstanding $maxreqs +Sets the maximum number of outstanding requests to C<$maxreqs>. If +you do queue up more than this number of requests, the next call to +C (and other functions calling C, such as +C or C) will block until the limit is no +longer exceeded. + +In other words, this setting does not enforce a queue limit, but can be +used to make poll functions block if the limit is exceeded. + This is a very bad function to use in interactive programs because it blocks, and a bad way to reduce concurrency because it is inexact: Better use an C together with a feed callback. -Sets the maximum number of outstanding requests to C<$nreqs>. 
If you -do queue up more than this number of requests, the next call to the -C (and C and other functions calling C) -function will block until the limit is no longer exceeded. - -The default value is very large, so there is no practical limit on the -number of outstanding requests. - -You can still queue as many requests as you want. Therefore, -C is mainly useful in simple scripts (with low values) or -as a stop gap to shield against fatal memory overflow (with large values). +Its main use is in scripts without an event loop - when you want to stat +a lot of files, you can write something like this: + + IO::AIO::max_outstanding 32; + + for my $path (...) { + aio_stat $path , ...; + IO::AIO::poll_cb; + } + + IO::AIO::flush; + +The call to C inside the loop will normally return instantly, but +as soon as more than C<32> requests are in-flight, it will block until +some requests have been handled. This keeps the loop from pushing a large +number of C requests onto the queue. + +The default value for C is very large, so there is no +practical limit on the number of outstanding requests. =back @@ -1493,7 +1653,7 @@ =item IO::AIO::fadvise $fh, $offset, $len, $advice -Simply calls the C function (see it's +Simply calls the C function (see its manpage for details). The following advice constants are avaiable: C, C, C, C, @@ -1502,6 +1662,92 @@ On systems that do not implement C, this function returns ENOSYS, otherwise the return value of C. +=item IO::AIO::madvise $scalar, $offset, $len, $advice + +Simply calls the C function (see its +manpage for details). The following advice constants are +available: C, C, +C, C, C. + +On systems that do not implement C, this function returns +ENOSYS, otherwise the return value of C. + +=item IO::AIO::mprotect $scalar, $offset, $len, $protect + +Simply calls the C function on the preferably AIO::mmap'ed +$scalar (see its manpage for details). The following protect +constants are available: C, C, +C, C. 
+ +On systems that do not implement C, this function returns +ENOSYS, otherwise the return value of C. + +=item IO::AIO::mmap $scalar, $length, $prot, $flags, $fh[, $offset] + +Memory-maps a file (or anonymous memory range) and attaches it to the +given C<$scalar>, which will act like a string scalar. + +The only operations allowed on the scalar are C/C that don't +change the string length, and most read-only operations such as copying it +or searching it with regexes and so on. + +Anything else is unsafe and will, at best, result in memory leaks. + +The memory map associated with the C<$scalar> is automatically removed +when the C<$scalar> is destroyed, or when the C or +C functions are called. + +This calls the C(2) function internally. See your system's manual +page for details on the C<$length>, C<$prot> and C<$flags> parameters. + +The C<$length> must be larger than zero and smaller than the actual +filesize. + +C<$prot> is a combination of C, C, +C and/or C. + +C<$flags> can be a combination of C or +C, or a number of system-specific flags (when +not available, they are defined as 0): C +(which is set to C if your system only provides this +constant), C, C, +C, C or +C. + +If C<$fh> is C, then a file descriptor of C<-1> is passed. + +C<$offset> is the offset from the start of the file - it generally must be +a multiple of C and defaults to C<0>. + +Example: + + use Digest::MD5; + use IO::AIO; + + open my $fh, ". + +=item IO::AIO::munlock $scalar, $offset = 0, $length = undef + +Calls the C function, undoing the effects of a previous +C call (see its description for details). + +=item IO::AIO::munlockall + +Calls the C function. + +On systems that do not implement C, this function returns +ENOSYS, otherwise the return value of C. 
+ =back =cut @@ -1512,21 +1758,50 @@ 1; -=head2 FORK BEHAVIOUR +=head1 EVENT LOOP INTEGRATION + +It is recommended to use L to integrate IO::AIO +automatically into many event loops: + + # AnyEvent integration (EV, Event, Glib, Tk, POE, urxvt, pureperl...) + use AnyEvent::AIO; + +You can also integrate IO::AIO manually into many event loops, here are +some examples of how to do this: -This module should do "the right thing" when the process using it forks: + # EV integration + my $aio_w = EV::io IO::AIO::poll_fileno, EV::READ, \&IO::AIO::poll_cb; + + # Event integration + Event->io (fd => IO::AIO::poll_fileno, + poll => 'r', + cb => \&IO::AIO::poll_cb); + + # Glib/Gtk2 integration + add_watch Glib::IO IO::AIO::poll_fileno, + in => sub { IO::AIO::poll_cb; 1 }; + + # Tk integration + Tk::Event::IO->fileevent (IO::AIO::poll_fileno, "", + readable => \&IO::AIO::poll_cb); + + # Danga::Socket integration + Danga::Socket->AddOtherFds (IO::AIO::poll_fileno => + \&IO::AIO::poll_cb); + +=head2 FORK BEHAVIOUR -Before the fork, IO::AIO enters a quiescent state where no requests -can be added in other threads and no results will be processed. After -the fork the parent simply leaves the quiescent state and continues -request/result processing, while the child frees the request/result queue -(so that the requests started before the fork will only be handled in the -parent). Threads will be started on demand until the limit set in the -parent process has been reached again. - -In short: the parent will, after a short pause, continue as if fork had -not been called, while the child will act as if IO::AIO has not been used -yet. +Usage of pthreads in a program changes the semantics of fork +considerably. Specifically, only async-safe functions can be called after +fork. Perl doesn't know about this, so in general, you cannot call fork +with defined behaviour in perl. 
IO::AIO uses pthreads, so this applies, +but many other extensions and (for inexplicable reasons) perl itself often +is linked against pthreads, so this limitation applies. + +Some operating systems have extensions that allow safe use of fork, and +this module should do "the right thing" on those, and tries on others. At +the time of this writing (2011) only GNU/Linux supports these extensions +to POSIX. =head2 MEMORY USAGE