--- IO-AIO/README 2010/01/10 23:44:02 1.43 +++ IO-AIO/README 2011/06/29 11:25:17 1.48 @@ -4,7 +4,7 @@ SYNOPSIS use IO::AIO; - aio_open "/etc/passwd", O_RDONLY, 0, sub { + aio_open "/etc/passwd", IO::AIO::O_RDONLY, 0, sub { my $fh = shift or die "/etc/passwd: $!"; ... @@ -74,7 +74,7 @@ my $aio_w = EV::io IO::AIO::poll_fileno, EV::READ, \&IO::AIO::poll_cb; # queue the request to open /etc/passwd - aio_open "/etc/passwd", O_RDONLY, 0, sub { + aio_open "/etc/passwd", IO::AIO::O_RDONLY, 0, sub { my $fh = shift or die "error while opening: $!"; @@ -189,6 +189,8 @@ aio_pathsync $path, $callback->($status) aio_msync $scalar, $offset = 0, $length = undef, flags = 0, $callback->($status) aio_mtouch $scalar, $offset = 0, $length = undef, flags = 0, $callback->($status) + aio_mlock $scalar, $offset = 0, $length = undef, $callback->($status) + aio_mlockall $flags, $callback->($status) aio_group $callback->(...) aio_nop $callback->() @@ -204,6 +206,7 @@ IO::AIO::min_parallel $nthreads IO::AIO::max_parallel $nthreads IO::AIO::max_idle $nthreads + IO::AIO::idle_timeout $seconds IO::AIO::max_outstanding $maxreqs IO::AIO::nreqs IO::AIO::nready @@ -211,7 +214,9 @@ IO::AIO::sendfile $ofh, $ifh, $offset, $count IO::AIO::fadvise $fh, $offset, $len, $advice - IO::AIO::mlockall $flags + IO::AIO::madvise $scalar, $offset, $length, $advice + IO::AIO::mprotect $scalar, $offset, $length, $protect + IO::AIO::munlock $scalar, $offset = 0, $length = undef IO::AIO::munlockall AIO REQUEST FUNCTIONS @@ -294,7 +299,7 @@ Example: - aio_open "/etc/passwd", O_RDONLY, 0, sub { + aio_open "/etc/passwd", IO::AIO::O_RDONLY, 0, sub { if ($_[0]) { print "open successful, fh is $_[0]\n"; ... 
@@ -303,6 +308,15 @@ } }; + In addition to all the common open modes/flags ("O_RDONLY", + "O_WRONLY", "O_RDWR", "O_CREAT", "O_TRUNC", "O_EXCL" and + "O_APPEND"), the following POSIX and non-POSIX constants are + available (missing ones on your system are, as usual, 0): + + "O_ASYNC", "O_DIRECT", "O_NOATIME", "O_CLOEXEC", "O_NOCTTY", + "O_NOFOLLOW", "O_NONBLOCK", "O_EXEC", "O_SEARCH", "O_DIRECTORY", + "O_DSYNC", "O_RSYNC", "O_SYNC" and "O_TTY_INIT". + aio_close $fh, $callback->($status) Asynchronously close a file and call the callback with the result code. @@ -355,23 +369,43 @@ reading at byte offset $in_offset, and starts writing at the current file offset of $out_fh. Because of that, it is not safe to issue more than one "aio_sendfile" per $out_fh, as they will interfere - with each other. + with each other. The same $in_fh works fine though, as this function + does not move or use the file offset of $in_fh. + + Please note that "aio_sendfile" can read more bytes from $in_fh than + are written, and there is no way to find out how many more bytes + have been read from "aio_sendfile" alone, as "aio_sendfile" only + provides the number of bytes written to $out_fh. Only if the result + value equals $length one can assume that $length bytes have been + read. + + Unlike with other "aio_" functions, it makes a lot of sense to use + "aio_sendfile" on non-blocking sockets, as long as one end + (typically the $in_fh) is a file - the file I/O will then be + asynchronous, while the socket I/O will be non-blocking. Note, + however, that you can run into a trap where "aio_sendfile" reads + some data with readahead, then fails to write all data, and when the + socket is ready the next time, the data in the cache is already + lost, forcing "aio_sendfile" to again hit the disk. Explicit + "aio_read" + "aio_write" lets you better control resource usage.
- This call tries to make use of a native "sendfile" syscall to + This call tries to make use of a native "sendfile"-like syscall to provide zero-copy operation. For this to work, $out_fh should refer to a socket, and $in_fh should refer to an mmap'able file. If a native sendfile cannot be found or it fails with "ENOSYS", - "ENOTSUP", "EOPNOTSUPP", "EAFNOSUPPORT", "EPROTOTYPE" or "ENOTSOCK", - it will be emulated, so you can call "aio_sendfile" on any type of - filehandle regardless of the limitations of the operating system. - - Please note, however, that "aio_sendfile" can read more bytes from - $in_fh than are written, and there is no way to find out how many - bytes have been read from "aio_sendfile" alone, as "aio_sendfile" - only provides the number of bytes written to $out_fh. Only if the - result value equals $length one can assume that $length bytes have - been read. + "EINVAL", "ENOTSUP", "EOPNOTSUPP", "EAFNOSUPPORT", "EPROTOTYPE" or + "ENOTSOCK", it will be emulated, so you can call "aio_sendfile" on + any type of filehandle regardless of the limitations of the + operating system. + + As native sendfile syscalls (as practically any non-POSIX interface + hacked together in a hurry to improve benchmark numbers) tend to be + rather buggy on many systems, this implementation tries to work + around some known bugs in Linux and FreeBSD kernels (probably + others, too), but that might fail, so you really really should check + the return value of "aio_sendfile" - fewer bytes than expected might + have been transferred. aio_readahead $fh,$offset,$length, $callback->($retval) "aio_readahead" populates the page cache with data from a file so @@ -402,6 +436,15 @@ silently truncated unless perl itself is compiled with large file support.
+ To help interpret the mode and dev/rdev stat values, IO::AIO offers + the following constants and functions (if not implemented, the + constants will be 0 and the functions will either "croak" or fall + back on traditional behaviour). + + "S_IFMT", "S_IFIFO", "S_IFCHR", "S_IFBLK", "S_IFLNK", "S_IFREG", + "S_IFDIR", "S_IFWHT", "S_IFSOCK", "IO::AIO::major $dev_t", + "IO::AIO::minor $dev_t", "IO::AIO::makedev $major, $minor". + Example: Print the length of /etc/passwd: aio_stat "/etc/passwd", sub { @@ -499,6 +542,9 @@ aio_mknod $path, IO::AIO::S_IFIFO | $mode, 0, sub { ... + See "aio_stat" for info about some potentially helpful extra + constants and functions. + aio_link $srcpath, $dstpath, $callback->($status) Asynchronously create a new link to the existing object at $srcpath at the path $dstpath and call the callback with the result code. @@ -544,9 +590,9 @@ modified): IO::AIO::READDIR_DENTS - When this flag is off, then the callback gets an arrayref with - of names only (as with "aio_readdir"), otherwise it gets an - arrayref with "[$name, $type, $inode]" arrayrefs, each + When this flag is off, then the callback gets an arrayref + consisting of names only (as with "aio_readdir"), otherwise it + gets an arrayref with "[$name, $type, $inode]" arrayrefs, each describing a single directory entry in more detail. $name is the name of the entry. @@ -569,14 +615,15 @@ IO::AIO::READDIR_DIRS_FIRST When this flag is set, then the names will be returned in an - order where likely directories come first. This is useful when - you need to quickly find directories, or you want to find all - directories while avoiding to stat() each entry. + order where likely directories come first, in optimal stat + order. This is useful when you need to quickly find directories, + or you want to find all directories while avoiding to stat() + each entry. If the system returns type information in readdir, then this is used to find directories directly. 
Otherwise, likely directories - are files beginning with ".", or otherwise files with no dots, - of which files with short names are tried first. + are names beginning with ".", or otherwise names with no dots, + of which names with short names are tried first. IO::AIO::READDIR_STAT_ORDER When this flag is set, then the names will be returned in an @@ -753,6 +800,46 @@ "IO::AIO::MT_MODIFY", which modifies the memory page s(by reading and writing an octet from it, which dirties the page). + aio_mlock $scalar, $offset = 0, $length = undef, $callback->($status) + This is a rather advanced IO::AIO call, which works best on + mmap(2)ed scalars. + + It reads in all the pages of the underlying storage into memory (if + any) and locks them, so they are not getting swapped/paged out or + removed. + + If $length is undefined, then the scalar will be locked till the + end. + + On systems that do not implement "mlock", this function returns -1 + and sets errno to "ENOSYS". + + Note that the corresponding "munlock" is synchronous and is + documented under "MISCELLANEOUS FUNCTIONS". + + Example: open a file, mmap and mlock it - both will be undone when + $data gets destroyed. + + open my $fh, "<", $path or die "$path: $!"; + my $data; + IO::AIO::mmap $data, -s $fh, IO::AIO::PROT_READ, IO::AIO::MAP_SHARED, $fh; + aio_mlock $data; # mlock in background + + aio_mlockall $flags, $callback->($status) + Calls the "mlockall" function with the given $flags (a combination + of "IO::AIO::MCL_CURRENT" and "IO::AIO::MCL_FUTURE"). + + On systems that do not implement "mlockall", this function returns + -1 and sets errno to "ENOSYS". + + Note that the corresponding "munlockall" is synchronous and is + documented under "MISCELLANEOUS FUNCTIONS". + + Example: asynchronously lock all current and future pages into + memory. + + aio_mlockall IO::AIO::MCL_FUTURE; + aio_group $callback->(...) 
This is a very special aio request: Instead of doing something, it is a container for other aio requests, which is useful if you want @@ -957,16 +1044,23 @@ IO::AIO::poll_cb Process some outstanding events on the result pipe. You have to call - this regularly. Returns 0 if all events could be processed, or -1 if - it returned earlier for whatever reason. Returns immediately when no - events are outstanding. The amount of events processed depends on - the settings of "IO::AIO::max_poll_req" and - "IO::AIO::max_poll_time". + this regularly. Returns 0 if all events could be processed (or there + were no events to process), or -1 if it returned earlier for + whatever reason. Returns immediately when no events are outstanding. + The amount of events processed depends on the settings of + "IO::AIO::max_poll_req" and "IO::AIO::max_poll_time". If not all requests were processed for whatever reason, the filehandle will still be ready when "poll_cb" returns, so normally you don't have to do anything special to have it called later. + Apart from calling "IO::AIO::poll_cb" when the event filehandle + becomes ready, it can be beneficial to call this function from loops + which submit a lot of requests, to make sure the results get + processed when they become available and not just when the loop is + finished and the event loop takes over again. This function returns + very fast when there are no outstanding requests. + Example: Install an Event watcher that automatically calls IO::AIO::poll_cb with high priority (more examples can be found in the SYNOPSIS section, at the top of this document): @@ -1069,9 +1163,10 @@ IO::AIO::max_idle $nthreads Limit the number of threads (default: 4) that are allowed to idle - (i.e., threads that did not get a request to process within 10 - seconds). That means if a thread becomes idle while $nthreads other - threads are also idle, it will free its resources and exit. 
+ (i.e., threads that did not get a request to process within the idle + timeout (default: 10 seconds)). That means if a thread becomes idle + while $nthreads other threads are also idle, it will free its + resources and exit. This is useful when you allow a large number of threads (e.g. 100 or 1000) to allow for extremely high load situations, but want to free @@ -1082,23 +1177,44 @@ creation is fast. If thread creation is very slow on your system you might want to use larger values. + IO::AIO::idle_timeout $seconds + Sets the minimum idle timeout (default 10) after which worker + threads are allowed to exit. See "IO::AIO::max_idle". + IO::AIO::max_outstanding $maxreqs + Sets the maximum number of outstanding requests to $maxreqs. If you do + queue up more than this number of requests, the next call to + "IO::AIO::poll_cb" (and other functions calling "poll_cb", such as + "IO::AIO::flush" or "IO::AIO::poll") will block until the limit is + no longer exceeded. + + In other words, this setting does not enforce a queue limit, but can + be used to make poll functions block if the limit is exceeded. + This is a very bad function to use in interactive programs because it blocks, and a bad way to reduce concurrency because it is inexact: Better use an "aio_group" together with a feed callback. - Sets the maximum number of outstanding requests to $nreqs. If you do - queue up more than this number of requests, the next call to the - "poll_cb" (and "poll_some" and other functions calling "poll_cb") - function will block until the limit is no longer exceeded. - - The default value is very large, so there is no practical limit on - the number of outstanding requests. - - You can still queue as many requests as you want. Therefore, - "max_outstanding" is mainly useful in simple scripts (with low - values) or as a stop gap to shield against fatal memory overflow - (with large values).
+ Its main use is in scripts without an event loop - when you want to + stat a lot of files, you can write something like this: + + IO::AIO::max_outstanding 32; + + for my $path (...) { + aio_stat $path , ...; + IO::AIO::poll_cb; + } + + IO::AIO::flush; + + The call to "poll_cb" inside the loop will normally return + instantly, but as soon as more than 32 requests are in-flight, it + will block until some requests have been handled. This keeps the + loop from pushing a large number of "aio_stat" requests onto the + queue. + + The default value for "max_outstanding" is very large, so there is + no practical limit on the number of outstanding requests. STATISTICAL INFORMATION IO::AIO::nreqs @@ -1132,7 +1248,7 @@ Returns the number of bytes copied, or -1 on error. IO::AIO::fadvise $fh, $offset, $len, $advice - Simply calls the "posix_fadvise" function (see it's manpage for + Simply calls the "posix_fadvise" function (see its manpage for details). The following advice constants are avaiable: "IO::AIO::FADV_NORMAL", "IO::AIO::FADV_SEQUENTIAL", "IO::AIO::FADV_RANDOM", "IO::AIO::FADV_NOREUSE", @@ -1141,6 +1257,25 @@ On systems that do not implement "posix_fadvise", this function returns ENOSYS, otherwise the return value of "posix_fadvise". + IO::AIO::madvise $scalar, $offset, $len, $advice + Simply calls the "posix_madvise" function (see its manpage for + details). The following advice constants are available: + "IO::AIO::MADV_NORMAL", "IO::AIO::MADV_SEQUENTIAL", + "IO::AIO::MADV_RANDOM", "IO::AIO::MADV_WILLNEED", + "IO::AIO::MADV_DONTNEED". + + On systems that do not implement "posix_madvise", this function + returns ENOSYS, otherwise the return value of "posix_madvise". + + IO::AIO::mprotect $scalar, $offset, $len, $protect + Simply calls the "mprotect" function on the preferably AIO::mmap'ed + $scalar (see its manpage for details). The following protect + constants are available: "IO::AIO::PROT_NONE", "IO::AIO::PROT_READ", + "IO::AIO::PROT_WRITE", "IO::AIO::PROT_EXEC".
+ + On systems that do not implement "mprotect", this function returns + ENOSYS, otherwise the return value of "mprotect". + IO::AIO::mmap $scalar, $length, $prot, $flags, $fh[, $offset] Memory-maps a file (or anonymous memory range) and attaches it to the given $scalar, which will act like a string scalar. @@ -1194,12 +1329,9 @@ IO::AIO::munmap $scalar Removes a previous mmap and undefines the $scalar. - IO::AIO::mlockall $flags - Calls the "mlockall" function with the given $flags (a combination - of "IO::AIO::MCL_CURRENT" and "IO::AIO::MCL__FUTURE"). - - On systems that do not implement "mlockall", this function returns - ENOSYS, otherwise the return value of "mlockall". + IO::AIO::munlock $scalar, $offset = 0, $length = undef + Calls the "munlock" function, undoing the effects of a previous + "aio_mlock" call (see its description for details). IO::AIO::munlockall Calls the "munlockall" function. @@ -1238,19 +1370,17 @@ \&IO::AIO::poll_cb); FORK BEHAVIOUR - This module should do "the right thing" when the process using it forks: - - Before the fork, IO::AIO enters a quiescent state where no requests can - be added in other threads and no results will be processed. After the - fork the parent simply leaves the quiescent state and continues - request/result processing, while the child frees the request/result - queue (so that the requests started before the fork will only be handled - in the parent). Threads will be started on demand until the limit set in - the parent process has been reached again. - - In short: the parent will, after a short pause, continue as if fork had - not been called, while the child will act as if IO::AIO has not been - used yet. + Usage of pthreads in a program changes the semantics of fork + considerably. Specifically, only async-safe functions can be called + after fork. Perl doesn't know about this, so in general, you cannot call + fork with defined behaviour in perl. 
IO::AIO uses pthreads, so this + applies, but many other extensions and (for inexplicable reasons) perl + itself often is linked against pthreads, so this limitation applies. + + Some operating systems have extensions that allow safe use of fork, and + this module should do "the right thing" on those, and tries on others. + At the time of this writing (2011) only GNU/Linux supports these + extensions to POSIX. MEMORY USAGE Per-request usage: