--- IO-AIO/AIO.pm	2008/09/30 03:50:59	1.135
+++ IO-AIO/AIO.pm	2009/06/13 13:33:00	1.153
@@ -195,12 +195,12 @@
 use base 'Exporter';
 
 BEGIN {
-   our $VERSION = '3.07';
+   our $VERSION = '3.22';
 
    our @AIO_REQ = qw(aio_sendfile aio_read aio_write aio_open aio_close
-                     aio_stat aio_lstat aio_unlink aio_rmdir aio_readdir
+                     aio_stat aio_lstat aio_unlink aio_rmdir aio_readdir aio_readdirx
                      aio_scandir aio_symlink aio_readlink aio_sync aio_fsync
-                     aio_fdatasync aio_pathsync aio_readahead
+                     aio_fdatasync aio_sync_file_range aio_pathsync aio_readahead
                      aio_rename aio_link aio_move aio_copy aio_group
                      aio_nop aio_mknod aio_load aio_rmtree aio_mkdir aio_chown
                      aio_chmod aio_utime aio_truncate);
@@ -211,6 +211,8 @@
                        nreqs nready npending nthreads
                        max_poll_time max_poll_reqs);
 
+   push @AIO_REQ, qw(aio_busy); # not exported
+
    @IO::AIO::GRP::ISA = 'IO::AIO::REQ';
 
    require XSLoader;
@@ -226,7 +228,7 @@
 and they all accept an additional (and optional) C<$callback> argument
 which must be a code reference. This code reference will get called with
 the syscall return code (e.g. most syscalls return C<-1> on error, unlike
-perl, which usually delivers "false") as it's sole argument when the given
+perl, which usually delivers "false") as its sole argument after the given
 syscall has been executed asynchronously.
 
 All functions expecting a filehandle keep a copy of the filehandle
@@ -250,7 +252,7 @@
 use something else to ensure your scalar has the correct contents.
 
 This works, btw. independent of the internal UTF-8 bit, which IO::AIO
-handles correctly wether it is set or not.
+handles correctly whether it is set or not.
 
 =over 4
 
@@ -338,16 +340,20 @@
 
 =item aio_write $fh,$offset,$length, $data,$dataoffset, $callback->($retval)
 
-Reads or writes C<$length> bytes from the specified C<$fh> and C<$offset>
-into the scalar given by C<$data> and offset C<$dataoffset> and calls the
-callback without the actual number of bytes read (or -1 on error, just
-like the syscall).
+Reads or writes C<$length> bytes from or to the specified C<$fh> and
+C<$offset> into the scalar given by C<$data> and offset C<$dataoffset>
+and calls the callback with the actual number of bytes read (or -1 on
+error, just like the syscall).
+
+C<aio_read> will, like C<sysread>, shrink or grow the C<$data> scalar to
+offset plus the actual number of bytes read.
 
 If C<$offset> is undefined, then the current file descriptor offset will
 be used (and updated), otherwise the file descriptor offset will not be
 changed by these calls.
 
-If C<$length> is undefined in C<aio_write>, use the remaining length of C<$data>.
+If C<$length> is undefined in C<aio_write>, use the remaining length of
+C<$data>.
 
 If C<$dataoffset> is less than zero, it will be counted from the end of
 C<$data>.
@@ -529,8 +535,74 @@
 directory (i.e. opendir + readdir + closedir). The entries will not be
 sorted, and will B<NOT> include the C<.> and C<..> entries.
 
-The callback a single argument which is either C<undef> or an array-ref
-with the filenames.
+The callback is passed a single argument which is either C<undef> or an
+array-ref with the filenames.
+
+
+=item aio_readdirx $pathname, $flags, $callback->($entries, $flags)
+
+Quite similar to C<aio_readdir>, but the C<$flags> argument allows you to tune
+behaviour and output format. In case of an error, C<$entries> will be
+C<undef>.
+
+The flags are a combination of the following constants, ORed together (the
+flags will also be passed to the callback, possibly modified):
+
+=over 4
+
+=item IO::AIO::READDIR_DENTS
+
+When this flag is off, then the callback gets an arrayref consisting of names
+only (as with C<aio_readdir>), otherwise it gets an arrayref with
+C<[$name, $type, $inode]> arrayrefs, each describing a single directory
+entry in more detail.
+
+C<$name> is the name of the entry.
+
+C<$type> is one of the C<IO::AIO::DT_xxx> constants:
+
+C<IO::AIO::DT_UNKNOWN>, C<IO::AIO::DT_FIFO>, C<IO::AIO::DT_CHR>, C<IO::AIO::DT_DIR>,
+C<IO::AIO::DT_BLK>, C<IO::AIO::DT_REG>, C<IO::AIO::DT_LNK>, C<IO::AIO::DT_SOCK>,
+C<IO::AIO::DT_WHT>.
+
+C<IO::AIO::DT_UNKNOWN> means just that: readdir does not know. If you need to
+know, you have to run stat yourself. Also, for speed reasons, the C<$type>
+scalars are read-only: you can not modify them.
+
+C<$inode> is the inode number (which might not be exact on systems with 64
+bit inode numbers and 32 bit perls). On systems that do not deliver the
+inode information, this will always be zero.
+
+=item IO::AIO::READDIR_DIRS_FIRST
+
+When this flag is set, then the names will be returned in an order where
+likely directories come first. This is useful when you need to quickly
+find directories, or you want to find all directories without having to
+stat() each entry.
+
+If the system returns type information in readdir, then this is used
+to find directories directly.  Otherwise, likely directories are files
+beginning with ".", or otherwise files with no dots, of which files with
+short names are tried first.
+
+=item IO::AIO::READDIR_STAT_ORDER
+
+When this flag is set, then the names will be returned in an order
+suitable for stat()'ing each one. That is, when you plan to stat()
+all files in the given directory, then the returned order will likely
+be fastest.
+
+If both this flag and C<IO::AIO::READDIR_DIRS_FIRST> are specified, then
+the likely dirs come first, resulting in a less optimal stat order.
+
+=item IO::AIO::READDIR_FOUND_UNKNOWN
+
+This flag should not be set when calling C<aio_readdirx>. Instead, it
+is being set by C<aio_readdirx>, when any of the C<$type>'s found were
+C<IO::AIO::DT_UNKNOWN>. The absence of this flag therefore indicates that all
+C<$type>'s are known, which can be used to speed up some algorithms.
+
+=back
 
 
 =item aio_load $path, $data, $callback->($status)
@@ -587,7 +659,7 @@
    aioreq_pri $pri;
    add $grp aio_open $src, O_RDONLY, 0, sub {
       if (my $src_fh = $_[0]) {
-         my @stat = stat $src_fh;
+         my @stat = stat $src_fh; # hmm, might block over nfs?
 
          aioreq_pri $pri;
          add $grp aio_open $dst, O_CREAT | O_WRONLY | O_TRUNC, 0200, sub {
@@ -598,13 +670,26 @@
                      $grp->result (0);
                      close $src_fh;
 
-                     # those should not normally block. should. should.
-                     utime $stat[8], $stat[9], $dst;
-                     chmod $stat[2] & 07777, $dst_fh;
-                     chown $stat[4], $stat[5], $dst_fh;
+                     my $ch = sub {
+                        aioreq_pri $pri;
+                        add $grp aio_chmod $dst_fh, $stat[2] & 07777, sub {
+                           aioreq_pri $pri;
+                           add $grp aio_chown $dst_fh, $stat[4], $stat[5], sub {
+                              aioreq_pri $pri;
+                              add $grp aio_close $dst_fh;
+                           }
+                        };
+                     };
 
                      aioreq_pri $pri;
-                     add $grp aio_close $dst_fh;
+                     add $grp aio_utime $dst_fh, $stat[8], $stat[9], sub {
+                        if ($_[0] < 0 && $! == ENOSYS) {
+                           aioreq_pri $pri;
+                           add $grp aio_utime $dst, $stat[8], $stat[9], $ch;
+                        } else {
+                           $ch->();
+                        }
+                     };
                   } else {
                      $grp->result (-1);
                      close $src_fh;
@@ -633,9 +718,9 @@
 destination) from C<$srcpath> to C<$dstpath> and call the callback with
 the C<0> (error) or C<-1> ok.
 
-This is a composite request that tries to rename(2) the file first. If
-rename files with C<EXDEV>, it copies the file with C<aio_copy> and, if
-that is successful, unlinking the C<$srcpath>.
+This is a composite request that tries to rename(2) the file first; if
+rename fails with C<EXDEV>, it copies the file with C<aio_copy> and, if
+that is successful, unlinks the C<$srcpath>.
 
 =cut
 
@@ -692,20 +777,24 @@
 
 The C<aio_readdir> cannot be avoided, but C<stat()>'ing every entry can.
 
-After reading the directory, the modification time, size etc. of the
-directory before and after the readdir is checked, and if they match (and
-isn't the current time), the link count will be used to decide how many
-entries are directories (if >= 2). Otherwise, no knowledge of the number
-of subdirectories will be assumed.
-
-Then entries will be sorted into likely directories (everything without
-a non-initial dot currently) and likely non-directories (everything
-else). Then every entry plus an appended C</.> will be C<stat>'ed,
-likely directories first. If that succeeds, it assumes that the entry
-is a directory or a symlink to directory (which will be checked
+If readdir returns file type information, then this is used directly to
+find directories.
+
+Otherwise, after reading the directory, the modification time, size etc.
+of the directory before and after the readdir is checked, and if they
+match (and isn't the current time), the link count will be used to decide
+how many entries are directories (if >= 2). Otherwise, no knowledge of the
+number of subdirectories will be assumed.
+
+Then entries will be sorted into likely directories (everything without a
+non-initial dot currently) and likely non-directories (see C<aio_readdirx>).
+Then every entry plus an appended C</.> will be C<stat>'ed, likely directories
+first, in order of their inode numbers. If that succeeds, it assumes that the
+entry is a directory or a symlink to directory (which will be checked
 seperately). This is often faster than stat'ing the entry itself because
 filesystems might detect the type of the entry without reading the inode
-data (e.g. ext2fs filetype feature).
+data (e.g. ext2fs filetype feature), even on systems that cannot return
+the filetype information on readdir.
 
 If the known number of directories (link count - 2) has been reached, the
 rest of the entries is assumed to be non-directories.
@@ -737,7 +826,7 @@
 
       # read the directory entries
       aioreq_pri $pri;
-      add $grp aio_readdir $path, sub {
+      add $grp aio_readdirx $path, READDIR_DIRS_FIRST, sub {
          my $entries = shift
             or return $grp->result ();
 
@@ -753,18 +842,11 @@
                $ndirs = -1;
             } else {
                # if nlink == 2, we are finished
-               # on non-posix-fs's, we rely on nlink < 2
+               # for non-posix-fs's, we rely on nlink < 2
                $ndirs = (stat _)[3] - 2
                   or return $grp->result ([], $entries);
             }
 
-            # sort into likely dirs and likely nondirs
-            # dirs == files without ".", short entries first
-            $entries = [map $_->[0],
-                           sort { $b->[1] cmp $a->[1] }
-                              map [$_, sprintf "%s%04d", (/.\./ ? "1" : "0"), length],
-                                 @$entries];
-
             my (@dirs, @nondirs);
 
             my $statgrp = add $grp aio_group sub {
@@ -774,7 +856,7 @@
             limit $statgrp $maxreq;
             feed $statgrp sub {
                return unless @$entries;
-               my $entry = pop @$entries;
+               my $entry = shift @$entries;
 
                aioreq_pri $pri;
                add $statgrp aio_stat "$path/$entry/.", sub {
@@ -857,6 +939,18 @@
 If this call isn't available because your OS lacks it or it couldn't be
 detected, it will be emulated by calling C<fsync> instead.
 
+=item aio_sync_file_range $fh, $offset, $nbytes, $flags, $callback->($status)
+
+Sync the data portion of the file specified by C<$offset> and C<$nbytes>
+to disk (but NOT the metadata), by calling the Linux-specific
+sync_file_range call. If sync_file_range is not available or it returns
+ENOSYS, then fdatasync or fsync is used instead.
+
+C<$flags> can be a combination of C<IO::AIO::SYNC_FILE_RANGE_WAIT_BEFORE>,
+C<IO::AIO::SYNC_FILE_RANGE_WRITE> and
+C<IO::AIO::SYNC_FILE_RANGE_WAIT_AFTER>: refer to the sync_file_range
+manpage for details.
+
 =item aio_pathsync $path, $callback->($status)
 
 This request tries to open, fsync and close the given path. This is a
@@ -953,8 +1047,9 @@
 Cancels the request, if possible. Has the effect of skipping execution
 when entering the B<execute> state and skipping calling the callback when
 entering the the B<result> state, but will leave the request otherwise
-untouched. That means that requests that currently execute will not be
-stopped and resources held by the request will not be freed prematurely.
+untouched (with the exception of readdir). That means that requests that
+currently execute will not be stopped and resources held by the request
+will not be freed prematurely.
 
 =item cb $req $callback->(...)
 
@@ -1062,9 +1157,9 @@
 Sets a feeder/generator on this group: every group can have an attached
 generator that generates requests if idle. The idea behind this is that,
 although you could just queue as many requests as you want in a group,
-this might starve other requests for a potentially long time.  For
-example, C<aio_scandir> might generate hundreds of thousands C<aio_stat>
-requests, delaying any later requests for a long time.
+this might starve other requests for a potentially long time. For example,
+C<aio_scandir> might generate hundreds of thousands C<aio_stat> requests,
+delaying any later requests for a long time.
 
 To avoid this, and allow incremental generation of requests, you can
 instead a group and set a feeder on it that generates those requests. The
@@ -1078,7 +1173,8 @@
 If the feed does not queue more requests when called, it will be
 automatically removed from the group.
 
-If the feed limit is C<0>, it will be set to C<2> automatically.
+If the feed limit is C<0> when this method is called, it will be set to
+C<2> automatically.
 
 Example:
 
@@ -1100,6 +1196,9 @@
 
 Setting the limit to C<0> will pause the feeding process.
 
+The default value for the limit is C<0>, but note that setting a feeder
+automatically bumps it up to C<2>.
+
 =back
 
 =head2 SUPPORT FUNCTIONS