--- IO-AIO/AIO.pm 2017/06/23 21:43:51 1.271 +++ IO-AIO/AIO.pm 2018/03/23 01:14:08 1.284 @@ -173,7 +173,7 @@ use base 'Exporter'; BEGIN { - our $VERSION = 4.35; + our $VERSION = 4.4; our @AIO_REQ = qw(aio_sendfile aio_seek aio_read aio_write aio_open aio_close aio_stat aio_lstat aio_unlink aio_rmdir aio_readdir aio_readdirx @@ -185,6 +185,7 @@ aio_chmod aio_utime aio_truncate aio_msync aio_mtouch aio_mlock aio_mlockall aio_statvfs + aio_slurp aio_wd); our @EXPORT = (@AIO_REQ, qw(aioreq_pri aioreq_nice)); @@ -279,6 +280,8 @@ IO::AIO::nreqs IO::AIO::nready IO::AIO::npending + $nfd = IO::AIO::get_fdlimit [EXPERIMENTAL] + IO::AIO::min_fdlimit $nfd [EXPERIMENTAL] IO::AIO::sendfile $ofh, $ifh, $offset, $count IO::AIO::fadvise $fh, $offset, $len, $advice @@ -611,91 +614,6 @@ fsid => 1810 } -Here is a (likely partial - send me updates!) list of fsid values used by -Linux - it is safe to hardcode these when C<$^O> is C: - - 0x0000adf5 adfs - 0x0000adff affs - 0x5346414f afs - 0x09041934 anon-inode filesystem - 0x00000187 autofs - 0x42465331 befs - 0x1badface bfs - 0x42494e4d binfmt_misc - 0x9123683e btrfs - 0x0027e0eb cgroupfs - 0xff534d42 cifs - 0x73757245 coda - 0x012ff7b7 coh - 0x28cd3d45 cramfs - 0x453dcd28 cramfs-wend (wrong endianness) - 0x64626720 debugfs - 0x00001373 devfs - 0x00001cd1 devpts - 0x0000f15f ecryptfs - 0x00414a53 efs - 0x0000137d ext - 0x0000ef53 ext2/ext3/ext4 - 0x0000ef51 ext2 - 0xf2f52010 f2fs - 0x00004006 fat - 0x65735546 fuseblk - 0x65735543 fusectl - 0x0bad1dea futexfs - 0x01161970 gfs2 - 0x47504653 gpfs - 0x00004244 hfs - 0xf995e849 hpfs - 0x00c0ffee hostfs - 0x958458f6 hugetlbfs - 0x2bad1dea inotifyfs - 0x00009660 isofs - 0x000072b6 jffs2 - 0x3153464a jfs - 0x6b414653 k-afs - 0x0bd00bd0 lustre - 0x0000137f minix - 0x0000138f minix 30 char names - 0x00002468 minix v2 - 0x00002478 minix v2 30 char names - 0x00004d5a minix v3 - 0x19800202 mqueue - 0x00004d44 msdos - 0x0000564c novell - 0x00006969 nfs - 0x6e667364 nfsd - 0x00003434 nilfs - 
0x5346544e ntfs - 0x00009fa1 openprom - 0x7461636F ocfs2 - 0x00009fa0 proc - 0x6165676c pstorefs - 0x0000002f qnx4 - 0x68191122 qnx6 - 0x858458f6 ramfs - 0x52654973 reiserfs - 0x00007275 romfs - 0x67596969 rpc_pipefs - 0x73636673 securityfs - 0xf97cff8c selinux - 0x0000517b smb - 0x534f434b sockfs - 0x73717368 squashfs - 0x62656572 sysfs - 0x012ff7b6 sysv2 - 0x012ff7b5 sysv4 - 0x01021994 tmpfs - 0x15013346 udf - 0x00011954 ufs - 0x54190100 ufs byteswapped - 0x00009fa2 usbdevfs - 0x01021997 v9fs - 0xa501fcf5 vxfs - 0xabba1974 xenfs - 0x012ff7b4 xenix - 0x58465342 xfs - 0x012fd16d xia - =item aio_utime $fh_or_path, $atime, $mtime, $callback->($status) Works like perl's C function (including the special case of $atime @@ -742,11 +660,13 @@ to deallocate a file range. IO::AIO also supports C, to remove a range -(without leaving a hole) and C, to zero a range (see -your L manpage). +(without leaving a hole), C, to zero a range, +C to insert a range and C +to unshare shared blocks (see your L manpage). The file system block size used by C is presumably the -C returned by C. +C returned by C, but different filesystems and filetypes +can dictate other limitations. If C isn't available or cannot be emulated (currently no emulation will be attempted), passes C<-1> and sets C<$!> to C. @@ -871,10 +791,10 @@ =item IO::AIO::READDIR_DENTS -When this flag is off, then the callback gets an arrayref consisting of -names only (as with C), otherwise it gets an arrayref with -C<[$name, $type, $inode]> arrayrefs, each describing a single directory -entry in more detail. +Normally the callback gets an arrayref consisting of names only (as +with C). If this flag is set, then the callback gets an +arrayref with C<[$name, $type, $inode]> arrayrefs, each describing a +single directory entry in more detail: C<$name> is the name of the entry. @@ -884,9 +804,9 @@ C, C, C, C, C. -C means just that: readdir does not know. If you need to -know, you have to run stat yourself. 
Also, for speed reasons, the C<$type> -scalars are read-only: you can not modify them. +C means just that: readdir does not know. If you need +to know, you have to run stat yourself. Also, for speed/memory reasons, +the C<$type> scalars are read-only: you must not modify them. C<$inode> is the inode number (which might not be exact on systems with 64 bit inode numbers and 32 bit perls). This field has unspecified content on @@ -907,12 +827,14 @@ =item IO::AIO::READDIR_STAT_ORDER When this flag is set, then the names will be returned in an order -suitable for stat()'ing each one. That is, when you plan to stat() -all files in the given directory, then the returned order will likely -be fastest. - -If both this flag and C are specified, then -the likely dirs come first, resulting in a less optimal stat order. +suitable for stat()'ing each one. That is, when you plan to stat() most or +all files in the given directory, then the returned order will likely be +faster. + +If both this flag and C are specified, +then the likely dirs come first, resulting in a less optimal stat order +for stat'ing all entries, but likely a more optimal order for finding +subdirectories. =item IO::AIO::READDIR_FOUND_UNKNOWN @@ -924,11 +846,42 @@ =back +=item aio_slurp $pathname, $offset, $length, $data, $callback->($status) + +Opens, reads and closes the given file. The data is put into C<$data>, +which is resized as required. + +If C<$offset> is negative, then it is counted from the end of the file. + +If C<$length> is zero, then the remaining length of the file is +used. Also, in this case, the same limitations to modifying C<$data> apply +as when IO::AIO::mmap is used, i.e. it must only be modified in-place +with C. If the size of the file is known, specifying a non-zero +C<$length> results in a performance advantage. + +This request is similar to the older C<aio_load> request, but since it is +a single request, it might be more efficient to use. + +Example: load F</etc/passwd> into C<$passwd>. 
+ + my $passwd; + aio_slurp "/etc/passwd", 0, 0, $passwd, sub { + $_[0] >= 0 + or die "/etc/passwd: $!\n"; + + printf "/etc/passwd is %d bytes long, and contains:\n", length $passwd; + print $passwd; + }; + IO::AIO::flush; + + =item aio_load $pathname, $data, $callback->($status) This is a composite request that tries to fully load the given file into memory. Status is the same as with aio_read. +Using C<aio_slurp> might be more efficient, as it is a single request. + =cut sub aio_load($$;$) { @@ -958,6 +911,8 @@ destination) from C<$srcpath> to C<$dstpath> and call the callback with a status of C<0> (ok) or C<-1> (error, see C<$!>). +Existing destination files will be truncated. + This is a composite request that creates the destination file with mode 0200 and copies the contents of the source file into it using C, followed by restoring atime, mtime, access mode and @@ -1076,7 +1031,7 @@ names, directories you can recurse into (directories), and ones you cannot recurse into (everything else, including symlinks to directories). -C is a composite request that creates of many sub requests_ +C is a composite request that generates many sub requests. C<$maxreq> specifies the maximum number of outstanding aio requests that this function generates. If it is C<< <= 0 >>, then a suitable default will be chosen (currently 4). @@ -1479,10 +1434,11 @@ C, C or C. -At the time of this writing (Linux 3.2), this requets is unreliable unless +At the time of this writing (Linux 3.2), this request is unreliable unless C<$count> is C<undef>, as the kernel has all sorts of bugs preventing -it to return all extents of a range for files with large number of -extents. The code works around all these issues if C<$count> is undef. +it from returning all extents of a range for files with a large number of +extents. The code (only) works around all these issues if C<$count> is +C<undef>. =item aio_group $callback->(...) 
@@ -1605,8 +1561,8 @@ nowhere at all), while the directory fd, if available on the system, will still point to the original directory. Most functions accepting a pathname will use the directory fd on newer systems, and the string on -older systems. Some functions (such as realpath) will always rely on the -string form of the pathname. +older systems. Some functions (such as C<realpath>) will always rely on +the string form of the pathname. So this functionality is mainly useful to get some protection against C, to easily get an absolute path out of a relative path for future @@ -2008,7 +1964,7 @@ use an C together with a feed callback. Its main use is in scripts without an event loop - when you want to stat -a lot of files, you can write somehting like this: +a lot of files, you can write something like this: IO::AIO::max_outstanding 32; @@ -2064,6 +2020,31 @@ =over 4 +=item $numfd = IO::AIO::get_fdlimit + +This function is I<experimental> and subject to change. + +Tries to find the current file descriptor limit and returns it, or +C and sets C<$!> in case of an error. The limit is one larger than +the highest valid file descriptor number. + +=item IO::AIO::min_fdlimit [$numfd] + +This function is I<experimental> and subject to change. + +Tries to increase the current file descriptor limit(s) to at least C<$numfd> +by changing the soft or hard file descriptor resource limit. If C<$numfd> +is missing, it will try to set a very high limit, although this is not +recommended when you know the actual minimum that you require. + +If the limit cannot be raised enough, the function makes a best-effort +attempt to increase the limit as much as possible, using various +tricks, while still failing. You can query the resulting limit using +C<IO::AIO::get_fdlimit>. + +If an error occurs, returns C and sets C<$!>, otherwise returns +true. + =item IO::AIO::sendfile $ofh, $ifh, $offset, $count Calls the C function, which is like C, @@ -2089,8 +2070,8 @@ Simply calls the C function (see its manpage for details). 
The following advice constants are available: C, C, -C, C, C, -C. +C, C, +C. If C<$offset> is negative, counts from the end. If C<$length> is negative, the remaining length of the C<$scalar> is used. If possible, C<$length> @@ -2238,6 +2219,91 @@ time of this writing, C, C and C (Linux 3.4, for packet-based pipes) were supported. +Example: create a pipe race-free w.r.t. threads and fork: + + my ($rfh, $wfh) = IO::AIO::pipe2 IO::AIO::O_CLOEXEC + or die "pipe2: $!\n"; + +=item $fh = IO::AIO::eventfd [$initval, [$flags]] + +This is a direct interface to the Linux L<eventfd(2)> system call. The +(unhelpful) defaults for C<$initval> and C<$flags> are C<0> for both. + +On success, the new eventfd filehandle is returned, otherwise returns +C. If the eventfd syscall is missing, fails with C. + +Please refer to L<eventfd(2)> for more info on this call. + +The following symbol flag values are available: C, +C and C (Linux 2.6.30). + +Example: create a new eventfd filehandle: + + $fh = IO::AIO::eventfd 0, IO::AIO::O_CLOEXEC + or die "eventfd: $!\n"; + +=item $fh = IO::AIO::timerfd_create $clockid[, $flags] + +This is a direct interface to the Linux L<timerfd_create(2)> system call. The +(unhelpful) default for C<$flags> is C<0>. + +On success, the new timerfd filehandle is returned, otherwise returns +C. If the timerfd_create syscall is missing, fails with C. + +Please refer to L<timerfd_create(2)> for more info on this call. + +The following C<$clockid> values are +available: C, C, +C (Linux 3.15), +C (Linux 3.11) and +C (Linux 3.11). + +The following C<$flags> values are available (Linux +2.6.27): C and C. 
+ +Example: create a new timerfd and set it to one-second repeated alarms, +then wait for two alarms: + + my $fh = IO::AIO::timerfd_create IO::AIO::CLOCK_BOOTTIME, IO::AIO::TFD_CLOEXEC + or die "timerfd_create: $!\n"; + + defined IO::AIO::timerfd_settime $fh, 0, 1, 1 + or die "timerfd_settime: $!\n"; + + for (1..2) { + 8 == sysread $fh, my $buf, 8 + or die "timerfd read failure\n"; + + printf "number of expirations (likely 1): %d\n", + unpack "Q", $buf; + } + +=item ($cur_interval, $cur_value) = IO::AIO::timerfd_settime $fh, $flags, $new_interval, $new_value + +This is a direct interface to the Linux L<timerfd_settime(2)> system +call. Please refer to its manpage for more info on this call. + +The new itimerspec is specified using two (possibly fractional) second +values, C<$new_interval> and C<$new_value>. + +On success, the current interval and value are returned (as per +C<IO::AIO::timerfd_gettime>). On failure, the empty list is returned. + +The following C<$flags> values are +available: C and +C. + +See C<IO::AIO::timerfd_create> for a full example. + +=item ($cur_interval, $cur_value) = IO::AIO::timerfd_gettime $fh + +This is a direct interface to the Linux L<timerfd_gettime(2)> system +call. Please refer to its manpage for more info on this call. + +On success, returns the current values of interval and value for the given +timerfd (as potentially fractional second values). On failure, the empty +list is returned. + =back =cut @@ -2312,6 +2378,15 @@ =back +=head2 LINUX-SPECIFIC CALLS + +When a call is documented as "linux-specific" then this means it +originated on GNU/Linux. C<IO::AIO> will usually try to autodetect the +availability and compatibility of such calls regardless of the platform +it is compiled on, so platforms such as FreeBSD which often implement +these calls will work. When in doubt, call them and see if they fail with +C. + =head2 MEMORY USAGE Per-request usage: @@ -2333,7 +2408,18 @@ =head1 KNOWN BUGS -Known bugs will be fixed in the next release. 
+Known bugs will be fixed in the next release :) + +=head1 KNOWN ISSUES + +Calls that try to "import" foreign memory areas (such as C +or C) do not work with generic lvalues, such as +non-created hash slots or other scalars I didn't think of. It's best to +avoid such and either use scalar variables or make sure that the scalar +exists (e.g. by storing C) and isn't "funny" (e.g. tied). + +I am not sure anything can be done about this, so this is considered a +known issue, rather than a bug. =head1 SEE ALSO