--- libev/ev.pod	2011/01/17 12:11:12	1.360
+++ libev/ev.pod	2019/06/26 00:01:46	1.455
@@ -1,3 +1,5 @@
+=encoding utf-8
+
 =head1 NAME
 
 libev - a high performance full-featured event loop written in C
@@ -60,7 +62,7 @@
      // now wait for events to arrive
      ev_run (loop, 0);
 
-     // unloop was called, so exit
+     // break was called, so exit
      return 0;
    }
 
@@ -84,9 +86,9 @@
 
 This manual tries to be very detailed, but unfortunately, this also makes
 it very long. If you just want to know the basics of libev, I suggest
-reading L<ANATOMY OF A WATCHER>, then the L<EXAMPLE PROGRAM> above and
-look up the missing functions in L<GLOBAL FUNCTIONS> and the C<ev_io> and
-C<ev_timer> sections in L<WATCHER TYPES>.
+reading L</ANATOMY OF A WATCHER>, then the L</EXAMPLE PROGRAM> above and
+look up the missing functions in L</GLOBAL FUNCTIONS> and the C<ev_io> and
+C<ev_timer> sections in L</WATCHER TYPES>.
 
 =head1 ABOUT LIBEV
 
@@ -105,10 +107,10 @@
 
 =head2 FEATURES
 
-Libev supports C<select>, C<poll>, the Linux-specific C<epoll>, the
-BSD-specific C<kqueue> and the Solaris-specific event port mechanisms
-for file descriptor events (C<ev_io>), the Linux C<inotify> interface
-(for C<ev_stat>), Linux eventfd/signalfd (for faster and cleaner
+Libev supports C<select>, C<poll>, the Linux-specific aio and C<epoll>
+interfaces, the BSD-specific C<kqueue> and the Solaris-specific event port
+mechanisms for file descriptor events (C<ev_io>), the Linux C<inotify>
+interface (for C<ev_stat>), Linux eventfd/signalfd (for faster and cleaner
 inter-thread wakeup (C<ev_async>)/signal handling (C<ev_signal>)) relative
 timers (C<ev_timer>), absolute timers with customised rescheduling
 (C<ev_periodic>), synchronous signals (C<ev_signal>), process status
@@ -159,9 +161,13 @@
 so C<NDEBUG> will disable this checking): these are programming errors in
 the libev caller and need to be fixed there.
 
-Libev also has a few internal error-checking C<assert>ions, and also has
-extensive consistency checking code. These do not trigger under normal
-circumstances, as they indicate either a bug in libev or worse.
+Via the C<EV_FREQUENT> macro you can compile in and/or enable extensive
+consistency checking code inside libev that can be used to check for
+internal inconsistencies, usually caused by application bugs.
+
+Libev also has a few internal error-checking C<assert>ions. These do not
+trigger under normal circumstances, as they indicate either a bug in libev
+or worse.
 
 
 =head1 GLOBAL FUNCTIONS
@@ -176,13 +182,19 @@
 Returns the current time as libev would use it. Please note that the
 C<ev_now> function is usually faster and also often returns the timestamp
 you actually want to know. Also interesting is the combination of
-C<ev_update_now> and C<ev_now>.
+C<ev_now_update> and C<ev_now>.
 
 =item ev_sleep (ev_tstamp interval)
 
-Sleep for the given interval: The current thread will be blocked until
-either it is interrupted or the given time interval has passed. Basically
-this is a sub-second-resolution C<sleep ()>.
+Sleep for the given interval: The current thread will be blocked
+until either it is interrupted or the given time interval has
+passed (approximately - it might return a bit earlier even if not
+interrupted). Returns immediately if C<< interval <= 0 >>.
+
+Basically this is a sub-second-resolution C<sleep ()>.
+
+The range of the C<interval> is limited - libev only guarantees to work
+with sleep times of up to one day (C<< interval <= 86400 >>).
 
 =item int ev_version_major ()
 
@@ -243,7 +255,7 @@
 
 See the description of C<ev_embed> watchers for more info.
 
-=item ev_set_allocator (void *(*cb)(void *ptr, long size))
+=item ev_set_allocator (void *(*cb)(void *ptr, long size) throw ())
 
 Sets the allocation function to use (the prototype is similar - the
 semantics are identical to the C<realloc> C89/SuS/POSIX function). It is
@@ -259,12 +271,32 @@
 free some memory if it cannot allocate memory, to use a special allocator,
 or even to sleep a while and retry until some memory is available.
 
+Example: The following is the C<realloc> function that libev itself uses
+which should work with C<realloc> and C<free> functions of all kinds and
+is probably a good basis for your own implementation.
+
+   static void *
+   ev_realloc_emul (void *ptr, long size) EV_NOEXCEPT
+   {
+     if (size)
+       return realloc (ptr, size);
+
+     free (ptr);
+     return 0;
+   }
+
 Example: Replace the libev allocator with one that waits a bit and then
-retries (example requires a standards-compliant C<realloc>).
+retries.
 
    static void *
    persistent_realloc (void *ptr, size_t size)
    {
+     if (!size)
+       {
+         free (ptr);
+         return 0;
+       }
+
      for (;;)
        {
          void *newptr = realloc (ptr, size);
@@ -279,7 +311,7 @@
    ...
    ev_set_allocator (persistent_realloc);
 
-=item ev_set_syserr_cb (void (*cb)(const char *msg))
+=item ev_set_syserr_cb (void (*cb)(const char *msg) throw ())
 
 Set the callback function to call on a retryable system call error (such
 as failed select, poll, epoll_wait). The message is a printable string
@@ -392,8 +424,10 @@
 or setgid) then libev will I<not> look at the environment variable
 C<LIBEV_FLAGS>. Otherwise (the default), this environment variable will
 override the flags completely if it is found in the environment. This is
-useful to try out specific backends to test their performance, or to work
-around bugs.
+useful to try out specific backends to test their performance, to work
+around bugs, or to make libev threadsafe (accessing environment variables
+cannot be done in a threadsafe way, but usually it works if no other
+thread modifies them).
 
 =item C<EVFLAG_FORKCHECK>
 
@@ -403,13 +437,14 @@
 This works by calling C<getpid ()> on every iteration of the loop,
 and thus this might slow down your event loop if you do a lot of loop
 iterations and little real work, but is usually not noticeable (on my
-GNU/Linux system for example, C<getpid> is actually a simple 5-insn sequence
-without a system call and thus I<very> fast, but my GNU/Linux system also has
-C<pthread_atfork> which is even faster).
+GNU/Linux system for example, C<getpid> is actually a simple 5-insn
+sequence without a system call and thus I<very> fast, but my GNU/Linux
+system also has C<pthread_atfork> which is even faster). (Update: glibc
+version 2.25 apparently removed the C<getpid> optimisation again).
 
 The big advantage of this flag is that you can forget about fork (and
-forget about forgetting to tell libev about forking) when you use this
-flag.
+forget about forgetting to tell libev about forking, although you still
+have to ignore C<SIGPIPE>) when you use this flag.
 
 This flag setting cannot be overridden or specified in the C<LIBEV_FLAGS>
 environment variable.
@@ -437,7 +472,7 @@
 =item C<EVFLAG_NOSIGMASK>
 
 When this flag is specified, then libev will avoid to modify the signal
-mask. Specifically, this means you ahve to make sure signals are unblocked
+mask. Specifically, this means you have to make sure signals are unblocked
 when you want to receive them.
 
 This behaviour is useful when you want to do your own signal handling, or
@@ -482,13 +517,13 @@
 
 =item C<EVBACKEND_EPOLL>   (value 4, Linux)
 
-Use the linux-specific epoll(7) interface (for both pre- and post-2.6.9
+Use the Linux-specific epoll(7) interface (for both pre- and post-2.6.9
 kernels).
 
-For few fds, this backend is a bit little slower than poll and select,
-but it scales phenomenally better. While poll and select usually scale
-like O(total_fds) where n is the total number of fds (or the highest fd),
-epoll scales either O(1) or O(active_fds).
+For few fds, this backend is a little bit slower than poll and select, but
+it scales phenomenally better. While poll and select usually scale like
+O(total_fds) where total_fds is the total number of fds (or the highest
+fd), epoll scales either O(1) or O(active_fds).
 
 The epoll mechanism deserves honorable mention as the most misdesigned
 of the more advanced event mechanisms: mere annoyances include silently
@@ -501,19 +536,22 @@
 set, which can take considerable time (one syscall per file descriptor)
 and is of course hard to detect.
 
-Epoll is also notoriously buggy - embedding epoll fds I<should> work, but
-of course I<doesn't>, and epoll just loves to report events for totally
-I<different> file descriptors (even already closed ones, so one cannot
-even remove them from the set) than registered in the set (especially
-on SMP systems). Libev tries to counter these spurious notifications by
-employing an additional generation counter and comparing that against the
-events to filter out spurious ones, recreating the set when required. Last
+Epoll is also notoriously buggy - embedding epoll fds I<should> work,
+but of course I<doesn't>, and epoll just loves to report events for
+totally I<different> file descriptors (even already closed ones, so
+one cannot even remove them from the set) than registered in the set
+(especially on SMP systems). Libev tries to counter these spurious
+notifications by employing an additional generation counter and comparing
+that against the events to filter out spurious ones, recreating the set
+when required. Epoll also erroneously rounds down timeouts, but gives you
+no way to know when and by how much, so sometimes you have to busy-wait
+because epoll returns immediately despite a nonzero timeout. And last but
 not least, it also refuses to work with some file descriptors which work
 perfectly fine with C<select> (files, many character devices...).
 
-Epoll is truly the train wreck analog among event poll mechanisms,
-a frankenpoll, cobbled together in a hurry, no thought to design or
-interaction with others.
+Epoll is truly the train wreck among event poll mechanisms, a frankenpoll,
+cobbled together in a hurry, no thought to design or interaction with
+others. Oh, the pain, will it ever stop...
 
 While stopping, setting and starting an I/O watcher in the same iteration
 will result in some caching, there is still a system call per such
@@ -535,22 +573,66 @@
 the usage. So sad.
 
 While nominally embeddable in other event loops, this feature is broken in
-all kernel versions tested so far.
+a lot of kernel revisions, but probably(!) works in current versions.
+
+This backend maps C<EV_READ> and C<EV_WRITE> in the same way as
+C<EVBACKEND_POLL>.
+
+=item C<EVBACKEND_LINUXAIO>   (value 64, Linux)
+
+Use the Linux-specific Linux AIO (I<not> C<< aio(7) >> but C<<
+io_submit(2) >>) event interface available in post-4.18 kernels (but libev
+only tries to use it in 4.19+).
+
+This is another Linux train wreck of an event interface.
+
+If this backend works for you (as of this writing, it was very
+experimental), it is the best event interface available on Linux and might
+be well worth enabling - if it isn't available in your kernel this will
+be detected and this backend will be skipped.
+
+This backend can batch oneshot requests and supports a user-space ring
+buffer to receive events. It also doesn't suffer from most of the design
+problems of epoll (such as not being able to remove event sources from
+the epoll set), and generally sounds too good to be true. Because, this
+being the Linux kernel, of course it suffers from a whole new set of
+limitations, forcing you to fall back to epoll, inheriting all its design
+issues.
+
+For one, it is not easily embeddable (but probably could be done using
+an event fd at some extra overhead). It also is subject to a system wide
+limit that can be configured in F</proc/sys/fs/aio-max-nr>. If no AIO
+requests are left, this backend will be skipped during initialisation, and
+will switch to epoll when the loop is active.
+
+Most problematic in practice, however, is that not all file descriptors
+work with it. For example, in Linux 5.1, TCP sockets, pipes, event fds,
+files, F</dev/null> and many others are supported, but ttys do not work
+properly (a known bug that the kernel developers don't care about, see
+L<https://lore.kernel.org/patchwork/patch/1047453/>), so this is not
+(yet?) a generic event polling interface.
+
+Overall, it seems the Linux developers just don't want it to have a
+generic event handling mechanism other than C<select> or C<poll>.
+
+To work around all these problems, the current version of libev uses its
+epoll backend as a fallback for file descriptor types that do not work. Or
+falls back completely to epoll if the kernel acts up.
 
 This backend maps C<EV_READ> and C<EV_WRITE> in the same way as
 C<EVBACKEND_POLL>.
 
 =item C<EVBACKEND_KQUEUE>  (value 8, most BSD clones)
 
-Kqueue deserves special mention, as at the time of this writing, it
-was broken on all BSDs except NetBSD (usually it doesn't work reliably
-with anything but sockets and pipes, except on Darwin, where of course
-it's completely useless). Unlike epoll, however, whose brokenness
-is by design, these kqueue bugs can (and eventually will) be fixed
-without API changes to existing programs. For this reason it's not being
-"auto-detected" unless you explicitly specify it in the flags (i.e. using
-C<EVBACKEND_KQUEUE>) or libev was compiled on a known-to-be-good (-enough)
-system like NetBSD.
+Kqueue deserves special mention, as at the time this backend was
+implemented, it was broken on all BSDs except NetBSD (usually it doesn't
+work reliably with anything but sockets and pipes, except on Darwin,
+where of course it's completely useless). Unlike epoll, however, whose
+brokenness is by design, these kqueue bugs can be (and mostly have been)
+fixed without API changes to existing programs. For this reason it's not
+being "auto-detected" on all platforms unless you explicitly specify it
+in the flags (i.e. using C<EVBACKEND_KQUEUE>) or libev was compiled on a
+known-to-be-good (-enough) system like NetBSD.
 
 You still can embed kqueue into a normal poll or select backend and use it
 only for sockets (after having made sure that sockets work with kqueue on
@@ -560,9 +642,9 @@
 kernel is more efficient (which says nothing about its actual speed, of
 course). While stopping, setting and starting an I/O watcher does never
 cause an extra system call as with C<EVBACKEND_EPOLL>, it still adds up to
-two event changes per incident. Support for C<fork ()> is very bad (but
-sane, unlike epoll) and it drops fds silently in similarly hard-to-detect
-cases
+two event changes per incident. Support for C<fork ()> is very bad (you
+might have to leak fds on fork, but it's more sane than epoll) and it
+drops fds silently in similarly hard-to-detect cases.
 
 This backend usually performs well under most conditions.
 
@@ -601,11 +683,11 @@
 
 On the negative side, the interface is I<bizarre> - so bizarre that
 even sun itself gets it wrong in their code examples: The event polling
-function sometimes returning events to the caller even though an error
+function sometimes returns events to the caller even though an error
 occurred, but with no indication whether it has done so or not (yes, it's
-even documented that way) - deadly for edge-triggered interfaces where
-you absolutely have to know whether an event occurred or not because you
-have to re-arm the watcher.
+even documented that way) - deadly for edge-triggered interfaces where you
+absolutely have to know whether an event occurred or not because you have
+to re-arm the watcher.
 
 Fortunately libev seems to be able to work around these idiocies.
 
@@ -646,6 +728,12 @@
 
    struct ev_loop *loop = ev_loop_new (ev_recommended_backends () | EVBACKEND_KQUEUE);
 
+Example: Similarly, on Linux, you might want to take advantage of the
+Linux AIO backend if possible, but fall back to something else if that
+isn't available.
+
+   struct ev_loop *loop = ev_loop_new (ev_recommended_backends () | EVBACKEND_LINUXAIO);
+
 =item ev_loop_destroy (loop)
 
 Destroys an event loop object (frees all memory and kernel state
@@ -671,13 +759,17 @@
 
 =item ev_loop_fork (loop)
 
-This function sets a flag that causes subsequent C<ev_run> iterations to
-reinitialise the kernel state for backends that have one. Despite the
-name, you can call it anytime, but it makes most sense after forking, in
-the child process. You I<must> call it (or use C<EVFLAG_FORKCHECK>) in the
-child before resuming or calling C<ev_run>.
+This function sets a flag that causes subsequent C<ev_run> iterations
+to reinitialise the kernel state for backends that have one. Despite
+the name, you can call it anytime you are allowed to start or stop
+watchers (except inside an C<ev_prepare> callback), but it makes most
+sense after forking, in the child process. You I<must> call it (or use
+C<EVFLAG_FORKCHECK>) in the child before resuming or calling C<ev_run>.
+
+In addition, if you want to reuse a loop (via this function or
+C<EVFLAG_FORKCHECK>), you I<also> have to ignore C<SIGPIPE>.
 
-Again, you I<have> to call it on I<any> loop that you want to re-use after 
+Again, you I<have> to call it on I<any> loop that you want to re-use after
 a fork, I<even if you do not plan to use the loop in the parent>. This is
 because some kernel interfaces *cough* I<kqueue> *cough* do funny things
 during fork.
@@ -757,7 +849,7 @@
 very long time without entering the event loop, updating libev's idea of
 the current time is a good idea.
 
-See also L<The special problem of time updates> in the C<ev_timer> section.
+See also L</The special problem of time updates> in the C<ev_timer> section.
 
 =item ev_suspend (loop)
 
@@ -785,18 +877,22 @@
 Calling C<ev_suspend>/C<ev_resume> has the side effect of updating the
 event loop time (see C<ev_now_update>).
 
-=item ev_run (loop, int flags)
+=item bool ev_run (loop, int flags)
 
 Finally, this is it, the event handler. This function usually is called
 after you have initialised all your watchers and you want to start
 handling events. It will ask the operating system for any new events, call
-the watcher callbacks, an then repeat the whole process indefinitely: This
+the watcher callbacks, and then repeat the whole process indefinitely: This
 is why event loops are called I<loops>.
 
 If the flags argument is specified as C<0>, it will keep handling events
 until either no event watchers are active anymore or C<ev_break> was
 called.
 
+The return value is false if there are no more active watchers (which
+usually means "all jobs done" or "deadlock"), and true in all other cases
+(which usually means "you should call C<ev_run> again").
+
 Please note that an explicit C<ev_break> is usually better than
 relying on all watchers to be stopped when deciding when a program has
 finished (especially in interactive programs), but having a program
@@ -804,8 +900,8 @@
 of relying on its watchers stopping correctly, that is truly a thing of
 beauty.
 
-This function is also I<mostly> exception-safe - you can break out of
-a C<ev_run> call by calling C<longjmp> in a callback, throwing a C++
+This function is I<mostly> exception-safe - you can break out of a
+C<ev_run> call by calling C<longjmp> in a callback, throwing a C++
 exception and so on. This does not decrement the C<ev_depth> value, nor
 will it clear any outstanding C<EVBREAK_ONE> breaks.
 
@@ -827,7 +923,9 @@
 own C<ev_run>"). However, a pair of C<ev_prepare>/C<ev_check> watchers is
 usually a better approach for this kind of thing.
 
-Here are the gory details of what C<ev_run> does:
+Here are the gory details of what C<ev_run> does (this is for your
+understanding, not a guarantee that things will work exactly like this in
+future versions):
 
    - Increment loop depth.
    - Reset the ev_break status.
@@ -870,7 +968,7 @@
    ... queue jobs here, make sure they register event watchers as long
    ... as they still have work to do (even an idle watcher will do..)
    ev_run (my_loop, 0);
-   ... jobs done or somebody called unloop. yeah!
+   ... jobs done or somebody called break. yeah!
 
 =item ev_break (loop, how)
 
@@ -943,10 +1041,11 @@
 By setting a higher I<io collect interval> you allow libev to spend more
 time collecting I/O events, so you can handle more events per iteration,
 at the cost of increasing latency. Timeouts (both C<ev_periodic> and
-C<ev_timer>) will be not affected. Setting this to a non-null value will
+C<ev_timer>) will not be affected. Setting this to a non-null value will
 introduce an additional C<ev_sleep ()> call into most loop iterations. The
 sleep time ensures that libev will not poll for I/O events more often then
-once per this interval, on average.
+once per this interval, on average (as long as the host time resolution is
+good enough).
 
 Likewise, by setting a higher I<timeout collect interval> you allow libev
 to spend more time collecting timeouts, at the expense of increased
@@ -1002,7 +1101,7 @@
 If you want to reset the callback, use C<ev_invoke_pending> as new
 callback.
 
-=item ev_set_loop_release_cb (loop, void (*release)(EV_P), void (*acquire)(EV_P))
+=item ev_set_loop_release_cb (loop, void (*release)(EV_P) throw (), void (*acquire)(EV_P) throw ())
 
 Sometimes you want to share the same loop between multiple threads. This
 can be done relatively simply by putting mutex_lock/unlock calls around
@@ -1010,7 +1109,7 @@
 
 However, C<ev_run> can run an indefinite time, so it is not feasible
 to wait for it to return. One way around this is to wake up the event
-loop via C<ev_break> and C<av_async_send>, another way is to set these
+loop via C<ev_break> and C<ev_async_send>, another way is to set these
 I<release> and I<acquire> callbacks on the loop.
 
 When set, then C<release> will be called just before the thread is
@@ -1160,13 +1259,18 @@
 
 =item C<EV_CHECK>
 
-All C<ev_prepare> watchers are invoked just I<before> C<ev_run> starts
-to gather new events, and all C<ev_check> watchers are invoked just after
-C<ev_run> has gathered them, but before it invokes any callbacks for any
-received events. Callbacks of both watcher types can start and stop as
-many watchers as they want, and all of them will be taken into account
-(for example, a C<ev_prepare> watcher might start an idle watcher to keep
-C<ev_run> from blocking).
+All C<ev_prepare> watchers are invoked just I<before> C<ev_run> starts to
+gather new events, and all C<ev_check> watchers are queued (not invoked)
+just after C<ev_run> has gathered them, but before it queues any callbacks
+for any received events. That means C<ev_prepare> watchers are the last
+watchers invoked before the event loop sleeps or polls for new events, and
+C<ev_check> watchers will be invoked before any other watchers of the same
+or lower priority within an event loop iteration.
+
+Callbacks of both watcher types can start and stop as many watchers as
+they want, and all of them will be taken into account (for example, a
+C<ev_prepare> watcher might start an idle watcher to keep C<ev_run> from
+blocking).
 
 =item C<EV_EMBED>
 
@@ -1299,7 +1403,7 @@
 
 Returns the callback currently set on the watcher.
 
-=item ev_cb_set (ev_TYPE *watcher, callback)
+=item ev_set_cb (ev_TYPE *watcher, callback)
 
 Change the callback. You can change the callback at virtually any time
 (modulo threads).
@@ -1327,7 +1431,7 @@
 The default priority used by watchers when no priority has been set is
 always C<0>, which is supposed to not be too high and not be too low :).
 
-See L<WATCHER PRIORITY MODELS>, below, for a more thorough treatment of
+See L</WATCHER PRIORITY MODELS>, below, for a more thorough treatment of
 priorities.
 
 =item ev_invoke (loop, ev_TYPE *watcher, int revents)
@@ -1362,7 +1466,7 @@
 
 =back
 
-See also the L<ASSOCIATING CUSTOM DATA WITH A WATCHER> and L<BUILDING YOUR
+See also the L</ASSOCIATING CUSTOM DATA WITH A WATCHER> and L</BUILDING YOUR
 OWN COMPOSITE WATCHERS> idioms.
 
 =head2 WATCHER STATES
@@ -1374,14 +1478,16 @@
 
 =over 4
 
-=item initialiased
+=item initialised
 
-Before a watcher can be registered with the event looop it has to be
+Before a watcher can be registered with the event loop it has to be
 initialised. This can be done with a call to C<ev_TYPE_init>, or calls to
 C<ev_init> followed by the watcher-specific C<ev_TYPE_set> function.
 
-In this state it is simply some block of memory that is suitable for use
-in an event loop. It can be moved around, freed, reused etc. at will.
+In this state it is simply some block of memory that is suitable for
+use in an event loop. It can be moved around, freed, reused etc. at
+will - as long as you either keep the memory contents intact, or call
+C<ev_TYPE_init> again.
 
 =item started/running/active
 
@@ -1419,8 +1525,9 @@
 freeing it is often a good idea.
 
 While stopped (and not pending) the watcher is essentially in the
-initialised state, that is it can be reused, moved, modified in any way
-you wish.
+initialised state, that is, it can be reused, moved, modified in any way
+you wish (but when you trash the memory block, you need to C<ev_TYPE_init>
+it again).
 
 =back
 
@@ -1579,13 +1686,13 @@
 
 =head3 The special problem of disappearing file descriptors
 
-Some backends (e.g. kqueue, epoll) need to be told about closing a file
-descriptor (either due to calling C<close> explicitly or any other means,
-such as C<dup2>). The reason is that you register interest in some file
-descriptor, but when it goes away, the operating system will silently drop
-this interest. If another file descriptor with the same number then is
-registered with libev, there is no efficient way to see that this is, in
-fact, a different file descriptor.
+Some backends (e.g. kqueue, epoll, linuxaio) need to be told about closing
+a file descriptor (either due to calling C<close> explicitly or any other
+means, such as C<dup2>). The reason is that you register interest in some
+file descriptor, but when it goes away, the operating system will silently
+drop this interest. If another file descriptor with the same number then
+is registered with libev, there is no efficient way to see that this is,
+in fact, a different file descriptor.
 
 To avoid having to explicitly tell libev about such cases, libev follows
 the following policy:  Each time C<ev_io_set> is being called, libev
@@ -1644,9 +1751,10 @@
 
 =head3 The special problem of fork
 
-Some backends (epoll, kqueue) do not support C<fork ()> at all or exhibit
-useless behaviour. Libev fully supports fork, but needs to be told about
-it in the child if you want to continue to use it in the child.
+Some backends (epoll, kqueue, probably linuxaio) do not support C<fork ()>
+at all or exhibit useless behaviour. Libev fully supports fork, but needs
+to be told about it in the child if you want to continue to use it in the
+child.
 
 To support fork in your child processes, you have to call C<ev_loop_fork
 ()> after a fork in the child, enable C<EVFLAG_FORKCHECK>, or resort to
@@ -1758,10 +1866,11 @@
 
 The callback is guaranteed to be invoked only I<after> its timeout has
 passed (not I<at>, so on systems with very low-resolution clocks this
-might introduce a small delay). If multiple timers become ready during the
-same loop iteration then the ones with earlier time-out values are invoked
-before ones of the same priority with later time-out values (but this is
-no longer true when a callback calls C<ev_run> recursively).
+might introduce a small delay, see "the special problem of being too
+early", below). If multiple timers become ready during the same loop
+iteration then the ones with earlier time-out values are invoked before
+ones of the same priority with later time-out values (but this is no
+longer true when a callback calls C<ev_run> recursively).
 
 =head3 Be smart about timeouts
 
@@ -1846,63 +1955,77 @@
 but remember the time of last activity, and check for a real timeout only
 within the callback:
 
+   ev_tstamp timeout = 60.;
    ev_tstamp last_activity; // time of last activity
+   ev_timer timer;
 
    static void
    callback (EV_P_ ev_timer *w, int revents)
    {
-     ev_tstamp now     = ev_now (EV_A);
-     ev_tstamp timeout = last_activity + 60.;
+     // calculate when the timeout would happen
+     ev_tstamp after = last_activity - ev_now (EV_A) + timeout;
 
-     // if last_activity + 60. is older than now, we did time out
-     if (timeout < now)
+     // if negative, it means the timeout already occurred
+     if (after < 0.)
        {
          // timeout occurred, take action
        }
      else
        {
-         // callback was invoked, but there was some activity, re-arm
-         // the watcher to fire in last_activity + 60, which is
-         // guaranteed to be in the future, so "again" is positive:
-         w->repeat = timeout - now;
-         ev_timer_again (EV_A_ w);
+         // callback was invoked, but there was some recent
+         // activity. simply restart the timer to time out
+         // after "after" seconds, which is the earliest time
+         // the timeout can occur.
+         ev_timer_set (w, after, 0.);
+         ev_timer_start (EV_A_ w);
        }
    }
 
-To summarise the callback: first calculate the real timeout (defined
-as "60 seconds after the last activity"), then check if that time has
-been reached, which means something I<did>, in fact, time out. Otherwise
-the callback was invoked too early (C<timeout> is in the future), so
-re-schedule the timer to fire at that future time, to see if maybe we have
-a timeout then.
-
-Note how C<ev_timer_again> is used, taking advantage of the
-C<ev_timer_again> optimisation when the timer is already running.
+To summarise the callback: first calculate in how many seconds the
+timeout will occur (by calculating the absolute time when it would occur,
+C<last_activity + timeout>, and subtracting the current time, C<ev_now
+(EV_A)> from that).
+
+If this value is negative, then we are already past the timeout, i.e. we
+timed out, and need to do whatever is needed in this case.
+
+Otherwise, we know the earliest time at which the timeout would trigger,
+and simply start the timer with this timeout value.
+
+In other words, each time the callback is invoked it will check whether
+the timeout occurred. If not, it will simply reschedule itself to check
+again at the earliest time it could time out. Rinse. Repeat.
 
 This scheme causes more callback invocations (about one every 60 seconds
 minus half the average time between activity), but virtually no calls to
 libev to change the timeout.
 
-To start the timer, simply initialise the watcher and set C<last_activity>
-to the current time (meaning we just have some activity :), then call the
-callback, which will "do the right thing" and start the timer:
-
-   ev_init (timer, callback);
-   last_activity = ev_now (loop);
-   callback (loop, timer, EV_TIMER);
+To start the machinery, simply initialise the watcher and set
+C<last_activity> to the current time (meaning there was some activity just
+now), then call the callback, which will "do the right thing" and start
+the timer:
+
+   last_activity = ev_now (EV_A);
+   ev_init (&timer, callback);
+   callback (EV_A_ &timer, 0);
 
-And when there is some activity, simply store the current time in
+When there is some activity, simply store the current time in
 C<last_activity>, no libev calls at all:
 
-   last_activity = ev_now (loop);
+   if (activity detected)
+     last_activity = ev_now (EV_A);
+
+When your timeout value changes, then the timeout can be changed by simply
+providing a new value, stopping the timer and calling the callback, which
+will again do the right thing (for example, time out immediately :).
+
+   timeout = new_value;
+   ev_timer_stop (EV_A_ &timer);
+   callback (EV_A_ &timer, 0);
 
 This technique is slightly more complex, but in most cases where the
 time-out is unlikely to be triggered, much more efficient.
 
-Changing the timeout is trivial as well (if it isn't hard-coded in the
-callback :) - just change the timeout and invoke the callback, which will
-fix things for you.
-
 =item 4. Wee, just use a double-linked list for your timeouts.
 
 If there is not one request, but many thousands (millions...), all
@@ -1938,10 +2061,47 @@
 off after the first million or so of active timers, i.e. it's usually
 overkill :)
 
+=head3 The special problem of being too early
+
+If you ask a timer to call your callback after three seconds, then
+you expect it to be invoked after three seconds - but of course, this
+cannot be guaranteed to infinite precision. Less obviously, it cannot be
+guaranteed to any precision by libev - imagine somebody suspending the
+process with a STOP signal for a few hours for example.
+
+So, libev tries to invoke your callback as soon as possible I<after> the
+delay has occurred, but cannot guarantee this.
+
+A less obvious failure mode is calling your callback too early: many event
+loops compare timestamps with an "elapsed delay >= requested delay", but
+this can cause your callback to be invoked much earlier than you would
+expect.
+
+To see why, imagine a system with a clock that only offers full second
+resolution (think windows if you can't come up with a broken enough OS
+yourself). If you schedule a one-second timer at the time 500.9, then the
+event loop will schedule your timeout to elapse at a system time of 500
+(500.9 truncated to the resolution) + 1, or 501.
+
+If an event library looks at the timeout 0.1s later, it will see "501 >=
+501" and invoke the callback 0.1s after it was started, even though a
+one-second delay was requested - this is being "too early", despite best
+intentions.
+
+This is the reason why libev will never invoke the callback if the elapsed
+delay equals the requested delay, but only when the elapsed delay is
+larger than the requested delay. In the example above, libev would only invoke
+the callback at system time 502, or 1.1s after the timer was started.
+
+So, while libev cannot guarantee that your callback will be invoked
+exactly when requested, it I<can> and I<does> guarantee that the requested
+delay has actually elapsed, or in other words, it always errs on the "too
+late" side of things.
+
 =head3 The special problem of time updates
 
-Establishing the current time is a costly operation (it usually takes at
-least two system calls): EV therefore updates its idea of the current
+Establishing the current time is a costly operation (it usually takes
+at least one system call): EV therefore updates its idea of the current
 time only before and after C<ev_run> collects new events, which causes a
 growing difference between C<ev_now ()> and C<ev_time ()> when handling
 lots of events in one iteration.
@@ -1950,13 +2110,48 @@
 time. This is usually the right thing as this timestamp refers to the time
 of the event triggering whatever timeout you are modifying/starting. If
 you suspect event processing to be delayed and you I<need> to base the
-timeout on the current time, use something like this to adjust for this:
+timeout on the current time, use something like the following to adjust
+for it:
 
-   ev_timer_set (&timer, after + ev_now () - ev_time (), 0.);
+   ev_timer_set (&timer, after + (ev_time () - ev_now ()), 0.);
 
 If the event loop is suspended for a long time, you can also force an
 update of the time returned by C<ev_now ()> by calling C<ev_now_update
-()>.
+()>, although that will push the event time of all outstanding events
+further into the future.
+
+=head3 The special problem of unsynchronised clocks
+
+Modern systems have a variety of clocks - libev itself uses the normal
+"wall clock" clock and, if available, the monotonic clock (to avoid time
+jumps).
+
+Neither of these clocks is synchronised with each other or any other clock
+on the system, so C<ev_time ()> might return a considerably different time
+than C<gettimeofday ()> or C<time ()>. On a GNU/Linux system, for example,
+a call to C<gettimeofday> might return a second count that is one higher
+than a directly following call to C<time>.
+
+The moral of this is to only compare libev-related timestamps with
+C<ev_time ()> and C<ev_now ()>, at least if you want better precision than
+a second or so.
+
+One more problem arises due to this lack of synchronisation: if libev uses
+the system monotonic clock and you compare timestamps from C<ev_time>
+or C<ev_now> from when you started your timer and when your callback is
+invoked, you will find that sometimes the callback is a bit "early".
+
+This is because C<ev_timer>s work in real time, not wall clock time, so
+libev makes sure your callback is not invoked before the delay happened,
+I<measured according to the real time>, not the system clock.
+
+If your timeouts are based on a physical timescale (e.g. "time out this
+connection after 100 seconds") then this shouldn't bother you as it is
+exactly the right behaviour.
+
+If you want to compare wall clock/system timestamps to your timers, then
+you need to use C<ev_periodic>s, as these are based on the wall clock
+time, where your comparisons will always generate correct results.
 
 =head3 The special problems of suspended animation
 
@@ -1996,11 +2191,11 @@
 
 =item ev_timer_set (ev_timer *, ev_tstamp after, ev_tstamp repeat)
 
-Configure the timer to trigger after C<after> seconds. If C<repeat>
-is C<0.>, then it will automatically be stopped once the timeout is
-reached. If it is positive, then the timer will automatically be
-configured to trigger again C<repeat> seconds later, again, and again,
-until stopped manually.
+Configure the timer to trigger after C<after> seconds (fractional and
+negative values are supported). If C<repeat> is C<0.>, then it will
+automatically be stopped once the timeout is reached. If it is positive,
+then the timer will automatically be configured to trigger again C<repeat>
+seconds later, again, and again, until stopped manually.
 
 The timer itself will do a best-effort at avoiding drift, that is, if
 you configure a timer to trigger every 10 seconds, then it will normally
@@ -2010,17 +2205,26 @@
 
 =item ev_timer_again (loop, ev_timer *)
 
-This will act as if the timer timed out and restart it again if it is
-repeating. The exact semantics are:
+This will act as if the timer timed out, and restarts it again if it is
+repeating. It basically works like calling C<ev_timer_stop>, updating the
+timeout to the C<repeat> value and calling C<ev_timer_start>.
+
+The exact semantics are as in the following rules, all of which will be
+applied to the watcher:
+
+=over 4
+
+=item If the timer is pending, the pending status is always cleared.
 
-If the timer is pending, its pending status is cleared.
+=item If the timer is started but non-repeating, stop it (as if it timed
+out, without invoking it).
 
-If the timer is started but non-repeating, stop it (as if it timed out).
+=item If the timer is repeating, make the C<repeat> value the new timeout
+and start the timer, if necessary.
 
-If the timer is repeating, either start it if necessary (with the
-C<repeat> value), or reset the running timer to the C<repeat> value.
+=back
 
-This sounds a bit complicated, see L<Be smart about timeouts>, above, for a
+This sounds a bit complicated, see L</Be smart about timeouts>, above, for a
 usage example.
 
 =item ev_tstamp ev_timer_remaining (loop, ev_timer *)
@@ -2083,7 +2287,7 @@
 
 Unlike C<ev_timer>, periodic watchers are not based on real time (or
 relative time, the physical time that passes) but on wall clock time
-(absolute time, the thing you can read on your calender or clock). The
+(absolute time, the thing you can read on your calendar or clock). The
 difference is that wall clock time can run faster or slower than real
 time, and time jumps are not uncommon (e.g. when you adjust your
 wrist-watch).
@@ -2098,8 +2302,8 @@
 
 C<ev_periodic> watchers can also be used to implement vastly more complex
 timers, such as triggering an event on each "midnight, local time", or
-other complicated rules. This cannot be done with C<ev_timer> watchers, as
-those cannot react to time jumps.
+other complicated rules. This cannot easily be done with C<ev_timer>
+watchers, as those cannot react to time jumps.
 
 As with timers, the callback is guaranteed to be invoked only when the
 point in time where it is supposed to trigger has passed. If multiple
@@ -2150,9 +2354,12 @@
 C<ev_periodic> will try to run the callback in this mode at the next possible
 time where C<time = offset (mod interval)>, regardless of any time jumps.
 
-For numerical stability it is preferable that the C<offset> value is near
-C<ev_now ()> (the current time), but there is no range requirement for
-this value, and in fact is often specified as zero.
+The C<interval> I<MUST> be positive, and for numerical stability, the
+interval value should be higher than C<1/8192> (which is around 100
+microseconds) and C<offset> should not be negative and should have
+at most a similar magnitude as the current time (say, within a factor of
+ten). Typical values for offset are, in fact, C<0> or something between
+C<0> and C<interval>, which is also the recommended range.
 
 Note also that there is an upper limit to how often a timer can fire (CPU
 speed for example), so if C<interval> is very small then timing stability
@@ -2192,10 +2399,28 @@
 equal to the passed C<now> value >>.
 
 This can be used to create very complex timers, such as a timer that
-triggers on "next midnight, local time". To do this, you would calculate the
-next midnight after C<now> and return the timestamp value for this. How
-you do this is, again, up to you (but it is not trivial, which is the main
-reason I omitted it as an example).
+triggers on "next midnight, local time". To do this, you would calculate
+the next midnight after C<now> and return the timestamp value for
+this. Here is a (completely untested, no error checking) example on how to
+do this:
+
+   #include <time.h>
+
+   static ev_tstamp
+   my_rescheduler (ev_periodic *w, ev_tstamp now)
+   {
+     time_t tnow = (time_t)now;
+     struct tm tm;
+     localtime_r (&tnow, &tm);
+
+     tm.tm_sec = tm.tm_min = tm.tm_hour = 0; // midnight current day
+     ++tm.tm_mday; // midnight next day
+
+     return mktime (&tm);
+   }
+
+Note: this code might run into trouble on days that have more than two
+midnights (beginning and end).
 
 =back
 
@@ -2270,7 +2495,7 @@
    ev_periodic_init (&hourly_tick, clock_cb,
                      fmod (ev_now (loop), 3600.), 3600., 0);
    ev_periodic_start (loop, &hourly_tick);
-  
+
 
 =head2 C<ev_signal> - signal me when a signal gets signalled!
 
@@ -2290,9 +2515,9 @@
 C<SIGINT> in both the default loop and another loop at the same time. At
 the moment, C<SIGCHLD> is permanently tied to the default loop.
 
-When the first watcher gets started will libev actually register something
-with the kernel (thus it coexists with your own signal handlers as long as
-you don't register any with libev for the same signal).
+Only after the first watcher for a signal is started will libev actually
+register something with the kernel. It thus coexists with your own signal
+handlers as long as you don't register any with libev for the same signal.
 
 If possible and supported, libev will install its handlers with
 C<SA_RESTART> (or equivalent) behaviour enabled, so system calls should
@@ -2487,8 +2712,9 @@
 
 This watches a file system path for attribute changes. That is, it calls
 C<stat> on that path in regular intervals (or when the OS says it changed)
-and sees if it changed compared to the last time, invoking the callback if
-it did.
+and sees if it changed compared to the last time, invoking the callback
+if it did. Starting the watcher C<stat>'s the file, so only changes that
+happen after the watcher has been started will be reported.
 
 The path does not need to exist: changing from "path exists" to "path does
 not exist" is a status change like any other. The condition "path does not
@@ -2728,6 +2954,20 @@
 "pseudo-background processing", or delay processing stuff to after the
 event loop has handled all outstanding events.
 
+=head3 Abusing an C<ev_idle> watcher for its side-effect
+
+As long as there is at least one active idle watcher, libev will never
+sleep unnecessarily. Or in other words, it will loop as fast as possible.
+For this to work, the idle watcher doesn't need to be invoked at all - the
+lowest priority will do.
+
+This mode of operation can be useful together with an C<ev_check> watcher,
+to do something on each event loop iteration - for example to balance load
+between different connections.
+
+See L</Abusing an ev_check watcher for its side-effect> for a longer
+example.
+
 =head3 Watcher-Specific Functions and Data Members
 
 =over 4
@@ -2748,7 +2988,12 @@
    static void
    idle_cb (struct ev_loop *loop, ev_idle *w, int revents)
    {
+     // stop the watcher
+     ev_idle_stop (loop, w);
+
+     // now we can free it
      free (w);
+
      // now do something you wanted to do when the program has
      // no longer anything immediate to do.
    }
@@ -2760,17 +3005,17 @@
 
 =head2 C<ev_prepare> and C<ev_check> - customise your event loop!
 
-Prepare and check watchers are usually (but not always) used in pairs:
+Prepare and check watchers are often (but not always) used in pairs:
 prepare watchers get invoked before the process blocks and check watchers
 afterwards.
 
-You I<must not> call C<ev_run> or similar functions that enter
-the current event loop from either C<ev_prepare> or C<ev_check>
-watchers. Other loops than the current one are fine, however. The
-rationale behind this is that you do not need to check for recursion in
-those watchers, i.e. the sequence will always be C<ev_prepare>, blocking,
-C<ev_check> so if you have one watcher of each kind they will always be
-called in pairs bracketing the blocking call.
+You I<must not> call C<ev_run> (or similar functions that enter the
+current event loop) or C<ev_loop_fork> from either C<ev_prepare> or
+C<ev_check> watchers. Other loops than the current one are fine,
+however. The rationale behind this is that you do not need to check
+for recursion in those watchers, i.e. the sequence will always be
+C<ev_prepare>, blocking, C<ev_check> so if you have one watcher of each
+kind they will always be called in pairs bracketing the blocking call.
 
 Their main purpose is to integrate other event mechanisms into libev and
 their use is somewhat advanced. They could be used, for example, to track
@@ -2798,9 +3043,10 @@
 loop from blocking if lower-priority coroutines are active, thus mapping
 low-priority coroutines to idle/background tasks).
 
-It is recommended to give C<ev_check> watchers highest (C<EV_MAXPRI>)
-priority, to ensure that they are being run before any other watchers
-after the poll (this doesn't matter for C<ev_prepare> watchers).
+When used for this purpose, it is recommended to give C<ev_check> watchers
+highest (C<EV_MAXPRI>) priority, to ensure that they are being run before
+any other watchers after the poll (this doesn't matter for C<ev_prepare>
+watchers).
 
 Also, C<ev_check> watchers (and C<ev_prepare> watchers, too) should not
 activate ("feed") events into libev. While libev fully supports this, they
@@ -2810,6 +3056,25 @@
 C<ev_check> watcher ran (always remind yourself to coexist peacefully with
 others).
 
+=head3 Abusing an C<ev_check> watcher for its side-effect
+
+C<ev_check> (and less often also C<ev_prepare>) watchers can also be
+useful because they are called once per event loop iteration. For
+example, if you want to handle a large number of connections fairly, you
+normally only do a bit of work for each active connection, and if there
+is more work to do, you wait for the next event loop iteration, so other
+connections have a chance of making progress.
+
+Using an C<ev_check> watcher is almost enough: it will be called on the
+next event loop iteration. However, that isn't as soon as possible -
+without external events, your C<ev_check> watcher will not be invoked.
+
+This is where C<ev_idle> watchers come in handy - all you need is a
+single global idle watcher that is active as long as you have one active
+C<ev_check> watcher. The C<ev_idle> watcher makes sure the event loop
+will not sleep, and the C<ev_check> watcher makes sure a callback gets
+invoked. Neither watcher alone can do that.
+
 =head3 Watcher-Specific Functions and Data Members
 
 =over 4
@@ -3019,7 +3284,7 @@
 
 =item ev_embed_init (ev_embed *, callback, struct ev_loop *embedded_loop)
 
-=item ev_embed_set (ev_embed *, callback, struct ev_loop *embedded_loop)
+=item ev_embed_set (ev_embed *, struct ev_loop *embedded_loop)
 
 Configures the watcher to embed the given loop, which must be
 embeddable. If the callback is C<0>, then C<ev_embed_sweep> will be
@@ -3050,7 +3315,7 @@
    struct ev_loop *loop_hi = ev_default_init (0);
    struct ev_loop *loop_lo = 0;
    ev_embed embed;
-   
+
    // see if there is a chance of getting one that works
    // (remember that a flags value of 0 means autodetection)
    loop_lo = ev_embeddable_backends () & ev_recommended_backends ()
@@ -3074,7 +3339,7 @@
    struct ev_loop *loop = ev_default_init (0);
    struct ev_loop *loop_socket = 0;
    ev_embed embed;
-   
+
    if (ev_supported_backends () & ~ev_recommended_backends () & EVBACKEND_KQUEUE)
      if ((loop_socket = ev_loop_new (EVBACKEND_KQUEUE))
        {
@@ -3092,15 +3357,15 @@
 
 Fork watchers are called when a C<fork ()> was detected (usually because
 whoever is a good citizen cared to tell libev about it by calling
-C<ev_default_fork> or C<ev_loop_fork>). The invocation is done before the
-event loop blocks next and before C<ev_check> watchers are being called,
-and only in the child after the fork. If whoever good citizen calling
-C<ev_default_fork> cheats and calls it in the wrong process, the fork
-handlers will be invoked, too, of course.
+C<ev_loop_fork>). The invocation is done before the event loop blocks next
+and before C<ev_check> watchers are being called, and only in the child
+after the fork. If whoever good citizen calling C<ev_loop_fork> cheats
+and calls it in the wrong process, the fork handlers will be invoked, too,
+of course.
 
 =head3 The special problem of life after fork - how is it possible?
 
-Most uses of C<fork()> consist of forking, then some simple calls to set
+Most uses of C<fork ()> consist of forking, then some simple calls to set
 up/change the process environment, followed by a call to C<exec()>. This
 sequence should be handled by libev without any problems.
 
@@ -3187,7 +3452,7 @@
 
 =head2 C<ev_async> - how to wake up an event loop
 
-In general, you cannot use an C<ev_run> from multiple threads or other
+In general, you cannot use an C<ev_loop> from multiple threads or other
 asynchronous sources such as signal handlers (as opposed to multiple event
 loops - those are of course safe to use in different threads).
 
@@ -3199,14 +3464,11 @@
 This functionality is very similar to C<ev_signal> watchers, as signals,
 too, are asynchronous in nature, and signals, too, will be compressed
 (i.e. the number of callback invocations may be less than the number of
-C<ev_async_sent> calls). In fact, you could use signal watchers as a kind
+C<ev_async_send> calls). In fact, you could use signal watchers as a kind
 of "global async watchers" by using a watcher on an otherwise unused
 signal, and C<ev_feed_signal> to signal this watcher from another thread,
 even without knowing which loop owns the signal.
 
-Unlike C<ev_signal> watchers, C<ev_async> works with any event loop, not
-just the default loop.
-
 =head3 Queueing
 
 C<ev_async> does not support queueing of data in any way. The reason
@@ -3307,19 +3569,24 @@
 =item ev_async_send (loop, ev_async *)
 
 Sends/signals/activates the given C<ev_async> watcher, that is, feeds
-an C<EV_ASYNC> event on the watcher into the event loop. Unlike
-C<ev_feed_event>, this call is safe to do from other threads, signal or
-similar contexts (see the discussion of C<EV_ATOMIC_T> in the embedding
-section below on what exactly this means).
+an C<EV_ASYNC> event on the watcher into the event loop, and instantly
+returns.
+
+Unlike C<ev_feed_event>, this call is safe to do from other threads,
+signal or similar contexts (see the discussion of C<EV_ATOMIC_T> in the
+embedding section below on what exactly this means).
 
 Note that, as with other watchers in libev, multiple events might get
-compressed into a single callback invocation (another way to look at this
-is that C<ev_async> watchers are level-triggered, set on C<ev_async_send>,
-reset when the event loop detects that).
-
-This call incurs the overhead of a system call only once per event loop
-iteration, so while the overhead might be noticeable, it doesn't apply to
-repeated calls to C<ev_async_send> for the same event loop.
+compressed into a single callback invocation (another way to look at
+this is that C<ev_async> watchers are level-triggered: they are set on
+C<ev_async_send>, reset when the event loop detects that).
+
+This call incurs the overhead of at most one extra system call per event
+loop iteration, if the event loop is blocked, and no syscall at all if
+the event loop (or your program) is processing events. That means that
+repeated calls are basically free (there is no need to avoid calls for
+performance reasons) and that the overhead becomes smaller (typically
+zero) under load.
 
 =item bool = ev_async_pending (ev_async *)
 
@@ -3346,7 +3613,7 @@
 
 =over 4
 
-=item ev_once (loop, int fd, int events, ev_tstamp timeout, callback)
+=item ev_once (loop, int fd, int events, ev_tstamp timeout, callback, arg)
 
 This function combines a simple timer and an I/O watcher, calls your
 callback on whichever event happens first and automatically stops both
@@ -3384,7 +3651,7 @@
 =item ev_feed_fd_event (loop, int fd, int revents)
 
 Feed an event on the given fd, as if a file descriptor backend detected
-the given events it.
+the given events.
 
 =item ev_feed_signal_event (loop, int signum)
 
@@ -3468,6 +3735,46 @@
        (((char *)w) - offsetof (struct my_biggy, t2));
    }
 
+=head2 AVOIDING FINISHING BEFORE RETURNING
+
+Often you have structures like this in event-based programs:
+
+  callback ()
+  {
+    free (request);
+  }
+
+  request = start_new_request (..., callback);
+
+The intent is to start some "lengthy" operation. The C<request> could be
+used to cancel the operation, or do other things with it.
+
+It's not uncommon to have code paths in C<start_new_request> that
+immediately invoke the callback, for example, to report errors. Or you add
+some caching layer that finds that it can skip the lengthy aspects of the
+operation and simply invoke the callback with the result.
+
+The problem here is that this will happen I<before> C<start_new_request>
+has returned, so C<request> is not set.
+
+Even if you pass the request by some safer means to the callback, you
+might want to do something to the request after starting it, such as
+canceling it, which probably isn't working so well when the callback has
+already been invoked.
+
+A common way around all these issues is to make sure that
+C<start_new_request> I<always> returns before the callback is invoked. If
+C<start_new_request> immediately knows the result, it can artificially
+delay invoking the callback by using a C<prepare> or C<idle> watcher for
+example, or more sneakily, by reusing an existing (stopped) watcher and
+pushing it into the pending queue:
+
+   ev_set_cb (watcher, callback);
+   ev_feed_event (EV_A_ watcher, 0);
+
+This way, C<start_new_request> can safely return before the callback is
+invoked, while not delaying callback invocation too much.
+
 =head2 MODEL/NESTED EVENT LOOP INVOCATIONS AND EXIT CONDITIONS
 
 Often (especially in GUI toolkits) there are places where you have
@@ -3478,7 +3785,7 @@
 main C<ev_run> call, but not the nested one (e.g. user clicked "Quit", but
 a modal "Are you sure?" dialog is still waiting), or just the nested one
 and not the main one (e.g. user clocked "Ok" in a modal dialog), or some
-other combination: In these cases, C<ev_break> will not work alone.
+other combination: In these cases, a simple C<ev_break> will not work.
 
 The solution is to maintain "break this loop" variable for each C<ev_run>
 invocation, and use a loop around C<ev_run> until the condition is
@@ -3490,7 +3797,7 @@
    while (!exit_main_loop)
      ev_run (EV_DEFAULT_ EVRUN_ONCE);
 
-   // in a model watcher
+   // in a modal watcher
    int exit_nested_loop = 0;
 
    while (!exit_nested_loop)
@@ -3546,7 +3853,7 @@
       ev_set_invoke_pending_cb (EV_A_ l_invoke);
       ev_set_loop_release_cb (EV_A_ l_release, l_acquire);
 
-      // then create the thread running ev_loop
+      // then create the thread running ev_run
       pthread_create (&u->tid, 0, l_run, EV_A);
    }
 
@@ -3674,25 +3981,25 @@
    void
    wait_for_event (ev_watcher *w)
    {
-     ev_cb_set (w) = current_coro;
+     ev_set_cb (w, current_coro);
      switch_to (libev_coro);
    }
 
 That basically suspends the coroutine inside C<wait_for_event> and
 continues the libev coroutine, which, when appropriate, switches back to
-this or any other coroutine. I am sure if you sue this your own :)
+this or any other coroutine.
 
 You can do similar tricks if you have, say, threads with an event queue -
 instead of storing a coroutine, you store the queue object and instead of
 switching to a coroutine, you push the watcher onto the queue and notify
 any waiters.
 
-To embed libev, see L<EMBEDDING>, but in short, it's easiest to create two
+To embed libev, see L</EMBEDDING>, but in short, it's easiest to create two
 files, F<my_ev.h> and F<my_ev.c> that include the respective libev files:
 
    // my_ev.h
    #define EV_CB_DECLARE(type)   struct my_coro *cb;
-   #define EV_CB_INVOKE(watcher) switch_to ((watcher)->cb);
+   #define EV_CB_INVOKE(watcher) switch_to ((watcher)->cb)
    #include "../libev/ev.h"
 
    // my_ev.c
@@ -3741,12 +4048,45 @@
 
 =head1 C++ SUPPORT
 
+=head2 C API
+
+The normal C API should work fine when used from C++: both ev.h and the
+libev sources can be compiled as C++. Therefore, code that uses the C API
+will work fine.
+
+Proper exception specifications might have to be added to callbacks passed
+to libev: exceptions may be thrown only from watcher callbacks, all other
+callbacks (allocator, syserr, loop acquire/release and periodic reschedule
+callbacks) must not throw exceptions, and might need a C<noexcept>
+specification. If you have code that needs to be compiled as both C and
+C++ you can use the C<EV_NOEXCEPT> macro for this:
+
+   static void
+   fatal_error (const char *msg) EV_NOEXCEPT
+   {
+     perror (msg);
+     abort ();
+   }
+
+   ...
+   ev_set_syserr_cb (fatal_error);
+
+The only API functions that can currently throw exceptions are C<ev_run>,
+C<ev_invoke>, C<ev_invoke_pending> and C<ev_loop_destroy> (the latter
+because it runs cleanup watchers).
+
+Throwing exceptions in watcher callbacks is only supported if libev itself
+is compiled with a C++ compiler or your C and C++ environments allow
+throwing exceptions through C libraries (most do).
+
+=head2 C++ API
+
 Libev comes with some simplistic wrapper classes for C++ that mainly allow
 you to use some convenience methods to start/stop watchers and also change
 the callback model to a model using method callbacks on objects.
 
 To use it,
-   
+
    #include <ev++.h>
 
 This automatically includes F<ev.h> and puts all of its definitions (many
@@ -3765,6 +4105,10 @@
 you need support for other types of functors please contact the author
 (preferably after implementing it).
 
+For all this to work, your C++ compiler either has to use the same calling
+conventions as your C compiler (for static member functions), or you have
+to embed libev and compile libev itself as C++.
+
 Here is a list of things available in the C<ev> namespace:
 
 =over 4
@@ -3783,7 +4127,7 @@
 For each C<ev_TYPE> watcher in F<ev.h> there is a corresponding class of
 the same name in the C<ev> namespace, with the exception of C<ev_signal>
 which is called C<ev::sig> to avoid clashes with the C<signal> macro
-defines by many implementations.
+defined by many implementations.
 
 All of those classes have these methods:
 
@@ -3855,7 +4199,7 @@
        ...
      }
    }
-    
+
    myfunctor f;
 
    ev::io w;
@@ -3883,10 +4227,14 @@
 
 =item w->set ([arguments])
 
-Basically the same as C<ev_TYPE_set>, with the same arguments. Either this
-method or a suitable start method must be called at least once. Unlike the
-C counterpart, an active watcher gets automatically stopped and restarted
-when reconfiguring it with this method.
+Basically the same as C<ev_TYPE_set> (except for C<ev::embed> watchers),
+with the same arguments. Either this method or a suitable start method
+must be called at least once. Unlike the C counterpart, an active watcher
+gets automatically stopped and restarted when reconfiguring it with this
+method.
+
+For C<ev::embed> watchers this method is called C<set_embed>, to avoid
+clashing with the C<set (loop)> method.
 
 =item w->start ()
 
@@ -3926,7 +4274,7 @@
    class myclass
    {
      ev::io   io  ; void io_cb   (ev::io   &w, int revents);
-     ev::io2  io2 ; void io2_cb  (ev::io   &w, int revents);
+     ev::io   io2 ; void io2_cb  (ev::io   &w, int revents);
      ev::idle idle; void idle_cb (ev::idle &w, int revents);
 
      myclass (int fd)
@@ -3987,7 +4335,7 @@
 =item D
 
 Leandro Lucarella has written a D language binding (F<ev.d>) for libev, to
-be found at L<http://proj.llucax.com.ar/wiki/evd>.
+be found at L<http://www.llucax.com.ar/proj/ev.d/index.html>.
 
 =item Ocaml
 
@@ -4000,6 +4348,14 @@
 time of this writing, only C<ev_io> and C<ev_timer>), to be found at
 L<http://github.com/brimworks/lua-ev>.
 
+=item Javascript
+
+Node.js (L<http://nodejs.org>) uses libev as the underlying event library.
+
+=item Others
+
+There are others, and I stopped counting.
+
 =back
 
 
@@ -4045,7 +4401,11 @@
 =item C<EV_DEFAULT>, C<EV_DEFAULT_>
 
 Similar to the other two macros, this gives you the value of the default
-loop, if multiple loops are supported ("ev loop default").
+loop, if multiple loops are supported ("ev loop default"). The default loop
+will be initialised if it isn't already initialised.
+
+For non-multiplicity builds, these macros do nothing, so you always have
+to initialise the loop somewhere.
 
 =item C<EV_DEFAULT_UC>, C<EV_DEFAULT_UC_>
 
@@ -4122,11 +4482,12 @@
 
    ev_win32.c      required on win32 platforms only
 
-   ev_select.c     only when select backend is enabled (which is enabled by default)
-   ev_poll.c       only when poll backend is enabled (disabled by default)
-   ev_epoll.c      only when the epoll backend is enabled (disabled by default)
-   ev_kqueue.c     only when the kqueue backend is enabled (disabled by default)
-   ev_port.c       only when the solaris port backend is enabled (disabled by default)
+   ev_select.c     only when select backend is enabled
+   ev_poll.c       only when poll backend is enabled
+   ev_epoll.c      only when the epoll backend is enabled
+   ev_linuxaio.c   only when the linux aio backend is enabled
+   ev_kqueue.c     only when the kqueue backend is enabled
+   ev_port.c       only when the solaris port backend is enabled
 
 F<ev.c> includes the backend files directly when enabled, so you only need
 to compile this single file.
@@ -4201,6 +4562,15 @@
 In standalone mode, libev will still try to automatically deduce the
 configuration, but has to be more conservative.
 
+=item EV_USE_FLOOR
+
+If defined to be C<1>, libev will use the C<floor ()> function for its
+periodic reschedule calculations, otherwise libev will fall back on a
+portable (slower) implementation. If you enable this, you usually have to
+link against libm or something equivalent. Enabling this when the C<floor>
+function is not available will fail, so the safe default is to not enable
+this.
+
 =item EV_USE_MONOTONIC
 
 If defined to be C<1>, libev will try to detect the availability of the
@@ -4295,6 +4665,13 @@
 file descriptors again. Note that the replacement function has to close
 the underlying OS handle.
 
+=item EV_USE_WSASOCKET
+
+If defined to be C<1>, libev will use C<WSASocket> to create its internal
+communication socket, which works better in some environments. Otherwise,
+the normal C<socket> function will be used, which works better in other
+environments.
+
 =item EV_USE_POLL
 
 If defined to be C<1>, libev will compile in support for the C<poll>(2)
@@ -4309,6 +4686,13 @@
 backend for GNU/Linux systems. If undefined, it will be enabled if the
 headers indicate GNU/Linux + Glibc 2.4 or newer, otherwise disabled.
 
+=item EV_USE_LINUXAIO
+
+If defined to be C<1>, libev will compile in support for the Linux
+aio backend. Due to its current limitations it has to be requested
+explicitly.  If undefined, it will be enabled on Linux, otherwise
+disabled.
+
 =item EV_USE_KQUEUE
 
 If defined to be C<1>, libev will compile in support for the BSD style
@@ -4339,13 +4723,28 @@
 be detected at runtime. If undefined, it will be enabled if the headers
 indicate GNU/Linux + Glibc 2.4 or newer, otherwise disabled.
 
+=item EV_NO_SMP
+
+If defined to be C<1>, libev will assume that memory is always coherent
+between threads, that is, threads can be used, but threads never run on
+different cpus (or different cpu cores). This reduces dependencies
+and makes libev faster.
+
+=item EV_NO_THREADS
+
+If defined to be C<1>, libev will assume that it will never be called from
+different threads (that includes signal handlers), which is a stronger
+assumption than C<EV_NO_SMP>, above. This reduces dependencies and makes
+libev faster.
+
 =item EV_ATOMIC_T
 
 Libev requires an integer type (suitable for storing C<0> or C<1>) whose
-access is atomic with respect to other threads or signal contexts. No such
-type is easily found in the C language, so you can provide your own type
-that you know is safe for your purposes. It is used both for signal handler "locking"
-as well as for signal and thread safety in C<ev_async> watchers.
+access is atomic with respect to other threads or signal contexts. No
+such type is easily found in the C language, so you can provide your own
+type that you know is safe for your purposes. It is used both for signal
+handler "locking" as well as for signal and thread safety in C<ev_async>
+watchers.
 
 In the absence of this define, libev will use C<sig_atomic_t volatile>
 (from F<signal.h>), which is usually good enough on most platforms.
@@ -4382,6 +4781,10 @@
 for multiple event loops and there is no first event loop pointer
 argument. Instead, all functions act on the single default loop.
 
+Note that C<EV_DEFAULT> and C<EV_DEFAULT_> will no longer provide a
+default loop when multiplicity is switched off - you always have to
+initialise the loop manually in this case.
+
 =item EV_MINPRI
 
 =item EV_MAXPRI
@@ -4427,7 +4830,7 @@
    #define EV_ASYNC_ENABLE 1
 
 The actual value is a bitset, it can be a combination of the following
-values:
+values (by default, all of these are enabled):
 
 =over 4
 
@@ -4442,6 +4845,9 @@
 gcc is recommended, as well as C<-DNDEBUG>, as libev contains a number of
 assertions.
 
+The default is off when C<__OPTIMIZE_SIZE__> is defined by your compiler
+(e.g. gcc with C<-Os>).
+
 =item C<2> - faster/larger data structures
 
 Replaces the small 2-heap for timer management by a faster 4-heap, larger
@@ -4449,6 +4855,9 @@
 and can additionally have an effect on the size of data structures at
 runtime.
 
+The default is off when C<__OPTIMIZE_SIZE__> is defined by your compiler
+(e.g. gcc with C<-Os>).
+
 =item C<4> - full API configuration
 
 This enables priorities (sets C<EV_MAXPRI>=2 and C<EV_MINPRI>=-2), and
@@ -4489,6 +4898,20 @@
 your program might be left out as well - a binary starting a timer and an
 I/O watcher then might come out at only 5Kb.
 
+=item EV_API_STATIC
+
+If this symbol is defined (by default it is not), then all identifiers
+will have static linkage. This means that libev will not export any
+identifiers, and you cannot link against libev anymore. This can be useful
+when you embed libev, only want to use libev functions in a single file,
+and do not want its identifiers to be visible.
+
+To use this, define C<EV_API_STATIC> and include F<ev.c> in the file that
+wants to use libev.
+
+This option only works when libev is compiled with a C compiler, as C++
+doesn't support the required declaration syntax.
+
 =item EV_AVOID_STDIO
 
 If this is set to C<1> at compiletime, then libev will avoid using stdio
@@ -4556,6 +4979,9 @@
 verification code will be called very frequently, which will slow down
 libev considerably.
 
+Verification errors are reported via C's C<assert> mechanism, so if you
+disable that (e.g. by defining C<NDEBUG>) then no errors will be reported.
+
 The default is C<1>, unless C<EV_FEATURES> overrides it, in which case it
 will be C<0>.
 
@@ -4702,7 +5128,7 @@
 
 =back
 
-See also L<THREAD LOCKING EXAMPLE>.
+See also L</THREAD LOCKING EXAMPLE>.
 
 =head3 COROUTINES
 
@@ -4877,7 +5303,7 @@
 the form of the C<EVBACKEND_SELECT> backend, and only supports socket
 descriptors. This only applies when using Win32 natively, not when using
 e.g. cygwin. Actually, it only applies to the microsofts own compilers,
-as every compielr comes with a slightly differently broken/incompatible
+as every compiler comes with a slightly differently broken/incompatible
 environment.
 
 Lifting these limitations would basically require the full
@@ -4981,6 +5407,11 @@
 callback: The watcher callbacks have different type signatures, but libev
 calls them using an C<ev_watcher *> internally.
 
+=item null pointers and integer zero are represented by 0 bytes
+
+Libev uses C<memset> to initialise structs and arrays to C<0> bytes, and
+relies on this setting pointers and integers to null.
+
 =item pointer accesses must be thread-atomic
 
 Accessing a pointer value must be atomic, it must both be readable and
@@ -5003,8 +5434,8 @@
 C<pthread_sigmask> could complicate things, however.
 
 The most portable way to handle signals is to block signals in all threads
-except the initial one, and run the default loop in the initial thread as
-well.
+except the initial one, and run the signal handling loop in the initial
+thread as well.
 
 =item C<long> must be large enough for common memory allocation sizes
 
@@ -5020,8 +5451,12 @@
 have at least 51 bits of mantissa (and 9 bits of exponent), which is
 good enough for at least into the year 4000 with millisecond accuracy
 (the design goal for libev). This requirement is overfulfilled by
-implementations using IEEE 754, which is basically all existing ones. With
-IEEE 754 doubles, you get microsecond accuracy until at least 2200.
+implementations using IEEE 754, which is basically all existing ones.
+
+With IEEE 754 doubles, you get microsecond accuracy until at least the
+year 2255 (and millisecond accuracy till the year 287396 - by then, libev
+is either obsolete or somebody patched it to use C<long double> or
+something like that, just kidding).
 
 =back
 
@@ -5093,8 +5528,9 @@
 =item Processing signals: O(max_signal_number)
 
 Sending involves a system call I<iff> there were no other C<ev_async_send>
-calls in the current loop iteration. Checking for async and signal events
-involves iterating over all running async watchers or all signal numbers.
+calls in the current loop iteration and the loop is currently
+blocked. Checking for async and signal events involves iterating over all
+running async watchers or all signal numbers.
 
 =back
 
@@ -5113,7 +5549,7 @@
 =item C<EV_COMPAT3> backwards compatibility mechanism
 
 The backward compatibility mechanism can be controlled by
-C<EV_COMPAT3>. See L<PREPROCESSOR SYMBOLS/MACROS> in the L<EMBEDDING>
+C<EV_COMPAT3>. See L</"PREPROCESSOR SYMBOLS/MACROS"> in the L</EMBEDDING>
 section.
 
 =item C<ev_default_destroy> and C<ev_default_fork> have been removed
@@ -5166,7 +5602,7 @@
 =item active
 
 A watcher is active as long as it has been started and not yet stopped.
-See L<WATCHER STATES> for details.
+See L</WATCHER STATES> for details.
 
 =item application
 
@@ -5212,7 +5648,7 @@
 =item pending
 
 A watcher is pending as soon as the corresponding event has been
-detected. See L<WATCHER STATES> for details.
+detected. See L</WATCHER STATES> for details.
 
 =item real time
 
@@ -5221,7 +5657,7 @@
 =item wall-clock time
 
 The time and date as shown on clocks. Unlike real time, it can actually
-be wrong and jump forwards and backwards, e.g. when the you adjust your
+be wrong and jump forwards and backwards, e.g. when you adjust your
 clock.
 
 =item watcher
@@ -5234,5 +5670,5 @@
 =head1 AUTHOR
 
 Marc Lehmann <libev@schmorp.de>, with repeated corrections by Mikael
-Magnusson and Emanuele Giaquinta.
+Magnusson and Emanuele Giaquinta, and minor corrections by many others.