[ViewVC] Diff of: cvs/libev/ev

Comparing libev/ev_linuxaio.c (file contents):
Revision 1.25 by root, Tue Jun 25 04:52:30 2019 UTC vs.
Revision 1.35 by root, Wed Jun 26 00:11:24 2019 UTC

  *    POLLOUT|POLLIN, but polling for POLLIN fails. just great,
  *    so we have to fall back to something else (hello, epoll),
  *    but at least the fallback can be slow, because these are
  *    exceptional cases, right?
  * d) hmm, you have to tell the kernel the maximum number of watchers
- *    you want to queue when initialiasing the aio context. but of
+ *    you want to queue when initialising the aio context. but of
  *    course the real limit is magically calculated in the kernel, and
  *    is often higher than we asked for. so we just have to destroy
  *    the aio context and re-create it a bit larger if we hit the limit.
  *    (starts to remind you of epoll? well, it's a bit more deterministic
  *    and less gambling, but still ugly as hell).
  *    limit. or the kernel simply doesn't want to handle your watchers.
  *    what the fuck do we do then? you guessed it, in the middle
  *    of event handling we have to switch to 100% epoll polling. and
  *    that better is as fast as normal epoll polling, so you practically
  *    have to use the normal epoll backend with all its quirks.
- * f) end result of this trainwreck: it inherits all the disadvantages
+ * f) end result of this train wreck: it inherits all the disadvantages
  *    from epoll, while adding a number on its own. why even bother to use
  *    it? because if conditions are right and your fds are supported and you
  *    don't hit a limit, this backend is actually faster, doesn't gamble with
  *    your fds, batches watchers and events and doesn't require costly state
  *    recreates. well, until it does.
  * g) all of this makes this backend use almost twice as much code as epoll.
- *    which in turn uses twice as much code as poll. and thats not counting
+ *    which in turn uses twice as much code as poll. and that's not counting
  *    the fact that this backend also depends on the epoll backend, making
  *    it three times as much code as poll, or kqueue.
  * h) bleah. why can't linux just do kqueue. sure kqueue is ugly, but by now
- *    it's clear that whwetaver linux does is far, far far worse.
+ *    it's clear that whatever linux comes up with is far, far, far worse.
  */
 #include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */
 #include <poll.h>
 #include <linux/aio_abi.h>
   unsigned header_length;  /* size of aio_ring */
   struct io_event io_events[0];
 };
+/*
+ * define some syscall wrappers for common architectures
+ * this is mostly for nice looks during debugging, not performance.
+ * our syscalls return < 0, not == -1, on error. which is good
+ * enough for linux aio.
+ * TODO: arm is also common nowadays, maybe even mips and x86
+ * TODO: after implementing this, it suddenly looks like overkill, but it's hard to remove...
+ */
+#if __GNUC__ && __linux && ECB_AMD64 && !defined __OPTIMIZE_SIZE__
+  /* the costly errno access probably kills this for size optimisation
+   *
+   * ev_syscall (nr, narg, arg1..arg5) is a GNU statement expression:
+   * - nr goes into the same register the result comes back in
+   *   (input constraint "0" tied to the "=a" output).
+   * - arg1..arg5 are bound to rdi, rsi, rdx, r10 and r8 respectively; only
+   *   the first narg of them are actually assigned, the remaining register
+   *   variables are passed to the asm statement unassigned.
+   * - cc, r11, cx and memory are declared as clobbered by the asm statement.
+   * - errno is set to -res unconditionally (i.e. also when res >= 0) and the
+   *   whole expression evaluates to res, so callers have to test the result
+   *   for < 0 before looking at errno.
+   */
+  #define ev_syscall(nr,narg,arg1,arg2,arg3,arg4,arg5)                 \
+    ({                                                                 \
+        long res;                                                      \
+        register unsigned long r5 __asm__ ("r8" );                     \
+        register unsigned long r4 __asm__ ("r10");                     \
+        register unsigned long r3 __asm__ ("rdx");                     \
+        register unsigned long r2 __asm__ ("rsi");                     \
+        register unsigned long r1 __asm__ ("rdi");                     \
+        if (narg >= 5) r5 = (unsigned long)(arg5);                     \
+        if (narg >= 4) r4 = (unsigned long)(arg4);                     \
+        if (narg >= 3) r3 = (unsigned long)(arg3);                     \
+        if (narg >= 2) r2 = (unsigned long)(arg2);                     \
+        if (narg >= 1) r1 = (unsigned long)(arg1);                     \
+        __asm__ __volatile__ (                                         \
+          "syscall\n\t"                                                \
+          : "=a" (res)                                                 \
+          : "0" (nr), "r" (r1), "r" (r2), "r" (r3), "r" (r4), "r" (r5) \
+          : "cc", "r11", "cx", "memory");                              \
+        errno = -res;                                                  \
+        res;                                                           \
+    })
+#endif
+#ifdef ev_syscall
+  /* fixed-arity front ends for the inline-asm ev_syscall: the second argument
+   * is the number of argument registers ev_syscall has to load, so it must
+   * match the arity of the wrapper. unused argument slots are passed as 0. */
+  #define ev_syscall0(nr)                          ev_syscall (nr, 0,    0,    0,    0,    0,    0)
+  #define ev_syscall1(nr,arg1)                     ev_syscall (nr, 1, arg1,    0,    0,    0,    0)
+  #define ev_syscall2(nr,arg1,arg2)                ev_syscall (nr, 2, arg1, arg2,    0,    0,    0)
+  #define ev_syscall3(nr,arg1,arg2,arg3)           ev_syscall (nr, 3, arg1, arg2, arg3,    0,    0)
+  #define ev_syscall4(nr,arg1,arg2,arg3,arg4)      ev_syscall (nr, 4, arg1, arg2, arg3, arg4,    0)
+  #define ev_syscall5(nr,arg1,arg2,arg3,arg4,arg5) ev_syscall (nr, 5, arg1, arg2, arg3, arg4, arg5)
+#else
+  /* no ev_syscall macro was defined above: fall back to plain syscall () */
+  #define ev_syscall0(nr)                          syscall (nr)
+  #define ev_syscall1(nr,arg1)                     syscall (nr, arg1)
+  #define ev_syscall2(nr,arg1,arg2)                syscall (nr, arg1, arg2)
+  #define ev_syscall3(nr,arg1,arg2,arg3)           syscall (nr, arg1, arg2, arg3)
+  #define ev_syscall4(nr,arg1,arg2,arg3,arg4)      syscall (nr, arg1, arg2, arg3, arg4)
+  #define ev_syscall5(nr,arg1,arg2,arg3,arg4,arg5) syscall (nr, arg1, arg2, arg3, arg4, arg5)
+#endif
 inline_size
 int
 evsys_io_setup (unsigned nr_events, aio_context_t *ctx_idp)
 {
-  return syscall (SYS_io_setup, nr_events, ctx_idp);
+  return ev_syscall2 (SYS_io_setup, nr_events, ctx_idp);
 }
 inline_size
 int
 evsys_io_destroy (aio_context_t ctx_id)
 {
-  return syscall (SYS_io_destroy, ctx_id);
+  return ev_syscall1 (SYS_io_destroy, ctx_id);
 }
 inline_size
 int
 evsys_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[])
 {
-  return syscall (SYS_io_submit, ctx_id, nr, cbp);
+  return ev_syscall3 (SYS_io_submit, ctx_id, nr, cbp);
 }
 inline_size
 int
 evsys_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result)
 {
-  return syscall (SYS_io_cancel, ctx_id, cbp, result);
+  return ev_syscall3 (SYS_io_cancel, ctx_id, cbp, result);
 }
 inline_size
 int
 evsys_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout)
 {
-  return syscall (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout);
+  return ev_syscall5 (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout);
 }
 /*****************************************************************************/
 /* actual backend implementation */
     requests = requests / one_page * one_page + first_page;
   return requests;
 }
-/* we use out own wrapper structure in acse we ever want to do something "clever" */
+/* we use our own wrapper structure in case we ever want to do something "clever" */
 typedef struct aniocb
 {
   struct iocb io;
   /*int inuse;*/
 } *ANIOCBP;
 void
 linuxaio_array_needsize_iocbp (ANIOCBP *base, int offset, int count)
 {
   while (count--)
     {
-      /* TODO: quite the overhead to allocate every iocb separately, maybe use our own alocator? */
+      /* TODO: quite the overhead to allocate every iocb separately, maybe use our own allocator? */
       ANIOCBP iocb = (ANIOCBP)ev_malloc (sizeof (*iocb));
       /* full zero initialise is probably not required at the moment, but
        * this is not well documented, so we better do it.
        */
   ANIOCBP iocb = linuxaio_iocbps [fd];
   if (iocb->io.aio_reqprio < 0)
     {
       /* we handed this fd over to epoll, so undo this first */
-      /* we do it manually becvause the optimisations on epoll_modfy won't do us any good */
+      /* we do it manually because the optimisations on epoll_modify won't do us any good */
       epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0);
+      anfds [fd].emask = 0;
       iocb->io.aio_reqprio = 0;
     }
   if (iocb->io.aio_buf)
-    /* io_cancel always returns some error on relevant kernels, but works */
+    {
-    evsys_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0);
+      evsys_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0);
+      /* on relevant kernels, io_cancel fails with EINPROGRESS if everything is fine, */
+      /* so EINPROGRESS is the expected outcome and any other errno is the surprise */
+      assert (("libev: linuxaio unexpected io_cancel failed", errno == EINPROGRESS));
+    }
   if (nev)
     {
       iocb->io.aio_buf =
           (nev & EV_READ ? POLLIN : 0)
 linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents)
 {
   epoll_poll (EV_A_ 0);
 }
-static void
+inline_speed
+void
 linuxaio_fd_rearm (EV_P_ int fd)
 {
   anfds [fd].events = 0;
   linuxaio_iocbps [fd]->io.aio_buf = 0;
   fd_change (EV_A_ fd, EV_ANFD_REIFY);
         fd,
         (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
         | (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0)
       );
-      /* linux aio is oneshot: rearm fd. TODO: this does more work than needed */
+      /* linux aio is oneshot: rearm fd. TODO: this does more work than strictly needed */
       linuxaio_fd_rearm (EV_A_ fd);
       --nr;
       ++ev;
     }
 }
-/* get any events from ringbuffer, return true if any were handled */
+/* get any events from ring buffer, return true if any were handled */
 static int
 linuxaio_get_events_from_ring (EV_P)
 {
   struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;
     {
       linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head);
       linuxaio_parse_events (EV_A_ ring->io_events, tail);
     }
-  ECB_MEMORY_FENCE_RELAXED;
+  ECB_MEMORY_FENCE_RELEASE;
   /* as an extension to C, we hope that the volatile will make this atomic and once-only */
   *(volatile unsigned *)&ring->head = tail;
-  /* make sure kernel can see our new head value - probably not required */
-  ECB_MEMORY_FENCE_RELEASE;
   return 1;
 }
 /* read at least one event from kernel, or timeout */
       /* ignored */;
     else
       ev_syserr ("(libev) linuxaio io_getevents");
   else if (res)
     {
-      /* at least one event received, handle it and any remaining ones in the ring buffer */
+      /* at least one event available, handle it and any remaining ones in the ring buffer */
       linuxaio_parse_events (EV_A_ ioev, res);
       linuxaio_get_events_from_ring (EV_A);
     }
 }
-static int
+inline_size
+int
 linuxaio_io_setup (EV_P)
 {
   linuxaio_ctx = 0;
   return evsys_io_setup (linuxaio_nr_events (EV_A), &linuxaio_ctx);
 }
   /* first phase: submit new iocbs */
   /* io_submit might return less than the requested number of iocbs */
   /* this is, afaics, only because of errors, but we go by the book and use a loop, */
-  /* which allows us to pinpoint the errornous iocb */
+  /* which allows us to pinpoint the erroneous iocb */
   for (submitted = 0; submitted < linuxaio_submitcnt; )
     {
       int res = evsys_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);
       if (expect_false (res < 0))
             res = 1; /* skip this iocb - another iocb, another chance */
           }
         else if (errno == EAGAIN)
           {
             /* This happens when the ring buffer is full, or some other shit we
-             * dont' know and isn't documented. Most likely because we have too
+             * don't know and isn't documented. Most likely because we have too
              * many requests and linux aio can't be assed to handle them.
              * In this case, we try to allocate a larger ring buffer, freeing
              * ours first. This might fail, in which case we have to fall back to 100%
              * epoll.
              * God, how I hate linux not getting its act together. Ever.
             /* it's easiest to handle this mess in another iteration */
             return;
           }
         else if (errno == EBADF)
           {
+            assert (("libev: event loop rejected bad fd", errno != EBADF));
             fd_kill (EV_A_ linuxaio_submits [submitted]->aio_fildes);
             res = 1; /* skip this iocb */
           }
         else
 int
 linuxaio_init (EV_P_ int flags)
 {
   /* would be great to have a nice test for IOCB_CMD_POLL instead */
   /* also: test some semi-common fd types, such as files and ttys in recommended_backends */
-  /* 4.18 introduced IOCB_CMD_POLL, 4.19 made epoll work */
+  /* 4.18 introduced IOCB_CMD_POLL, 4.19 made epoll work, and we need that */
   if (ev_linux_version () < 0x041300)
     return 0;
   if (!epoll_init (EV_A_ 0))
     return 0;
 void
 linuxaio_destroy (EV_P)
 {
   epoll_destroy (EV_A);
   linuxaio_free_iocbp (EV_A);
-  evsys_io_destroy (linuxaio_ctx);
+  evsys_io_destroy (linuxaio_ctx); /* fails in child, aio context is destroyed */
 }
 inline_size
 void
 linuxaio_fork (EV_P)
   linuxaio_iteration = 0; /* we start over in the child */
   while (linuxaio_io_setup (EV_A) < 0)
     ev_syserr ("(libev) linuxaio io_setup");
+  /* forking epoll should also effectively unregister all fds from the backend */
   epoll_fork (EV_A);
   ev_io_stop  (EV_A_ &linuxaio_epoll_w);
   ev_io_set   (EV_A_ &linuxaio_epoll_w, backend_fd, EV_READ);
   ev_io_start (EV_A_ &linuxaio_epoll_w);

Diff Legend

– Removed lines
+ Added lines
< Changed lines
> Changed lines

Comparing libev/ev_linuxaio.c (file contents): Revision 1.25 by root, Tue Jun 25 04:52:30 2019 UTC vs. Revision 1.35 by root, Wed Jun 26 00:11:24 2019 UTC

Diff Legend

Comparing libev/ev_linuxaio.c (file contents):
Revision 1.25 by root, Tue Jun 25 04:52:30 2019 UTC vs.
Revision 1.35 by root, Wed Jun 26 00:11:24 2019 UTC