[ViewVC] Diff of: cvs/libev/ev

Comparing libev/ev_iouring.c (file contents):
Revision 1.9 by root, Fri Dec 27 16:15:58 2019 UTC vs.
Revision 1.15 by root, Sat Dec 28 05:53:48 2019 UTC

 /* TODO: resize cq/sq size independently */
 #include <sys/timerfd.h>
 #include <sys/mman.h>
 #include <poll.h>
+#include <stdint.h>
 #define IOURING_INIT_ENTRIES 32
 /*****************************************************************************/
 /* syscall wrapdadoop - this section has the raw api/abi definitions */
   __u32 resv[4];
   struct io_sqring_offsets sq_off;
   struct io_cqring_offsets cq_off;
 };
+#define IORING_SETUP_CQSIZE 0x00000008
-#define IORING_OP_POLL_ADD    6
+#define IORING_OP_POLL_ADD        6
-#define IORING_OP_POLL_REMOVE 7
+#define IORING_OP_POLL_REMOVE     7
+#define IORING_OP_TIMEOUT        11
+#define IORING_OP_TIMEOUT_REMOVE 12
+/* relative or absolute, reference clock is CLOCK_MONOTONIC */
+struct iouring_kernel_timespec
+{
+  int64_t tv_sec;
+  long long tv_nsec;
+};
+#define IORING_TIMEOUT_ABS 0x00000001
 #define IORING_ENTER_GETEVENTS 0x01
 #define IORING_OFF_SQ_RING 0x00000000ULL
 #define IORING_OFF_CQ_RING 0x08000000ULL
 #define IORING_OFF_SQES	   0x10000000ULL
-#define IORING_FEAT_SINGLE_MMAP   0x1
+#define IORING_FEAT_SINGLE_MMAP   0x00000001
-#define IORING_FEAT_NODROP        0x2
+#define IORING_FEAT_NODROP        0x00000002
-#define IORING_FEAT_SUBMIT_STABLE 0x4
+#define IORING_FEAT_SUBMIT_STABLE 0x00000004
 inline_size
 int
 evsys_io_uring_setup (unsigned entries, struct io_uring_params *params)
 {
 /* the submit/completion queue entries */
 #define EV_SQES         ((struct io_uring_sqe *)         iouring_sqes)
 #define EV_CQES         ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes))
+/* TODO: this is not enough, we might have to reap events */
+/* TODO: but we can't, as that will re-arm events, causing */
+/* TODO: an endless loop in fd_reify */
+static int
+iouring_enter (EV_P_ ev_tstamp timeout)
+{
+  int res;
+  EV_RELEASE_CB;
+  res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1,
+                              timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0);
+  assert (("libev: io_uring_enter did not consume all sqes", (res < 0 || res == iouring_to_submit)));
+  iouring_to_submit = 0;
+  EV_ACQUIRE_CB;
+  return res;
+}
 static
 struct io_uring_sqe *
 iouring_sqe_get (EV_P)
 {
   unsigned tail = EV_SQ_VAR (tail);
-  if (tail + 1 - EV_SQ_VAR (head) > EV_SQ_VAR (ring_entries))
+  while (ecb_expect_false (tail + 1 - EV_SQ_VAR (head) > EV_SQ_VAR (ring_entries)))
     {
-      /* queue full, flush */
+      /* queue full, need to flush */
-      evsys_io_uring_enter (iouring_fd, iouring_to_submit, 0, 0, 0, 0);
-      iouring_to_submit = 0;
-    }
+      int res = iouring_enter (EV_A_ EV_TS_CONST (0.));
+      /* io_uring_enter might fail with EBUSY and won't submit anything */
+      /* unfortunately, we can't handle this at the moment */
+      if (res < 0 && errno == EBUSY)
+        /* the sane thing might be to resize, but we can't */
+        //TODO
+        ev_syserr ("(libev) io_uring_enter could not clear sq");
+      else
+        break;
+      /* iouring_poll should have done ECB_MEMORY_FENCE_ACQUIRE */
+    }
-  assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)));
+  /*assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)));*/
   return EV_SQES + (tail & EV_SQ_VAR (ring_mask));
 }
 inline_size
   iouring_tfd     = -1;
   iouring_sq_ring = MAP_FAILED;
   iouring_cq_ring = MAP_FAILED;
   iouring_sqes    = MAP_FAILED;
+  if (!have_monotonic) /* cannot really happen, but what if11 */
+    return -1;
   for (;;)
     {
       iouring_fd = evsys_io_uring_setup (iouring_entries, &params);
       if (iouring_fd >= 0)
         break; /* yippie */
       if (errno != EINVAL)
         return -1; /* we failed */
+#if TODO
+      if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEATURE_SINGLE_MMAP))
+        return -1; /* we require the above features */
+#endif
       /* EINVAL: lots of possible reasons, but maybe
        * it is because we hit the unqueryable hardcoded size limit
        */
     {
       /* we assume the sqe's are all "properly" initialised */
       struct io_uring_sqe *sqe = iouring_sqe_get (EV_A);
       sqe->opcode    = IORING_OP_POLL_REMOVE;
       sqe->fd        = fd;
+      /* Jens Axboe notified me that user_data is not what is documented, but is
+       * some kind of unique ID that has to match, otherwise the request cannot
+       * be removed. Since we don't *really* have that, we pass in the old
+       * generation counter - if that fails, too bad, it will hopefully be removed
+       * at close time and then be ignored. */
+      sqe->addr      = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32);
-      sqe->user_data = -1;
+      sqe->user_data = (uint64_t)-1;
       iouring_sqe_submit (EV_A_ sqe);
       /* increment generation counter to avoid handling old events */
       ++anfds [fd].egen;
     }
 {
   int      fd  = cqe->user_data & 0xffffffffU;
   uint32_t gen = cqe->user_data >> 32;
   int      res = cqe->res;
-  /* ignore fd removal events, if there are any. TODO: verify */
+  /* user_data -1 is a remove that we are not atm. interested in */
-  if (cqe->user_data == (__u64)-1)
+  if (cqe->user_data == (uint64_t)-1)
-    abort ();//D
+    return;
   assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax));
   /* documentation lies, of course. the result value is NOT like
    * normal syscalls, but like linux raw syscalls, i.e. negative
    * error numbers. fortunate, as otherwise there would be no way
    * to get error codes at all. still, why not document this?
    */
   /* ignore event if generation doesn't match */
+  /* other than skipping removal events, */
   /* this should actually be very rare */
   if (ecb_expect_false (gen != (uint32_t)anfds [fd].egen))
     return;
   if (ecb_expect_false (res < 0))
     {
-      //TODO: EINVAL handling (was something failed with this fd)
+      /*TODO: EINVAL handling (was something failed with this fd)*/
-      //TODO: EBUSY happens when?
+      /*TODO: EBUSY happens when?*/
       if (res == -EBADF)
         {
           assert (("libev: event loop rejected bad fd", res != -EBADF));
           fd_kill (EV_A_ fd);
 iouring_overflow (EV_P)
 {
   /* we have two options, resize the queue (by tearing down
    * everything and recreating it, or living with it
    * and polling.
-   * we implement this by resizing tghe queue, and, if that fails,
+   * we implement this by resizing the queue, and, if that fails,
    * we just recreate the state on every failure, which
    * kind of is a very inefficient poll.
    * one danger is, due to the bios toward lower fds,
    * we will only really get events for those, so
    * maybe we need a poll() fallback, after all.
   else
     {
       /* we hit the kernel limit, we should fall back to something else.
        * we can either poll() a few times and hope for the best,
        * poll always, or switch to epoll.
-       * since we use epoll anyways, go epoll.
+       * TODO: is this necessary with newer kernels?
        */
       iouring_internal_destroy (EV_A);
-      /* this should make it so that on return, we don'T call any uring functions */
+      /* this should make it so that on return, we don't call any uring functions */
       iouring_to_submit = 0;
       for (;;)
         {
           backend = epoll_init (EV_A_ 0);
     iouring_tfd_update (EV_A_ timeout);
   /* only enter the kernel if we have something to submit, or we need to wait */
   if (timeout || iouring_to_submit)
     {
-      int res;
+      int res = iouring_enter (EV_A_ timeout);
-      EV_RELEASE_CB;
-      res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1,
-                                  timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0);
-      iouring_to_submit = 0;
-      EV_ACQUIRE_CB;
       if (ecb_expect_false (res < 0))
         if (errno == EINTR)
           /* ignore */;
+        else if (errno == EBUSY)
+          /* cq full, cannot submit - should be rare because we flush the cq first, so simply ignore */;
         else
           ev_syserr ("(libev) iouring setup");
       else
         iouring_handle_cq (EV_A);
     }

Diff Legend

-  Removed lines
+  Added lines
<  Changed lines
>  Changed lines

Comparing libev/ev_iouring.c (file contents): Revision 1.9 by root, Fri Dec 27 16:15:58 2019 UTC vs. Revision 1.15 by root, Sat Dec 28 05:53:48 2019 UTC

Diff Legend

Comparing libev/ev_iouring.c (file contents):
Revision 1.9 by root, Fri Dec 27 16:15:58 2019 UTC vs.
Revision 1.15 by root, Sat Dec 28 05:53:48 2019 UTC