--- libev/ev_iouring.c 2019/12/27 22:16:10 1.12 +++ libev/ev_iouring.c 2019/12/28 07:39:18 1.17 @@ -230,20 +230,62 @@ #define EV_SQES ((struct io_uring_sqe *) iouring_sqes) #define EV_CQES ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes)) +inline_speed +int +iouring_enter (EV_P_ ev_tstamp timeout) +{ + int res; + + EV_RELEASE_CB; + + res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1, + timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0); + + assert (("libev: io_uring_enter did not consume all sqes", (res < 0 || res == iouring_to_submit))); + + iouring_to_submit = 0; + + EV_ACQUIRE_CB; + + return res; +} + +/* TODO: can we move things around so we don't need this forward-reference? */ +static void +iouring_poll (EV_P_ ev_tstamp timeout); + static struct io_uring_sqe * iouring_sqe_get (EV_P) { - unsigned tail = EV_SQ_VAR (tail); - - if (tail + 1 - EV_SQ_VAR (head) > EV_SQ_VAR (ring_entries)) + unsigned tail; + + for (;;) { - /* queue full, flush */ - evsys_io_uring_enter (iouring_fd, iouring_to_submit, 0, 0, 0, 0); - iouring_to_submit = 0; + tail = EV_SQ_VAR (tail); + + if (ecb_expect_true (tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries))) + break; /* what's the problem, we have free sqes */ + + /* queue full, need to flush and possibly handle some events */ + +#if EV_FEATURE_CODE + /* first we ask the kernel nicely, most often this frees up some sqes */ + int res = iouring_enter (EV_A_ EV_TS_CONST (0.)); + + ECB_MEMORY_FENCE_ACQUIRE; /* better safe than sorry */ + + if (res >= 0) + continue; /* yes, it worked, try again */ +#endif + + /* some problem, possibly EBUSY - do the full poll and let it handle any issues */ + + iouring_poll (EV_A_ EV_TS_CONST (0.)); + /* iouring_poll should have done ECB_MEMORY_FENCE_ACQUIRE for us */ } - assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries))); + /*assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR 
(head) <= EV_SQ_VAR (ring_entries)));*/ return EV_SQES + (tail & EV_SQ_VAR (ring_mask)); } @@ -319,7 +361,7 @@ return -1; /* we failed */ #if TODO - if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEATURE_SINGLE_MMAP)) + if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEATURE_SINGLE_MMAP | IORING_FEAT_SUBMIT_STABLE)) return -1; /* we require the above features */ #endif @@ -402,7 +444,13 @@ struct io_uring_sqe *sqe = iouring_sqe_get (EV_A); sqe->opcode = IORING_OP_POLL_REMOVE; sqe->fd = fd; - sqe->user_data = -1; + /* Jens Axboe notified me that user_data is not what is documented, but is + * some kind of unique ID that has to match, otherwise the request cannot + * be removed. Since we don't *really* have that, we pass in the old + * generation counter - if that fails, too bad, it will hopefully be removed + * at close time and then be ignored. */ + sqe->addr = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32); + sqe->user_data = (uint64_t)-1; iouring_sqe_submit (EV_A_ sqe); /* increment generation counter to avoid handling old events */ @@ -453,9 +501,8 @@ uint32_t gen = cqe->user_data >> 32; int res = cqe->res; - /* ignore fd removal events, if there are any. TODO: verify */ - /* TODO: yes, this triggers */ - if (cqe->user_data == (__u64)-1) + /* user_data -1 is a remove that we are not atm. 
interested in */ + if (cqe->user_data == (uint64_t)-1) return; assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax)); @@ -467,6 +514,7 @@ */ /* ignore event if generation doesn't match */ + /* other than skipping removal events, */ /* this should actually be very rare */ if (ecb_expect_false (gen != (uint32_t)anfds [fd].egen)) return; @@ -590,7 +638,11 @@ iouring_poll (EV_P_ ev_tstamp timeout) { /* if we have events, no need for extra syscalls, but we might have to queue events */ - if (iouring_handle_cq (EV_A)) + /* we also clear the timeout if there are outstanding fdchanges */ + /* the latter should only happen if both the sq and cq are full, most likely */ + /* because we have a lot of event sources that immediately complete */ + /* TODO: fdchangecnt is always 0 because fd_reify does not have two buffers yet */ + if (iouring_handle_cq (EV_A) || fdchangecnt) timeout = EV_TS_CONST (0.); else /* no events, so maybe wait for some */ @@ -599,19 +651,13 @@ /* only enter the kernel if we have something to submit, or we need to wait */ if (timeout || iouring_to_submit) { - int res; - - EV_RELEASE_CB; - - res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1, - timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0); - iouring_to_submit = 0; - - EV_ACQUIRE_CB; + int res = iouring_enter (EV_A_ timeout); if (ecb_expect_false (res < 0)) if (errno == EINTR) /* ignore */; + else if (errno == EBUSY) + /* cq full, cannot submit - should be rare because we flush the cq first, so simply ignore */; else ev_syserr ("(libev) iouring setup"); else