--- libev/ev_iouring.c 2019/12/20 05:20:23 1.6 +++ libev/ev_iouring.c 2019/12/27 21:56:29 1.11 @@ -49,8 +49,9 @@ * e) why 3 mmaps instead of one? one would be more space-efficient, * and I can't see what benefit three would have (other than being * somehow resizable/relocatable, but that's apparently not possible). + * (FIXME: newer kernels can use 2 mmaps only, need to look into this). * f) hmm, it's practiclaly undebuggable (gdb can't access the memory, and - the bizarre way structure offsets are commuinicated makes it hard to + * the bizarre way structure offsets are communicated makes it hard to * just print the ring buffer heads, even *iff* the memory were visible * in gdb. but then, that's also ok, really. * g) well, you cannot specify a timeout when waiting for events. no, @@ -60,8 +61,10 @@ * like a µ-optimisation by the io_uring author for his personal * applications, to the detriment of everybody else who just wants * an event loop. but, umm, ok, if that's all, it could be worse. + * (FIXME: jens mentioned timeout commands, need to investigate) * h) there is a hardcoded limit of 4096 outstanding events. okay, * at least there is no arbitrary low system-wide limit... + * (FIXME: apparently, this was increased to 32768 in later kernels) * i) unlike linux aio, you *can* register more then the limit * of fd events, and the kernel will "gracefully" signal an * overflow, after which you could destroy and recreate the kernel @@ -69,6 +72,7 @@ * totally insane, but kind of questions the point a high * performance I/O framework when it doesn't really work * under stress. + * (FIXME: iouring should no longer drop events, need to investigate) * j) but, oh my! is has exactly the same bugs as the linux aio backend, * where some undocumented poll combinations just fail. * so we need epoll AGAIN as a fallback. AGAIN! epoll!! and of course, @@ -78,6 +82,10 @@ * or might not get fixed (do I hold my breath?). 
*/ +/* TODO: use internal TIMEOUT */ +/* TODO: take advantage of single mmap, NODROP etc. */ +/* TODO: resize cq/sq size independently */ + #include #include #include @@ -98,7 +106,10 @@ __u8 flags; __u16 ioprio; __s32 fd; - __u64 off; + union { + __u64 off; + __u64 addr2; + }; __u64 addr; __u32 len; union { @@ -107,6 +118,11 @@ __u16 poll_events; __u32 sync_range_flags; __u32 msg_flags; + __u32 timeout_flags; + __u32 accept_flags; + __u32 cancel_flags; + __u32 open_flags; + __u32 statx_flags; }; __u64 user_data; union { @@ -153,7 +169,8 @@ __u32 flags; __u32 sq_thread_cpu; __u32 sq_thread_idle; - __u32 resv[5]; + __u32 features; + __u32 resv[4]; struct io_sqring_offsets sq_off; struct io_cqring_offsets cq_off; }; @@ -167,6 +184,10 @@ #define IORING_OFF_CQ_RING 0x08000000ULL #define IORING_OFF_SQES 0x10000000ULL +#define IORING_FEAT_SINGLE_MMAP 0x1 +#define IORING_FEAT_NODROP 0x2 +#define IORING_FEAT_SUBMIT_STABLE 0x4 + inline_size int evsys_io_uring_setup (unsigned entries, struct io_uring_params *params) @@ -238,12 +259,6 @@ iouring_tfd_to = EV_TSTAMP_HUGE; } -static void -iouring_epoll_cb (EV_P_ struct ev_io *w, int revents) -{ - epoll_poll (EV_A_ 0); -} - /* called for full and partial cleanup */ ecb_cold static int @@ -256,8 +271,11 @@ if (iouring_cq_ring != MAP_FAILED) munmap (iouring_cq_ring, iouring_cq_ring_size); if (iouring_sqes != MAP_FAILED) munmap (iouring_sqes , iouring_sqes_size ); - if (ev_is_active (&iouring_epoll_w)) ev_ref (EV_A); ev_io_stop (EV_A_ &iouring_epoll_w); - if (ev_is_active (&iouring_tfd_w )) ev_ref (EV_A); ev_io_stop (EV_A_ &iouring_tfd_w ); + if (ev_is_active (&iouring_tfd_w)) + { + ev_ref (EV_A); + ev_io_stop (EV_A_ &iouring_tfd_w); + } } ecb_cold @@ -283,6 +301,11 @@ if (errno != EINVAL) return -1; /* we failed */ +#if TODO + if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEAT_SINGLE_MMAP)) + return -1; /* we require the above features */ +#endif + /* EINVAL: lots of possible reasons, but maybe * it is because we hit the 
unqueryable hardcoded size limit */ @@ -344,14 +367,7 @@ while (iouring_internal_init (EV_A) < 0) ev_syserr ("(libev) io_uring_setup"); - /* forking epoll should also effectively unregister all fds from the backend */ - epoll_fork (EV_A); - /* epoll_fork already did this. hopefully */ - /*fd_rearm_all (EV_A);*/ - - ev_io_stop (EV_A_ &iouring_epoll_w); - ev_io_set (EV_A_ &iouring_epoll_w, backend_fd, EV_READ); - ev_io_start (EV_A_ &iouring_epoll_w); + fd_rearm_all (EV_A); ev_io_stop (EV_A_ &iouring_tfd_w); ev_io_set (EV_A_ &iouring_tfd_w, iouring_tfd, EV_READ); @@ -363,15 +379,6 @@ static void iouring_modify (EV_P_ int fd, int oev, int nev) { - if (ecb_expect_false (anfds [fd].eflags)) - { - /* we handed this fd over to epoll, so undo this first */ - /* we do it manually because the optimisations on epoll_modify won't do us any good */ - epoll_ctl (iouring_fd, EPOLL_CTL_DEL, fd, 0); - anfds [fd].eflags = 0; - oev = 0; - } - if (oev) { /* we assume the sqe's are all "properly" initialised */ @@ -430,8 +437,9 @@ int res = cqe->res; /* ignore fd removal events, if there are any. TODO: verify */ + /* TODO: yes, this triggers */ if (cqe->user_data == (__u64)-1) - abort ();//D + return; assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax)); @@ -448,17 +456,10 @@ if (ecb_expect_false (res < 0)) { - if (res == -EINVAL) - { - /* we assume this error code means the fd/poll combination is buggy - * and fall back to epoll. - * this error code might also indicate a bug, but the kernel doesn't - * distinguish between those two conditions, so... sigh... 
- */ + /*TODO: EINVAL handling (was something failed with this fd)*/ + /*TODO: EBUSY happens when?*/ - epoll_modify (EV_A_ fd, 0, anfds [fd].events); - } - else if (res == -EBADF) + if (res == -EBADF) { assert (("libev: event loop rejected bad fd", res != -EBADF)); fd_kill (EV_A_ fd); @@ -494,7 +495,7 @@ /* we have two options, resize the queue (by tearing down * everything and recreating it, or living with it * and polling. - * we implement this by resizing tghe queue, and, if that fails, + * we implement this by resizing the queue, and, if that fails, * we just recreate the state on every failure, which * kind of is a very inefficient poll. * one danger is, due to the bios toward lower fds, @@ -516,12 +517,12 @@ /* we hit the kernel limit, we should fall back to something else. * we can either poll() a few times and hope for the best, * poll always, or switch to epoll. - * since we use epoll anyways, go epoll. + * TODO: is this necessary with newer kernels? */ iouring_internal_destroy (EV_A); - /* this should make it so that on return, we don'T call any uring functions */ + /* this should make it so that on return, we don't call any uring functions */ iouring_to_submit = 0; for (;;) @@ -605,9 +606,6 @@ int iouring_init (EV_P_ int flags) { - if (!epoll_init (EV_A_ 0)) - return 0; - iouring_entries = IOURING_INIT_ENTRIES; iouring_max_entries = 0; @@ -617,15 +615,8 @@ return 0; } - ev_io_init (&iouring_epoll_w, iouring_epoll_cb, backend_fd, EV_READ); - ev_set_priority (&iouring_epoll_w, EV_MAXPRI); - ev_io_init (&iouring_tfd_w, iouring_tfd_cb, iouring_tfd, EV_READ); - ev_set_priority (&iouring_tfd_w, EV_MAXPRI); - - ev_io_start (EV_A_ &iouring_epoll_w); - ev_unref (EV_A); /* watcher should not keep loop alive */ - + ev_set_priority (&iouring_tfd_w, EV_MINPRI); ev_io_start (EV_A_ &iouring_tfd_w); ev_unref (EV_A); /* watcher should not keep loop alive */ @@ -640,6 +631,5 @@ iouring_destroy (EV_P) { iouring_internal_destroy (EV_A); - epoll_destroy (EV_A); }