… | |
… | |
47 | * without any syscalls. what's not to like? |
47 | * without any syscalls. what's not to like? |
48 | * d) ok, it's vastly more complex, but that's ok, really. |
48 | * d) ok, it's vastly more complex, but that's ok, really. |
49 | * e) why 3 mmaps instead of one? one would be more space-efficient, |
49 | * e) why 3 mmaps instead of one? one would be more space-efficient, |
50 | * and I can't see what benefit three would have (other than being |
50 | * and I can't see what benefit three would have (other than being |
51 | * somehow resizable/relocatable, but that's apparently not possible). |
51 | * somehow resizable/relocatable, but that's apparently not possible). |
|
|
52 | * (FIXME: newer kernels can use 2 mmaps only, need to look into this). |
52 | * f) hmm, it's practically undebuggable (gdb can't access the memory, and |
53 | * f) hmm, it's practically undebuggable (gdb can't access the memory, and |
53 | the bizarre way structure offsets are commuinicated makes it hard to |
54 | * the bizarre way structure offsets are communicated makes it hard to |
54 | * just print the ring buffer heads, even *iff* the memory were visible |
55 | * just print the ring buffer heads, even *iff* the memory were visible |
55 | * in gdb. but then, that's also ok, really. |
56 | * in gdb. but then, that's also ok, really. |
56 | * g) well, you cannot specify a timeout when waiting for events. no, |
57 | * g) well, you cannot specify a timeout when waiting for events. no, |
57 | * seriously, the interface doesn't support a timeout. never seen _that_ |
58 | * seriously, the interface doesn't support a timeout. never seen _that_ |
58 | * before. sure, you can use a timerfd, but that's another syscall |
59 | * before. sure, you can use a timerfd, but that's another syscall |
59 | * you could have avoided. overall, this bizarre omission smells |
60 | * you could have avoided. overall, this bizarre omission smells |
60 | * like a µ-optimisation by the io_uring author for his personal |
61 | * like a µ-optimisation by the io_uring author for his personal |
61 | * applications, to the detriment of everybody else who just wants |
62 | * applications, to the detriment of everybody else who just wants |
62 | * an event loop. but, umm, ok, if that's all, it could be worse. |
63 | * an event loop. but, umm, ok, if that's all, it could be worse. |
|
|
64 | * (FIXME: jens mentioned timeout commands, need to investigate) |
63 | * h) there is a hardcoded limit of 4096 outstanding events. okay, |
65 | * h) there is a hardcoded limit of 4096 outstanding events. okay, |
64 | * at least there is no arbitrary low system-wide limit... |
66 | * at least there is no arbitrary low system-wide limit... |
|
|
67 | * (FIXME: apparently, this was increased to 32768 in later kernels) |
65 | * i) unlike linux aio, you *can* register more than the limit |
68 | * i) unlike linux aio, you *can* register more than the limit |
66 | * of fd events, and the kernel will "gracefully" signal an |
69 | * of fd events, and the kernel will "gracefully" signal an |
67 | * overflow, after which you could destroy and recreate the kernel |
70 | * overflow, after which you could destroy and recreate the kernel |
68 | * state, a bit bigger, or fall back to e.g. poll. that's not |
71 | * state, a bit bigger, or fall back to e.g. poll. that's not |
69 | * totally insane, but kind of questions the point of a high |
72 | * totally insane, but kind of questions the point of a high |
70 | * performance I/O framework when it doesn't really work |
73 | * performance I/O framework when it doesn't really work |
71 | * under stress. |
74 | * under stress. |
|
|
75 | * (FIXME: iouring should no longer drop events, need to investigate) |
72 | * j) but, oh my! it has exactly the same bugs as the linux aio backend, |
76 | * j) but, oh my! it has exactly the same bugs as the linux aio backend, |
73 | * where some undocumented poll combinations just fail. |
77 | * where some undocumented poll combinations just fail. |
74 | * so we need epoll AGAIN as a fallback. AGAIN! epoll!! and of course, |
78 | * so we need epoll AGAIN as a fallback. AGAIN! epoll!! and of course, |
75 | * this is completely undocumented, have I mentioned this already? |
79 | * this is completely undocumented, have I mentioned this already? |
76 | * k) overall, the *API* itself is, I dare to say, not a total trainwreck. |
80 | * k) overall, the *API* itself is, I dare to say, not a total trainwreck. |
… | |
… | |
361 | /*****************************************************************************/ |
365 | /*****************************************************************************/ |
362 | |
366 | |
363 | static void |
367 | static void |
364 | iouring_modify (EV_P_ int fd, int oev, int nev) |
368 | iouring_modify (EV_P_ int fd, int oev, int nev) |
365 | { |
369 | { |
366 | fprintf (stderr,"modify %d (%d, %d) %d\n", fd, oev,nev, anfds[fd].eflags);//D |
|
|
367 | if (ecb_expect_false (anfds [fd].eflags)) |
370 | if (ecb_expect_false (anfds [fd].eflags)) |
368 | { |
371 | { |
369 | /* we handed this fd over to epoll, so undo this first */ |
372 | /* we handed this fd over to epoll, so undo this first */ |
370 | /* we do it manually because the optimisations on epoll_modify won't do us any good */ |
373 | /* we do it manually because the optimisations on epoll_modify won't do us any good */ |
371 | epoll_ctl (iouring_fd, EPOLL_CTL_DEL, fd, 0); |
374 | epoll_ctl (iouring_fd, EPOLL_CTL_DEL, fd, 0); |
… | |
… | |
471 | } |
474 | } |
472 | |
475 | |
473 | return; |
476 | return; |
474 | } |
477 | } |
475 | |
478 | |
476 | fprintf (stderr, "fd %d event, rearm\n", fd);//D |
|
|
477 | |
|
|
478 | /* feed events, we do not expect or handle POLLNVAL */ |
479 | /* feed events, we do not expect or handle POLLNVAL */ |
479 | fd_event ( |
480 | fd_event ( |
480 | EV_A_ |
481 | EV_A_ |
481 | fd, |
482 | fd, |
482 | (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) |
483 | (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) |
… | |
… | |
574 | static void |
575 | static void |
575 | iouring_poll (EV_P_ ev_tstamp timeout) |
576 | iouring_poll (EV_P_ ev_tstamp timeout) |
576 | { |
577 | { |
577 | /* if we have events, no need for extra syscalls, but we might have to queue events */ |
578 | /* if we have events, no need for extra syscalls, but we might have to queue events */ |
578 | if (iouring_handle_cq (EV_A)) |
579 | if (iouring_handle_cq (EV_A)) |
579 | timeout = 0.; |
580 | timeout = EV_TS_CONST (0.); |
580 | else |
581 | else |
581 | /* no events, so maybe wait for some */ |
582 | /* no events, so maybe wait for some */ |
582 | iouring_tfd_update (EV_A_ timeout); |
583 | iouring_tfd_update (EV_A_ timeout); |
583 | |
584 | |
584 | /* only enter the kernel if we have somethign to submit, or we need to wait */ |
585 | /* only enter the kernel if we have something to submit, or we need to wait */ |
585 | if (timeout || iouring_to_submit) |
586 | if (timeout || iouring_to_submit) |
586 | { |
587 | { |
587 | int res; |
588 | int res; |
588 | |
589 | |
589 | EV_RELEASE_CB; |
590 | EV_RELEASE_CB; |
590 | |
591 | |
591 | res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1, |
592 | res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1, |
592 | timeout ? IORING_ENTER_GETEVENTS : 0, 0, 0); |
593 | timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0); |
593 | iouring_to_submit = 0; |
594 | iouring_to_submit = 0; |
594 | |
595 | |
595 | EV_ACQUIRE_CB; |
596 | EV_ACQUIRE_CB; |
596 | |
597 | |
597 | if (ecb_expect_false (res < 0)) |
598 | if (ecb_expect_false (res < 0)) |
… | |
… | |
609 | iouring_init (EV_P_ int flags) |
610 | iouring_init (EV_P_ int flags) |
610 | { |
611 | { |
611 | if (!epoll_init (EV_A_ 0)) |
612 | if (!epoll_init (EV_A_ 0)) |
612 | return 0; |
613 | return 0; |
613 | |
614 | |
|
|
615 | iouring_entries = IOURING_INIT_ENTRIES; |
|
|
616 | iouring_max_entries = 0; |
|
|
617 | |
|
|
618 | if (iouring_internal_init (EV_A) < 0) |
|
|
619 | { |
|
|
620 | iouring_internal_destroy (EV_A); |
|
|
621 | return 0; |
|
|
622 | } |
|
|
623 | |
614 | ev_io_init (EV_A_ &iouring_epoll_w, iouring_epoll_cb, backend_fd, EV_READ); |
624 | ev_io_init (&iouring_epoll_w, iouring_epoll_cb, backend_fd, EV_READ); |
615 | ev_set_priority (&iouring_epoll_w, EV_MAXPRI); |
625 | ev_set_priority (&iouring_epoll_w, EV_MAXPRI); |
616 | |
626 | |
617 | ev_io_init (&iouring_tfd_w, iouring_tfd_cb, iouring_tfd, EV_READ); |
627 | ev_io_init (&iouring_tfd_w, iouring_tfd_cb, iouring_tfd, EV_READ); |
618 | ev_set_priority (&iouring_tfd_w, EV_MAXPRI); |
628 | ev_set_priority (&iouring_tfd_w, EV_MAXPRI); |
619 | |
|
|
620 | iouring_entries = IOURING_INIT_ENTRIES; |
|
|
621 | iouring_max_entries = 0; |
|
|
622 | |
|
|
623 | if (iouring_internal_init (EV_A) < 0) |
|
|
624 | { |
|
|
625 | iouring_internal_destroy (EV_A); |
|
|
626 | return 0; |
|
|
627 | } |
|
|
628 | |
629 | |
629 | ev_io_start (EV_A_ &iouring_epoll_w); |
630 | ev_io_start (EV_A_ &iouring_epoll_w); |
630 | ev_unref (EV_A); /* watcher should not keep loop alive */ |
631 | ev_unref (EV_A); /* watcher should not keep loop alive */ |
631 | |
632 | |
632 | ev_io_start (EV_A_ &iouring_tfd_w); |
633 | ev_io_start (EV_A_ &iouring_tfd_w); |