… | |
… | |
228 | |
228 | |
229 | /* the submit/completion queue entries */ |
229 | /* the submit/completion queue entries */ |
230 | #define EV_SQES ((struct io_uring_sqe *) iouring_sqes) |
230 | #define EV_SQES ((struct io_uring_sqe *) iouring_sqes) |
231 | #define EV_CQES ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes)) |
231 | #define EV_CQES ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes)) |
232 | |
232 | |
|
|
233 | /* TODO: this is not enough, we might have to reap events */ |
|
|
234 | /* TODO: but we can't, as that will re-arm events, causing */ |
|
|
235 | /* TODO: an endless loop in fd_reify */ |
|
|
/*
 * iouring_enter: wrapper around evsys_io_uring_enter that is bracketed by
 * EV_RELEASE_CB before the call and EV_ACQUIRE_CB after it.
 *
 * timeout: the GETEVENTS flag is requested only when this is greater than zero.
 * returns: the unmodified result of evsys_io_uring_enter.
 *
 * side effect: iouring_to_submit is reset to 0 on every path, including
 * the path where the call returned a negative result.
 *
 * NOTE(review): callers visible in this diff test errno after this function
 * returns, and EV_ACQUIRE_CB runs between the call and the return - confirm
 * that it cannot modify errno.
 */
236 | static int |


237 | iouring_enter (EV_P_ ev_tstamp timeout) |


238 | { |


239 | int res; |


240 | |


/* presumably runs the loop release hook around the blocking call - the macro is defined elsewhere, TODO confirm */
241 | EV_RELEASE_CB; |


242 | |


/* third argument is the constant 1 - presumably min_complete, verify against the evsys wrapper */
243 | res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1, |


/* a timeout of zero or less passes 0 as flags, so no wait for completions is requested */
244 | timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0); |


245 | |


/* debug-only check: a non-negative result must equal the number of queued sqes */
246 | assert (("libev: io_uring_enter did not consume all sqes", (res < 0 || res == iouring_to_submit))); |


247 | |


/* cleared unconditionally, even when res is negative */
/* NOTE(review): confirm that dropping the count after a failed call is intended */
248 | iouring_to_submit = 0; |


249 | |


250 | EV_ACQUIRE_CB; |


251 | |


252 | return res; |


253 | } |
|
|
254 | |
/*
 * iouring_sqe_get: returns a pointer into EV_SQES at index (tail & ring_mask),
 * after first trying to flush the submission queue when it is found full.
 *
 * This span shows two revisions interleaved row by row:
 *   rows 233-249 (old): on a full queue, call evsys_io_uring_enter directly,
 *                       reset iouring_to_submit, then assert the queue has room.
 *   rows 255-283 (new): on a full queue, call iouring_enter with a zero timeout,
 *                       call ev_syserr when it fails with EBUSY, otherwise break;
 *                       the post-flush assert is commented out.
 */
233 | static |
255 | static |
234 | struct io_uring_sqe * |
256 | struct io_uring_sqe * |
235 | iouring_sqe_get (EV_P) |
257 | iouring_sqe_get (EV_P) |
236 | { |
258 | { |
/* tail is read once here and is not re-read after the flush below */
237 | unsigned tail = EV_SQ_VAR (tail); |
259 | unsigned tail = EV_SQ_VAR (tail); |
238 | |
260 | |
/* full test: one more entry than (tail - head) would exceed ring_entries */
/* the new revision turns the if into a while and marks the condition with ecb_expect_false */
239 | if (tail + 1 - EV_SQ_VAR (head) > EV_SQ_VAR (ring_entries)) |
261 | while (ecb_expect_false (tail + 1 - EV_SQ_VAR (head) > EV_SQ_VAR (ring_entries))) |
240 | { |
262 | { |
241 | /* queue full, flush */ |
263 | /* queue full, need to flush */ |
/* old revision: the result of the call is ignored */
242 | evsys_io_uring_enter (iouring_fd, iouring_to_submit, 0, 0, 0, 0); |


243 | iouring_to_submit = 0; |


244 | } |


245 | |
264 | |


/* new revision: zero timeout, so iouring_enter passes no GETEVENTS flag */
265 | int res = iouring_enter (EV_A_ EV_TS_CONST (0.)); |


266 | |


267 | /* io_uring_enter might fail with EBUSY and won't submit anything */ |


268 | /* unfortunately, we can't handle this at the moment */ |


269 | |


270 | if (res < 0 && errno == EBUSY) |


271 | /* the sane thing might be to resize, but we can't */ |


272 | //TODO |


273 | ev_syserr ("(libev) io_uring_enter could not clear sq"); |


/* NOTE(review): any other outcome, including a negative res with a different errno, */
/* leaves the loop here without re-testing the full condition - confirm this is intended */
274 | else |


275 | break; |


276 | |


277 | /* iouring_poll should have done ECB_MEMORY_FENCE_ACQUIRE */ |


278 | } |


279 | |
/* the old revision asserts that the flush made room; the new revision disables that assert */
246 | assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries))); |
280 | /*assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)));*/ |
247 | |
281 | |
/* slot address: the tail index masked with ring_mask, as an offset into EV_SQES */
248 | return EV_SQES + (tail & EV_SQ_VAR (ring_mask)); |
282 | return EV_SQES + (tail & EV_SQ_VAR (ring_mask)); |
249 | } |
283 | } |
250 | |
284 | |
251 | inline_size |
285 | inline_size |
… | |
… | |
400 | { |
434 | { |
401 | /* we assume the sqe's are all "properly" initialised */ |
435 | /* we assume the sqe's are all "properly" initialised */ |
402 | struct io_uring_sqe *sqe = iouring_sqe_get (EV_A); |
436 | struct io_uring_sqe *sqe = iouring_sqe_get (EV_A); |
403 | sqe->opcode = IORING_OP_POLL_REMOVE; |
437 | sqe->opcode = IORING_OP_POLL_REMOVE; |
404 | sqe->fd = fd; |
438 | sqe->fd = fd; |
|
|
439 | /* Jens Axboe notified me that user_data is not what is documented, but is |
|
|
440 | * some kind of unique ID that has to match, otherwise the request cannot |
|
|
441 | * be removed. Since we don't *really* have that, we pass in the old |
|
|
442 | * generation counter - if that fails, too bad, it will hopefully be removed |
|
|
443 | * at close time and then be ignored. */ |
|
|
444 | sqe->addr = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32); |
405 | sqe->user_data = -1; |
445 | sqe->user_data = (uint64_t)-1; |
406 | iouring_sqe_submit (EV_A_ sqe); |
446 | iouring_sqe_submit (EV_A_ sqe); |
407 | |
447 | |
408 | /* increment generation counter to avoid handling old events */ |
448 | /* increment generation counter to avoid handling old events */ |
409 | ++anfds [fd].egen; |
449 | ++anfds [fd].egen; |
410 | } |
450 | } |
… | |
… | |
451 | { |
491 | { |
452 | int fd = cqe->user_data & 0xffffffffU; |
492 | int fd = cqe->user_data & 0xffffffffU; |
453 | uint32_t gen = cqe->user_data >> 32; |
493 | uint32_t gen = cqe->user_data >> 32; |
454 | int res = cqe->res; |
494 | int res = cqe->res; |
455 | |
495 | |
456 | /* ignore fd removal events, if there are any. TODO: verify */ |
496 | /* user_data -1 is a remove that we are not atm. interested in */ |
457 | /* TODO: yes, this triggers */ |
|
|
458 | if (cqe->user_data == (__u64)-1) |
497 | if (cqe->user_data == (uint64_t)-1) |
459 | return; |
498 | return; |
460 | |
499 | |
461 | assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax)); |
500 | assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax)); |
462 | |
501 | |
463 | /* documentation lies, of course. the result value is NOT like |
502 | /* documentation lies, of course. the result value is NOT like |
… | |
… | |
465 | * error numbers. fortunate, as otherwise there would be no way |
504 | * error numbers. fortunate, as otherwise there would be no way |
466 | * to get error codes at all. still, why not document this? |
505 | * to get error codes at all. still, why not document this? |
467 | */ |
506 | */ |
468 | |
507 | |
469 | /* ignore event if generation doesn't match */ |
508 | /* ignore event if generation doesn't match */ |
|
|
509 | /* other than skipping removal events, */ |
470 | /* this should actually be very rare */ |
510 | /* this should actually be very rare */ |
471 | if (ecb_expect_false (gen != (uint32_t)anfds [fd].egen)) |
511 | if (ecb_expect_false (gen != (uint32_t)anfds [fd].egen)) |
472 | return; |
512 | return; |
473 | |
513 | |
474 | if (ecb_expect_false (res < 0)) |
514 | if (ecb_expect_false (res < 0)) |
… | |
… | |
597 | iouring_tfd_update (EV_A_ timeout); |
637 | iouring_tfd_update (EV_A_ timeout); |
598 | |
638 | |
599 | /* only enter the kernel if we have something to submit, or we need to wait */ |
639 | /* only enter the kernel if we have something to submit, or we need to wait */ |
600 | if (timeout || iouring_to_submit) |
640 | if (timeout || iouring_to_submit) |
601 | { |
641 | { |
602 | int res; |
642 | int res = iouring_enter (EV_A_ timeout); |
603 | |
|
|
604 | EV_RELEASE_CB; |
|
|
605 | |
|
|
606 | res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1, |
|
|
607 | timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0); |
|
|
608 | iouring_to_submit = 0; |
|
|
609 | |
|
|
610 | EV_ACQUIRE_CB; |
|
|
611 | |
643 | |
612 | if (ecb_expect_false (res < 0)) |
644 | if (ecb_expect_false (res < 0)) |
613 | if (errno == EINTR) |
645 | if (errno == EINTR) |
614 | /* ignore */; |
646 | /* ignore */; |
|
|
647 | else if (errno == EBUSY) |
|
|
648 | /* cq full, cannot submit - should be rare because we flush the cq first, so simply ignore */; |
615 | else |
649 | else |
616 | ev_syserr ("(libev) iouring setup"); |
650 | ev_syserr ("(libev) iouring setup"); |
617 | else |
651 | else |
618 | iouring_handle_cq (EV_A); |
652 | iouring_handle_cq (EV_A); |
619 | } |
653 | } |