…

/* the submit/completion queue entries */
#define EV_SQES ((struct io_uring_sqe *) iouring_sqes)
#define EV_CQES ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes))
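
/* both rings live in memory shared with the kernel: iouring_sqes is the
 * mmap'ed sqe array, while the cqes sit inside the cq ring mapping, at the
 * offset (iouring_cq_cqes) the kernel reported at setup time */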

inline_speed
int
iouring_enter (EV_P_ ev_tstamp timeout)
{
  int res;

  EV_RELEASE_CB;
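  /* the release callback, if any, runs before we might block in the kernel;
   * the matching EV_ACQUIRE_CB below runs on return, letting other threads
   * use the loop in the meantime (see ev_set_loop_release_cb) */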

…

  EV_ACQUIRE_CB;

  return res;
}

/* TODO: can we move things around so we don't need this forward-reference? */
static void
iouring_poll (EV_P_ ev_tstamp timeout);

static
struct io_uring_sqe *
iouring_sqe_get (EV_P)
{
  unsigned tail;

  for (;;)
    {
      tail = EV_SQ_VAR (tail);

      if (ecb_expect_true (tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)))
        break; /* what's the problem, we have free sqes */
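      /* note: the check above is wraparound-safe - head and tail are
       * free-running unsigned counters, so their difference counts the
       * sqes in flight even after either index wraps */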

      /* queue full, need to flush and possibly handle some events */

#if EV_FEATURE_CODE
      /* first we ask the kernel nicely, most often this frees up some sqes */
      int res = iouring_enter (EV_A_ EV_TS_CONST (0.));

      ECB_MEMORY_FENCE_ACQUIRE; /* better safe than sorry */

      if (res >= 0)
        continue; /* yes, it worked, try again */
#endif

      /* some problem, possibly EBUSY - do the full poll and let it handle any issues */

      iouring_poll (EV_A_ EV_TS_CONST (0.));
      /* iouring_poll should have done ECB_MEMORY_FENCE_ACQUIRE for us */
    }

  /*assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)));*/
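
  /* the sqe at the current tail is handed to the caller to fill in; it only
   * becomes visible to the kernel once iouring_sqe_submit advances the tail */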
  return EV_SQES + (tail & EV_SQ_VAR (ring_mask));
…

      if (errno != EINVAL)
        return -1; /* we failed */

#if TODO
      if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEAT_SINGLE_MMAP | IORING_FEAT_SUBMIT_STABLE))
        return -1; /* we require the above features */
#endif
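
      /* IORING_FEAT_NODROP: the kernel never silently drops completions;
       * IORING_FEAT_SINGLE_MMAP: sq and cq ring fit into a single mmap;
       * IORING_FEAT_SUBMIT_STABLE: sqe data may be reused right after submit */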

      /* EINVAL: lots of possible reasons, but maybe
       * it is because we hit the unqueryable hardcoded size limit
…
      /* Jens Axboe notified me that user_data is not what is documented, but is
       * some kind of unique ID that has to match, otherwise the request cannot
       * be removed. Since we don't *really* have that, we pass in the old
       * generation counter - if that fails, too bad, it will hopefully be removed
       * at close time and then be ignored. */
      sqe->addr = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32);
      sqe->user_data = (uint64_t)-1;
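      /* addr must equal the user_data of the poll sqe being cancelled, while
       * our own user_data of -1 makes iouring_process_cqe skip the cqe that
       * this removal request itself generates */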
      iouring_sqe_submit (EV_A_ sqe);

      /* increment generation counter to avoid handling old events */
      ++anfds [fd].egen;
    }
…
iouring_process_cqe (EV_P_ struct io_uring_cqe *cqe)
{
  int fd = cqe->user_data & 0xffffffffU;
  uint32_t gen = cqe->user_data >> 32;
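  /* user_data is the (fd, generation) pair the poll sqe was submitted with:
   * the fd in the low 32 bits, the generation counter in the high 32 bits */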
  int res = cqe->res;

  /* user_data -1 is a remove that we are not interested in at the moment */
  if (cqe->user_data == (uint64_t)-1)
    return;

  assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax));

  /* documentation lies, of course. the result value is NOT like
   * normal syscalls, but like linux raw syscalls, i.e. negative
…

static void
iouring_poll (EV_P_ ev_tstamp timeout)
{
  /* if we have events, no need for extra syscalls, but we might have to queue events */
  /* we also clear the timeout if there are outstanding fdchanges */
  /* the latter should only happen if both the sq and cq are full, most likely */
  /* because we have a lot of event sources that immediately complete */
  /* TODO: fdchangecnt is always 0 because fd_reify does not have two buffers yet */
  if (iouring_handle_cq (EV_A) || fdchangecnt)
    timeout = EV_TS_CONST (0.);
  else
    /* no events, so maybe wait for some */
    iouring_tfd_update (EV_A_ timeout);
