… | |
… | |
56 | * POLLOUT|POLLIN, but polling for POLLIN fails. just great, |
56 | * POLLOUT|POLLIN, but polling for POLLIN fails. just great, |
57 | * so we have to fall back to something else (hello, epoll), |
57 | * so we have to fall back to something else (hello, epoll), |
58 | * but at least the fallback can be slow, because these are |
58 | * but at least the fallback can be slow, because these are |
59 | * exceptional cases, right? |
59 | * exceptional cases, right? |
60 | * d) hmm, you have to tell the kernel the maximum number of watchers |
60 | * d) hmm, you have to tell the kernel the maximum number of watchers |
61 | * you want to queue when initialiasing the aio context. but of |
61 | * you want to queue when initialising the aio context. but of |
62 | * course the real limit is magically calculated in the kernel, and |
62 | * course the real limit is magically calculated in the kernel, and |
63 | * is often higher then we asked for. so we just have to destroy |
63 | * is often higher than we asked for. so we just have to destroy |
64 | * the aio context and re-create it a bit larger if we hit the limit. |
64 | * the aio context and re-create it a bit larger if we hit the limit. |
65 | * (starts to remind you of epoll? well, it's a bit more deterministic |
65 | * (starts to remind you of epoll? well, it's a bit more deterministic |
66 | * and less gambling, but still ugly as hell). |
66 | * and less gambling, but still ugly as hell). |
… | |
… | |
68 | * limit. or the kernel simply doesn't want to handle your watchers. |
68 | * limit. or the kernel simply doesn't want to handle your watchers. |
69 | * what the fuck do we do then? you guessed it, in the middle |
69 | * what the fuck do we do then? you guessed it, in the middle |
70 | * of event handling we have to switch to 100% epoll polling. and |
70 | * of event handling we have to switch to 100% epoll polling. and |
71 | * that better is as fast as normal epoll polling, so you practically |
71 | * that better is as fast as normal epoll polling, so you practically |
72 | * have to use the normal epoll backend with all its quirks. |
72 | * have to use the normal epoll backend with all its quirks. |
73 | * f) end result of this trainwreck: it inherits all the disadvantages |
73 | * f) end result of this train wreck: it inherits all the disadvantages |
74 | * from epoll, while adding a number on its own. why even bother to use |
74 | * from epoll, while adding a number on its own. why even bother to use |
75 | * it? because if conditions are right and your fds are supported and you |
75 | * it? because if conditions are right and your fds are supported and you |
76 | * don't hit a limit, this backend is actually faster, doesn't gamble with |
76 | * don't hit a limit, this backend is actually faster, doesn't gamble with |
77 | * your fds, batches watchers and events and doesn't require costly state |
77 | * your fds, batches watchers and events and doesn't require costly state |
78 | * recreates. well, until it does. |
78 | * recreates. well, until it does. |
79 | * g) all of this makes this backend use almost twice as much code as epoll. |
79 | * g) all of this makes this backend use almost twice as much code as epoll. |
80 | * which in turn uses twice as much code as poll. and thats not counting |
80 | * which in turn uses twice as much code as poll. and that's not counting |
81 | * the fact that this backend also depends on the epoll backend, making |
81 | * the fact that this backend also depends on the epoll backend, making |
82 | * it three times as much code as poll, or kqueue. |
82 | * it three times as much code as poll, or kqueue. |
83 | * h) bleah. why can't linux just do kqueue. sure kqueue is ugly, but by now |
83 | * h) bleah. why can't linux just do kqueue. sure kqueue is ugly, but by now |
84 | * it's clear that whetaver linux comes up with is far, far, far worse. |
84 | * it's clear that whatever linux comes up with is far, far, far worse. |
85 | */ |
85 | */ |
86 | |
86 | |
87 | #include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */ |
87 | #include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */ |
88 | #include <poll.h> |
88 | #include <poll.h> |
89 | #include <linux/aio_abi.h> |
89 | #include <linux/aio_abi.h> |
… | |
… | |
190 | requests = requests / one_page * one_page + first_page; |
190 | requests = requests / one_page * one_page + first_page; |
191 | |
191 | |
192 | return requests; |
192 | return requests; |
193 | } |
193 | } |
194 | |
194 | |
195 | /* we use out own wrapper structure in acse we ever want to do something "clever" */ |
195 | /* we use our own wrapper structure in case we ever want to do something "clever" */ |
196 | typedef struct aniocb |
196 | typedef struct aniocb |
197 | { |
197 | { |
198 | struct iocb io; |
198 | struct iocb io; |
199 | /*int inuse;*/ |
199 | /*int inuse;*/ |
200 | } *ANIOCBP; |
200 | } *ANIOCBP; |
… | |
… | |
203 | void |
203 | void |
204 | linuxaio_array_needsize_iocbp (ANIOCBP *base, int offset, int count) |
204 | linuxaio_array_needsize_iocbp (ANIOCBP *base, int offset, int count) |
205 | { |
205 | { |
206 | while (count--) |
206 | while (count--) |
207 | { |
207 | { |
208 | /* TODO: quite the overhead to allocate every iocb separately, maybe use our own alocator? */ |
208 | /* TODO: quite the overhead to allocate every iocb separately, maybe use our own allocator? */ |
209 | ANIOCBP iocb = (ANIOCBP)ev_malloc (sizeof (*iocb)); |
209 | ANIOCBP iocb = (ANIOCBP)ev_malloc (sizeof (*iocb)); |
210 | |
210 | |
211 | /* full zero initialise is probably not required at the moment, but |
211 | /* full zero initialise is probably not required at the moment, but |
212 | * this is not well documented, so we better do it. |
212 | * this is not well documented, so we better do it. |
213 | */ |
213 | */ |
… | |
… | |
238 | ANIOCBP iocb = linuxaio_iocbps [fd]; |
238 | ANIOCBP iocb = linuxaio_iocbps [fd]; |
239 | |
239 | |
240 | if (iocb->io.aio_reqprio < 0) |
240 | if (iocb->io.aio_reqprio < 0) |
241 | { |
241 | { |
242 | /* we handed this fd over to epoll, so undo this first */ |
242 | /* we handed this fd over to epoll, so undo this first */ |
243 | /* we do it manually becvause the optimisations on epoll_modfy won't do us any good */ |
243 | /* we do it manually because the optimisations on epoll_modify won't do us any good */ |
244 | epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0); |
244 | epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0); |
245 | iocb->io.aio_reqprio = 0; |
245 | iocb->io.aio_reqprio = 0; |
246 | } |
246 | } |
247 | |
247 | |
248 | if (iocb->io.aio_buf) |
248 | if (iocb->io.aio_buf) |
… | |
… | |
301 | --nr; |
301 | --nr; |
302 | ++ev; |
302 | ++ev; |
303 | } |
303 | } |
304 | } |
304 | } |
305 | |
305 | |
306 | /* get any events from ringbuffer, return true if any were handled */ |
306 | /* get any events from ring buffer, return true if any were handled */ |
307 | static int |
307 | static int |
308 | linuxaio_get_events_from_ring (EV_P) |
308 | linuxaio_get_events_from_ring (EV_P) |
309 | { |
309 | { |
310 | struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx; |
310 | struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx; |
311 | |
311 | |
… | |
… | |
397 | |
397 | |
398 | /* first phase: submit new iocbs */ |
398 | /* first phase: submit new iocbs */ |
399 | |
399 | |
400 | /* io_submit might return less than the requested number of iocbs */ |
400 | /* io_submit might return less than the requested number of iocbs */ |
401 | /* this is, afaics, only because of errors, but we go by the book and use a loop, */ |
401 | /* this is, afaics, only because of errors, but we go by the book and use a loop, */ |
402 | /* which allows us to pinpoint the errornous iocb */ |
402 | /* which allows us to pinpoint the erroneous iocb */ |
403 | for (submitted = 0; submitted < linuxaio_submitcnt; ) |
403 | for (submitted = 0; submitted < linuxaio_submitcnt; ) |
404 | { |
404 | { |
405 | int res = evsys_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); |
405 | int res = evsys_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); |
406 | |
406 | |
407 | if (expect_false (res < 0)) |
407 | if (expect_false (res < 0)) |
… | |
… | |
421 | res = 1; /* skip this iocb - another iocb, another chance */ |
421 | res = 1; /* skip this iocb - another iocb, another chance */ |
422 | } |
422 | } |
423 | else if (errno == EAGAIN) |
423 | else if (errno == EAGAIN) |
424 | { |
424 | { |
425 | /* This happens when the ring buffer is full, or some other shit we |
425 | /* This happens when the ring buffer is full, or some other shit we |
426 | * dont' know and isn't documented. Most likely because we have too |
426 | * don't know and isn't documented. Most likely because we have too |
427 | * many requests and linux aio can't be assed to handle them. |
427 | * many requests and linux aio can't be assed to handle them. |
428 | * In this case, we try to allocate a larger ring buffer, freeing |
428 | * In this case, we try to allocate a larger ring buffer, freeing |
429 | * ours first. This might fail, in which case we have to fall back to 100% |
429 | * ours first. This might fail, in which case we have to fall back to 100% |
430 | * epoll. |
430 | * epoll. |
431 | * God, how I hate linux not getting its act together. Ever. |
431 | * God, how I hate linux not getting its act together. Ever. |
… | |
… | |
480 | int |
480 | int |
481 | linuxaio_init (EV_P_ int flags) |
481 | linuxaio_init (EV_P_ int flags) |
482 | { |
482 | { |
483 | /* would be great to have a nice test for IOCB_CMD_POLL instead */ |
483 | /* would be great to have a nice test for IOCB_CMD_POLL instead */ |
484 | /* also: test some semi-common fd types, such as files and ttys in recommended_backends */ |
484 | /* also: test some semi-common fd types, such as files and ttys in recommended_backends */ |
485 | /* 4.18 introduced IOCB_CMD_POLL, 4.19 made epoll work */ |
485 | /* 4.18 introduced IOCB_CMD_POLL, 4.19 made epoll work, and we need that */ |
486 | if (ev_linux_version () < 0x041300) |
486 | if (ev_linux_version () < 0x041300) |
487 | return 0; |
487 | return 0; |
488 | |
488 | |
489 | if (!epoll_init (EV_A_ 0)) |
489 | if (!epoll_init (EV_A_ 0)) |
490 | return 0; |
490 | return 0; |