…
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

/*
 * general notes about linux aio:
 *
 * a) at first, the linux aio IOCB_CMD_POLL functionality introduced in
 *    4.18 looks too good to be true: both watchers and events can be
 *    batched, and events can even be handled in userspace using
 *    a ring buffer shared with the kernel. watchers can be canceled
 *    regardless of whether the fd has been closed. no problems with fork.
 *    ok, the ring buffer is 200% undocumented (there isn't even a
 *    header file), but otherwise, it's pure bliss!
 * b) ok, watchers are one-shot, so you have to re-arm active ones
 *    on every iteration. so much for syscall-less event handling,
 *    but at least these re-arms can be batched, no big deal, right?
 * c) well, linux as usual: the documentation lies to you: io_submit
 *    sometimes returns EINVAL because the kernel doesn't feel like
 *    handling your poll mask - ttys can be polled for POLLOUT,
 *    POLLOUT|POLLIN, but polling for POLLIN fails. just great,
 *    so we have to fall back to something else (hello, epoll),
 *    but at least the fallback can be slow, because these are
 *    exceptional cases, right?
 * d) hmm, you have to tell the kernel the maximum number of watchers
 *    you want to queue when initialising the aio context. but of
 *    course the real limit is magically calculated in the kernel, and
 *    is often higher than we asked for. so we just have to destroy
 *    the aio context and re-create it a bit larger if we hit the limit.
 *    (starts to remind you of epoll? well, it's a bit more deterministic
 *    and less of a gamble, but still ugly as hell).
 * e) that's when you find out you can also hit an arbitrary system-wide
 *    limit. or the kernel simply doesn't want to handle your watchers.
 *    what the fuck do we do then? you guessed it, in the middle
 *    of event handling we have to switch to 100% epoll polling. and
 *    that had better be as fast as normal epoll polling, so you practically
 *    have to use the normal epoll backend with all its quirks.
 * f) end result of this trainwreck: it inherits all the disadvantages
 *    of epoll, while adding a number of its own. why even bother to use
 *    it? because if conditions are right and your fds are supported and you
 *    don't hit a limit, this backend is actually faster, doesn't gamble with
 *    your fds, batches watchers and events and doesn't require costly state
 *    recreates. well, until it does.
 * g) all of this makes this backend use almost twice as much code as epoll.
 *    which in turn uses twice as much code as poll. and that's not counting
 *    the fact that this backend also depends on the epoll backend, making
 *    it three times as much code as poll, or kqueue.
 * h) bleah. why can't linux just do kqueue. sure, kqueue is ugly, but by now
 *    it's clear that whatever linux does is far, far, far worse.
 */

#include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */
#include <poll.h>
#include <linux/aio_abi.h>

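/* for illustration only, and deliberately not compiled: a minimal sketch
 * of a single IOCB_CMD_POLL round trip, assuming a 4.18+ kernel and an
 * aio_context_t "ctx" obtained from a successful io_setup ("fd" is the
 * descriptor to watch). the real code below batches submissions and, where
 * possible, reads completions from the shared ring buffer instead.
 */
#if 0
struct iocb iocb = { 0 };
struct iocb *iocbp = &iocb;
struct io_event ev;

iocb.aio_lio_opcode = IOCB_CMD_POLL;
iocb.aio_fildes     = fd;
iocb.aio_buf        = POLLIN;                  /* the poll mask goes into aio_buf */

syscall (SYS_io_submit, ctx, 1, &iocbp);       /* arm the watcher (one-shot) */
syscall (SYS_io_getevents, ctx, 1, 1, &ev, 0); /* on return, ev.res holds the revents */
#endif
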
/*****************************************************************************/
/* syscall wrapdadoop - this section has the raw api/abi definitions */

#include <sys/syscall.h> /* no glibc wrappers */

/* aio_abi.h is not versioned in any way, so we cannot test for its existence */
#define IOCB_CMD_POLL 5

/* taken from linux/fs/aio.c. yup, that's a .c file.
 * not only is this totally undocumented, not even the source code
 * can tell you what the future semantics of compat_features and
 * incompat_features are, or what header_length actually is for.
 */
#define AIO_RING_MAGIC 0xa10a10a1
#define AIO_RING_INCOMPAT_FEATURES 0
struct aio_ring
{
  unsigned id;    /* kernel internal index number */
…
}

/*****************************************************************************/
/* actual backend implementation */

ecb_cold
static int
linuxaio_nr_events (EV_P)
{
  /* we start small, with 15 iocbs, and increase from there.
   * that's tiny, but the kernel has a rather low system-wide
   * limit that can be reached quickly, so let's be parsimonious
   * with this resource.
   * Rest assured, the kernel generously rounds up small and big numbers
   * in different ways (but doesn't seem to charge you for it).
   * The 15 here is because the kernel usually has a power of two as aio-max-nr,
   * and this helps to take advantage of that limit.
   */

  /* we try to fill 4kB pages exactly.
   * the ring buffer header is 32 bytes, every io event is 32 bytes.
   * the kernel takes the io request number, doubles it, adds 2
   * and adds the ring buffer.
   * the way we use this is by starting low, and then roughly doubling the
   * size each time we hit a limit.
   */

  int requests   = 15 << linuxaio_iteration;
  int one_page   =  (4096
                    / sizeof (struct io_event)    ) / 2; /* how many fit into one page */
  int first_page = ((4096 - sizeof (struct aio_ring))
                    / sizeof (struct io_event) - 2) / 2; /* how many fit into the first page */

  /* if everything fits into one page, use count exactly */
  if (requests > first_page)
    /* otherwise, round down to full pages and add the first page */
    requests = requests / one_page * one_page + first_page;

  return requests;
}

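/* a worked example (illustrative only, assuming the usual 32-byte
 * struct io_event and 32-byte struct aio_ring):
 *   one_page   =  (4096 / 32)           / 2 = 64
 *   first_page = ((4096 - 32) / 32 - 2) / 2 = 62
 * iteration 0 asks for 15 requests, which fits into the first page and is
 * used as-is, while iteration 3 asks for 120, which is rounded to
 * 120 / 64 * 64 + 62 = 126 io events.
 */
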
/* we use our own wrapper structure in case we ever want to do something "clever" */
typedef struct aniocb
{
  struct iocb io;
  /*int inuse;*/
…
linuxaio_modify (EV_P_ int fd, int oev, int nev)
{
  array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp);
  ANIOCBP iocb = linuxaio_iocbps [fd];

  if (iocb->io.aio_reqprio < 0)
    {
      /* we handed this fd over to epoll, so undo this first */
      /* we do it manually because the optimisations on epoll_modify won't do us any good */
      epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0);
      iocb->io.aio_reqprio = 0;
    }

  if (iocb->io.aio_buf)
    /* io_cancel always returns some error on relevant kernels, but works */
    evsys_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0);

  if (nev)
    {
      iocb->io.aio_buf =
        (nev & EV_READ ? POLLIN : 0)
…
      array_needsize (struct iocb *, linuxaio_submits, linuxaio_submitmax, linuxaio_submitcnt, array_needsize_noinit);
      linuxaio_submits [linuxaio_submitcnt - 1] = &iocb->io;
    }
}

static void
linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents)
{
  epoll_poll (EV_A_ 0);
}

static void
linuxaio_fd_rearm (EV_P_ int fd)
{
  anfds [fd].events = 0;
  linuxaio_iocbps [fd]->io.aio_buf = 0;
  fd_change (EV_A_ fd, EV_ANFD_REIFY);
}

static void
linuxaio_parse_events (EV_P_ struct io_event *ev, int nr)
{
  while (nr)
    {
      int fd  = ev->data;
      int res = ev->res;

      assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax));

      /* feed events, we do not expect or handle POLLNVAL */
      fd_event (
        EV_A_
        fd,
        (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
        | (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ  : 0)
      );

      /* linux aio is oneshot: rearm fd. TODO: this does more work than needed */
      linuxaio_fd_rearm (EV_A_ fd);

      --nr;
      ++ev;
    }
}

…
      linuxaio_parse_events (EV_A_ ioev, res);
      linuxaio_get_events_from_ring (EV_A);
    }
}

static int
linuxaio_io_setup (EV_P)
{
  linuxaio_ctx = 0;
  return evsys_io_setup (linuxaio_nr_events (EV_A), &linuxaio_ctx);
}

static void
linuxaio_poll (EV_P_ ev_tstamp timeout)
{
  int submitted;

…
  /* io_submit might return less than the requested number of iocbs */
  /* this is, afaics, only because of errors, but we go by the book and use a loop, */
  /* which allows us to pinpoint the erroneous iocb */
  for (submitted = 0; submitted < linuxaio_submitcnt; )
    {
      int res = evsys_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);

      if (expect_false (res < 0))
        if (errno == EINVAL)
          {
            /* This happens for unsupported fds, officially, but in my testing,
             * also randomly happens for supported fds. We fall back to good old
             * poll() here, under the assumption that this is a very rare case.
             * See https://lore.kernel.org/patchwork/patch/1047453/ to see
             * discussion about such a case (ttys) where polling for POLLIN
             * fails but POLLIN|POLLOUT works.
             */
            struct iocb *iocb = linuxaio_submits [submitted];
            epoll_modify (EV_A_ iocb->aio_fildes, 0, anfds [iocb->aio_fildes].events);
            iocb->aio_reqprio = -1; /* mark iocb as epoll */

            res = 1; /* skip this iocb - another iocb, another chance */
          }
        else if (errno == EAGAIN)
          {
            /* This happens when the ring buffer is full, or some other shit we
             * don't know about and that isn't documented. Most likely because we have too
             * many requests and linux aio can't be assed to handle them.
             * In this case, we try to allocate a larger ring buffer, freeing
             * ours first. This might fail, in which case we have to fall back to 100%
             * epoll.
             * God, how I hate linux not getting its act together. Ever.
             */
            evsys_io_destroy (linuxaio_ctx);
            linuxaio_submitcnt = 0;

            /* rearm all fds with active iocbs */
            {
              int fd;
              for (fd = 0; fd < linuxaio_iocbpmax; ++fd)
                if (linuxaio_iocbps [fd]->io.aio_buf)
                  linuxaio_fd_rearm (EV_A_ fd);
            }

            ++linuxaio_iteration;
            if (linuxaio_io_setup (EV_A) < 0)
              {
                /* too bad, we can't get a new aio context, go 100% epoll */
                linuxaio_free_iocbp (EV_A);
                ev_io_stop (EV_A_ &linuxaio_epoll_w);
                ev_ref (EV_A);
                linuxaio_ctx = 0;
                backend_modify = epoll_modify;
                backend_poll   = epoll_poll;
              }

            timeout = 0;
            /* it's easiest to handle this mess in another iteration */
            return;
          }
        else if (errno == EBADF)
          {
            fd_kill (EV_A_ linuxaio_submits [submitted]->aio_fildes);

            res = 1; /* skip this iocb */
…
int
linuxaio_init (EV_P_ int flags)
{
  /* would be great to have a nice test for IOCB_CMD_POLL instead */
  /* also: test some semi-common fd types, such as files and ttys in recommended_backends */
  /* 4.18 introduced IOCB_CMD_POLL, 4.19 made epoll work */
  if (ev_linux_version () < 0x041300)
    return 0;
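  /* (0x041300 is, presumably, ev_linux_version ()'s byte-per-component
   * encoding of linux 4.19.0: 0x04 == 4, 0x13 == 19) */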

  if (!epoll_init (EV_A_ 0))
    return 0;

  linuxaio_iteration = 0;

  if (linuxaio_io_setup (EV_A) < 0)
    {
      epoll_destroy (EV_A);
      return 0;
    }

  ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
  ev_set_priority (&linuxaio_epoll_w, EV_MAXPRI);
  ev_io_start (EV_A_ &linuxaio_epoll_w);
  ev_unref (EV_A); /* watcher should not keep loop alive */

  backend_modify = linuxaio_modify;
  backend_poll   = linuxaio_poll;

  linuxaio_iocbpmax = 0;
…

inline_size
void
linuxaio_destroy (EV_P)
{
  epoll_destroy (EV_A);
  linuxaio_free_iocbp (EV_A);
  evsys_io_destroy (linuxaio_ctx);
}

inline_size
…
{
  /* this frees all iocbs, which is very heavy-handed */
  linuxaio_destroy (EV_A);
  linuxaio_submitcnt = 0; /* all pointers were invalidated */

  linuxaio_iteration = 0; /* we start over in the child */

  while (linuxaio_io_setup (EV_A) < 0)
    ev_syserr ("(libev) linuxaio io_setup");

  epoll_fork (EV_A);

  ev_io_stop (EV_A_ &linuxaio_epoll_w);
  ev_io_set (EV_A_ &linuxaio_epoll_w, backend_fd, EV_READ);
  ev_io_start (EV_A_ &linuxaio_epoll_w);

  /* epoll_fork already did this. hopefully */
  /*fd_rearm_all (EV_A);*/
}
