… | |
… | |
53 | * therefore the calculation below will use "exactly" 4kB for the ring buffer |
53 | * therefore the calculation below will use "exactly" 4kB for the ring buffer |
54 | */ |
54 | */ |
55 | #define EV_LINUXAIO_DEPTH (128 / 2 - 2 - 1) /* max. number of io events per batch */ |
55 | #define EV_LINUXAIO_DEPTH (128 / 2 - 2 - 1) /* max. number of io events per batch */ |
56 | |
56 | |
57 | /*****************************************************************************/ |
57 | /*****************************************************************************/ |
58 | /* syscall wrapdadoop - this section has the raw syscall definitions */
59 | |
59 | |
60 | #include <sys/syscall.h> /* no glibc wrappers */ |
60 | #include <sys/syscall.h> /* no glibc wrappers */ |
61 | |
61 | |
62 | /* aio_abi.h is not versioned in any way, so we cannot test for its existence */
63 | #define IOCB_CMD_POLL 5 |
63 | #define IOCB_CMD_POLL 5 |
… | |
… | |
80 | struct io_event io_events[0]; |
80 | struct io_event io_events[0]; |
81 | }; |
81 | }; |
82 | |
82 | |
83 | inline_size |
83 | inline_size |
84 | int |
84 | int |
85 | ev_io_setup (unsigned nr_events, aio_context_t *ctx_idp) |
85 | evsys_io_setup (unsigned nr_events, aio_context_t *ctx_idp) |
86 | { |
86 | { |
87 | return syscall (SYS_io_setup, nr_events, ctx_idp); |
87 | return syscall (SYS_io_setup, nr_events, ctx_idp); |
88 | } |
88 | } |
89 | |
89 | |
90 | inline_size |
90 | inline_size |
91 | int |
91 | int |
92 | ev_io_destroy (aio_context_t ctx_id) |
92 | evsys_io_destroy (aio_context_t ctx_id) |
93 | { |
93 | { |
94 | return syscall (SYS_io_destroy, ctx_id); |
94 | return syscall (SYS_io_destroy, ctx_id); |
95 | } |
95 | } |
96 | |
96 | |
97 | inline_size |
97 | inline_size |
98 | int |
98 | int |
99 | ev_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[]) |
99 | evsys_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[]) |
100 | { |
100 | { |
101 | return syscall (SYS_io_submit, ctx_id, nr, cbp); |
101 | return syscall (SYS_io_submit, ctx_id, nr, cbp); |
102 | } |
102 | } |
103 | |
103 | |
104 | inline_size |
104 | inline_size |
105 | int |
105 | int |
106 | ev_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result) |
106 | evsys_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result) |
107 | { |
107 | { |
108 | return syscall (SYS_io_cancel, ctx_id, cbp, result); |
108 | return syscall (SYS_io_cancel, ctx_id, cbp, result); |
109 | } |
109 | } |
110 | |
110 | |
111 | inline_size |
111 | inline_size |
112 | int |
112 | int |
113 | ev_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout) |
113 | evsys_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout) |
114 | { |
114 | { |
115 | return syscall (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout); |
115 | return syscall (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout); |
116 | } |
116 | } |
117 | |
117 | |
118 | /*****************************************************************************/ |
118 | /*****************************************************************************/ |
… | |
… | |
125 | /*int inuse;*/ |
125 | /*int inuse;*/ |
126 | } *ANIOCBP; |
126 | } *ANIOCBP; |
127 | |
127 | |
128 | inline_size |
128 | inline_size |
129 | void |
129 | void |
130 | linuxaio_array_needsize_iocbp (ANIOCBP *base, int count) |
130 | linuxaio_array_needsize_iocbp (ANIOCBP *base, int offset, int count) |
131 | { |
131 | { |
132 | /* TODO: quite the overhead to allocate every iocb separately, maybe use our own alocator? */ |
|
|
133 | while (count--) |
132 | while (count--) |
134 | { |
133 | { |
|
|
134 | /* TODO: quite the overhead to allocate every iocb separately, maybe use our own alocator? */ |
135 | *base = (ANIOCBP)ev_malloc (sizeof (**base)); |
135 | ANIOCBP iocb = (ANIOCBP)ev_malloc (sizeof (*iocb)); |
136 | /* TODO: full zero initialize required? */ |
136 | |
|
|
137 | /* full zero initialise is probably not required at the moment, but |
|
|
138 | * this is not well documented, so we better do it. |
|
|
139 | */ |
137 | memset (*base, 0, sizeof (**base)); |
140 | memset (iocb, 0, sizeof (*iocb)); |
138 | /* would be nice to initialize fd/data as well, but array_needsize API doesn't support that */ |
141 | |
139 | (*base)->io.aio_lio_opcode = IOCB_CMD_POLL; |
142 | iocb->io.aio_lio_opcode = IOCB_CMD_POLL; |
140 | ++base; |
143 | iocb->io.aio_data = offset; |
|
|
144 | iocb->io.aio_fildes = offset; |
|
|
145 | |
|
|
146 | base [offset++] = iocb; |
141 | } |
147 | } |
142 | } |
148 | } |
143 | |
149 | |
144 | ecb_cold |
150 | ecb_cold |
145 | static void |
151 | static void |
… | |
… | |
153 | |
159 | |
154 | static void |
160 | static void |
155 | linuxaio_modify (EV_P_ int fd, int oev, int nev) |
161 | linuxaio_modify (EV_P_ int fd, int oev, int nev) |
156 | { |
162 | { |
157 | array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp); |
163 | array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp); |
158 | struct aniocb *iocb = linuxaio_iocbps [fd]; |
164 | ANIOCBP iocb = linuxaio_iocbps [fd]; |
159 | |
165 | |
160 | #if EPOLL_FALLBACK |
166 | #if EPOLL_FALLBACK |
161 | if (iocb->io.aio_reqprio < 0) |
167 | if (iocb->io.aio_reqprio < 0) |
162 | { |
168 | { |
163 | epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0); |
169 | epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0); |
164 | iocb->io.aio_reqprio = 0; |
170 | iocb->io.aio_reqprio = 0; |
165 | } |
171 | } |
166 | #endif |
172 | #endif |
167 | |
173 | |
168 | if (iocb->io.aio_buf) |
174 | if (iocb->io.aio_buf) |
169 | ev_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0); /* always returns an error relevant kernels */ |
175 | evsys_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0); /* always returns an error relevant kernels */ |
170 | |
176 | |
171 | if (nev) |
177 | if (nev) |
172 | { |
178 | { |
173 | iocb->io.aio_data = fd; |
|
|
174 | iocb->io.aio_fildes = fd; |
|
|
175 | iocb->io.aio_buf = |
179 | iocb->io.aio_buf = |
176 | (nev & EV_READ ? POLLIN : 0) |
180 | (nev & EV_READ ? POLLIN : 0) |
177 | | (nev & EV_WRITE ? POLLOUT : 0); |
181 | | (nev & EV_WRITE ? POLLOUT : 0); |
178 | |
182 | |
179 | /* queue iocb up for io_submit */ |
183 | /* queue iocb up for io_submit */ |
180 | /* this assumes we only ever get one call per fd per loop iteration */ |
184 | /* this assumes we only ever get one call per fd per loop iteration */ |
… | |
… | |
250 | linuxaio_iocbps [fd]->io.aio_buf = 0; |
254 | linuxaio_iocbps [fd]->io.aio_buf = 0; |
251 | anfds [fd].events = 0; |
255 | anfds [fd].events = 0; |
252 | fd_change (EV_A_ fd, 0); |
256 | fd_change (EV_A_ fd, 0); |
253 | |
257 | |
254 | /* feed events, we do not expect or handle POLLNVAL */ |
258 | /* feed events, we do not expect or handle POLLNVAL */ |
255 | if (expect_false (res & POLLNVAL)) |
|
|
256 | fd_kill (EV_A_ fd); |
|
|
257 | else |
|
|
258 | fd_event ( |
259 | fd_event ( |
259 | EV_A_ |
260 | EV_A_ |
260 | fd, |
261 | fd, |
261 | (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) |
262 | (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) |
262 | | (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0) |
263 | | (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0) |
263 | ); |
264 | ); |
264 | |
265 | |
265 | --nr; |
266 | --nr; |
266 | ++ev; |
267 | ++ev; |
267 | } |
268 | } |
268 | } |
269 | } |
… | |
… | |
329 | EV_RELEASE_CB; |
330 | EV_RELEASE_CB; |
330 | |
331 | |
331 | ts.tv_sec = (long)timeout; |
332 | ts.tv_sec = (long)timeout; |
332 | ts.tv_nsec = (long)((timeout - ts.tv_sec) * 1e9); |
333 | ts.tv_nsec = (long)((timeout - ts.tv_sec) * 1e9); |
333 | |
334 | |
334 | res = ev_io_getevents (linuxaio_ctx, 1, sizeof (ioev) / sizeof (ioev [0]), ioev, &ts); |
335 | res = evsys_io_getevents (linuxaio_ctx, 1, sizeof (ioev) / sizeof (ioev [0]), ioev, &ts); |
335 | |
336 | |
336 | EV_ACQUIRE_CB; |
337 | EV_ACQUIRE_CB; |
337 | |
338 | |
338 | if (res < 0) |
339 | if (res < 0) |
339 | if (errno == EINTR) |
340 | if (errno == EINTR) |
… | |
… | |
361 | for (submitted = 0; submitted < linuxaio_submitcnt; ) |
362 | for (submitted = 0; submitted < linuxaio_submitcnt; ) |
362 | { |
363 | { |
363 | #if 0 |
364 | #if 0 |
364 | int res; |
365 | int res; |
365 | if (linuxaio_submits[submitted]->aio_fildes == backend_fd) |
366 | if (linuxaio_submits[submitted]->aio_fildes == backend_fd) |
366 | res = ev_io_submit (linuxaio_ctx, 1, linuxaio_submits + submitted); |
367 | res = evsys_io_submit (linuxaio_ctx, 1, linuxaio_submits + submitted); |
367 | else |
368 | else |
368 | { res = -1; errno = EINVAL; }; |
369 | { res = -1; errno = EINVAL; }; |
369 | #else |
370 | #else |
370 | int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); |
371 | int res = evsys_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); |
371 | #endif |
372 | #endif |
372 | |
373 | |
373 | if (expect_false (res < 0)) |
374 | if (expect_false (res < 0)) |
374 | if (errno == EAGAIN) |
375 | if (errno == EAGAIN) |
375 | { |
376 | { |
… | |
… | |
399 | * See https://lore.kernel.org/patchwork/patch/1047453/ to see |
400 | * See https://lore.kernel.org/patchwork/patch/1047453/ to see |
400 | * discussion about such a case (ttys) where polling for POLLIN |
401 | * discussion about such a case (ttys) where polling for POLLIN |
401 | * fails but POLLIN|POLLOUT works. |
402 | * fails but POLLIN|POLLOUT works. |
402 | */ |
403 | */ |
403 | struct iocb *iocb = linuxaio_submits [submitted]; |
404 | struct iocb *iocb = linuxaio_submits [submitted]; |
|
|
405 | |
|
|
406 | linuxaio_rearm_epoll (EV_A_ linuxaio_submits [submitted], EPOLL_CTL_ADD); |
|
|
407 | iocb->aio_reqprio = -1; /* mark iocb as epoll */ |
|
|
408 | |
404 | res = 1; /* skip this iocb */ |
409 | res = 1; /* skip this iocb */ |
405 | |
|
|
406 | linuxaio_rearm_epoll (EV_A_ iocb, EPOLL_CTL_ADD); |
|
|
407 | iocb->aio_reqprio = -1; /* mark iocb as epoll */ |
|
|
408 | } |
410 | } |
409 | #endif |
411 | #endif |
|
|
412 | else if (errno == EBADF) |
|
|
413 | { |
|
|
414 | fd_kill (EV_A_ linuxaio_submits [submitted]->aio_fildes); |
|
|
415 | |
|
|
416 | res = 1; /* skip this iocb */ |
|
|
417 | } |
410 | else |
418 | else |
411 | ev_syserr ("(libev) linuxaio io_submit"); |
419 | ev_syserr ("(libev) linuxaio io_submit"); |
412 | |
420 | |
413 | submitted += res; |
421 | submitted += res; |
414 | } |
422 | } |
… | |
… | |
435 | if (ev_linux_version () < 0x041200) |
443 | if (ev_linux_version () < 0x041200) |
436 | return 0; |
444 | return 0; |
437 | #endif |
445 | #endif |
438 | |
446 | |
439 | linuxaio_ctx = 0; |
447 | linuxaio_ctx = 0; |
440 | if (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0) |
448 | if (evsys_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0) |
441 | return 0; |
449 | return 0; |
442 | |
450 | |
443 | #if EPOLL_FALLBACK |
451 | #if EPOLL_FALLBACK |
444 | backend_fd = ev_epoll_create (); |
452 | backend_fd = ev_epoll_create (); |
445 | if (backend_fd < 0) |
453 | if (backend_fd < 0) |
446 | { |
454 | { |
447 | ev_io_destroy (linuxaio_ctx); |
455 | evsys_io_destroy (linuxaio_ctx); |
448 | return 0; |
456 | return 0; |
449 | } |
457 | } |
450 | |
458 | |
451 | ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ); |
459 | ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ); |
452 | ev_set_priority (&linuxaio_epoll_w, EV_MAXPRI); |
460 | ev_set_priority (&linuxaio_epoll_w, EV_MAXPRI); |
… | |
… | |
473 | { |
481 | { |
474 | #if EPOLL_FALLBACK |
482 | #if EPOLL_FALLBACK |
475 | close (backend_fd); |
483 | close (backend_fd); |
476 | #endif |
484 | #endif |
477 | linuxaio_free_iocbp (EV_A); |
485 | linuxaio_free_iocbp (EV_A); |
478 | ev_io_destroy (linuxaio_ctx); |
486 | evsys_io_destroy (linuxaio_ctx); |
479 | } |
487 | } |
480 | |
488 | |
481 | inline_size |
489 | inline_size |
482 | void |
490 | void |
483 | linuxaio_fork (EV_P) |
491 | linuxaio_fork (EV_P) |
… | |
… | |
485 | /* this frees all iocbs, which is very heavy-handed */ |
493 | /* this frees all iocbs, which is very heavy-handed */ |
486 | linuxaio_destroy (EV_A); |
494 | linuxaio_destroy (EV_A); |
487 | linuxaio_submitcnt = 0; /* all pointers were invalidated */ |
495 | linuxaio_submitcnt = 0; /* all pointers were invalidated */ |
488 | |
496 | |
489 | linuxaio_ctx = 0; |
497 | linuxaio_ctx = 0; |
490 | while (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0) |
498 | while (evsys_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0) |
491 | ev_syserr ("(libev) linuxaio io_setup"); |
499 | ev_syserr ("(libev) linuxaio io_setup"); |
492 | |
500 | |
493 | #if EPOLL_FALLBACK |
501 | #if EPOLL_FALLBACK |
494 | while ((backend_fd = ev_epoll_create ()) < 0) |
502 | while ((backend_fd = ev_epoll_create ()) < 0) |
495 | ev_syserr ("(libev) linuxaio epoll_create"); |
503 | ev_syserr ("(libev) linuxaio epoll_create"); |