ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libev/ev_linuxaio.c
(Generate patch)

Comparing libev/ev_linuxaio.c (file contents):
Revision 1.3 by root, Thu Jun 20 23:59:30 2019 UTC vs.
Revision 1.6 by root, Fri Jun 21 03:52:29 2019 UTC

39 39
40#include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */ 40#include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */
41#include <poll.h> 41#include <poll.h>
42#include <linux/aio_abi.h> 42#include <linux/aio_abi.h>
43 43
44/* we try to fill 4kn pages exactly. 44/* we try to fill 4kB pages exactly.
45 * the ring buffer header is 32 bytes, every io event is 32 bytes. 45 * the ring buffer header is 32 bytes, every io event is 32 bytes.
46 * the kernel takes the io event number, doubles it, adds 2, adds the ring buffer 46 * the kernel takes the io event number, doubles it, adds 2, adds the ring buffer.
47 * so the calculation below will use "exactly" 8kB for the ring buffer 47 * therefore the calculation below will use "exactly" 8kB for the ring buffer
48 */ 48 */
49#define EV_LINUXAIO_DEPTH (256 / 2 - 2 - 1) /* max. number of io events per batch */ 49#define EV_LINUXAIO_DEPTH (256 / 2 - 2 - 1) /* max. number of io events per batch */
50 50
51/*****************************************************************************/ 51/*****************************************************************************/
52/* syscall wrapdadoop */ 52/* syscall wrapdadoop */
53 53
54#include <sys/syscall.h> /* no glibc wrappers */ 54#include <sys/syscall.h> /* no glibc wrappers */
55 55
56/* aio_abi.h is not verioned in any way, so we cannot test for its existance */ 56/* aio_abi.h is not versioned in any way, so we cannot test for its existance */
57#define IOCB_CMD_POLL 5 57#define IOCB_CMD_POLL 5
58 58
59/* taken from linux/fs/aio.c */ 59/* taken from linux/fs/aio.c */
60#define AIO_RING_MAGIC 0xa10a10a1 60#define AIO_RING_MAGIC 0xa10a10a1
61#define AIO_RING_INCOMPAT_FEATURES 0 61#define AIO_RING_INCOMPAT_FEATURES 0
72 unsigned header_length; /* size of aio_ring */ 72 unsigned header_length; /* size of aio_ring */
73 73
74 struct io_event io_events[0]; 74 struct io_event io_events[0];
75}; 75};
76 76
77static int 77inline_size
78int
78ev_io_setup (unsigned nr_events, aio_context_t *ctx_idp) 79ev_io_setup (unsigned nr_events, aio_context_t *ctx_idp)
79{ 80{
80 return syscall (SYS_io_setup, nr_events, ctx_idp); 81 return syscall (SYS_io_setup, nr_events, ctx_idp);
81} 82}
82 83
83static int 84inline_size
85int
84ev_io_destroy (aio_context_t ctx_id) 86ev_io_destroy (aio_context_t ctx_id)
85{ 87{
86 return syscall (SYS_io_destroy, ctx_id); 88 return syscall (SYS_io_destroy, ctx_id);
87} 89}
88 90
89static int 91inline_size
92int
90ev_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[]) 93ev_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[])
91{ 94{
92 return syscall (SYS_io_submit, ctx_id, nr, cbp); 95 return syscall (SYS_io_submit, ctx_id, nr, cbp);
93} 96}
94 97
95static int 98inline_size
99int
96ev_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result) 100ev_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result)
97{ 101{
98 return syscall (SYS_io_cancel, ctx_id, cbp, result); 102 return syscall (SYS_io_cancel, ctx_id, cbp, result);
99} 103}
100 104
101static int 105inline_size
106int
102ev_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout) 107ev_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout)
103{ 108{
104 return syscall (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout); 109 return syscall (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout);
105} 110}
106
107typedef void (*ev_io_cb) (long nr, struct io_event *events);
108 111
109/*****************************************************************************/ 112/*****************************************************************************/
110/* actual backed implementation */ 113/* actual backed implementation */
111 114
112/* two iocbs for every fd, one for read, one for write */
113typedef struct aniocb 115typedef struct aniocb
114{ 116{
115 struct iocb io; 117 struct iocb io;
116 /*int inuse;*/ 118 /*int inuse;*/
117} *ANIOCBP; 119} *ANIOCBP;
130 (*base)->io.aio_lio_opcode = IOCB_CMD_POLL; 132 (*base)->io.aio_lio_opcode = IOCB_CMD_POLL;
131 ++base; 133 ++base;
132 } 134 }
133} 135}
134 136
137ecb_cold
135static void 138static void
136linuxaio_free_iocbp (EV_P) 139linuxaio_free_iocbp (EV_P)
137{ 140{
138 while (linuxaio_iocbpmax--) 141 while (linuxaio_iocbpmax--)
139 ev_free (linuxaio_iocbps [linuxaio_iocbpmax]); 142 ev_free (linuxaio_iocbps [linuxaio_iocbpmax]);
140 143
141 /* next resize will completely reallocate the array */ 144 linuxaio_iocbpmax = 0; /* next resize will completely reallocate the array, at some overhead */
142 linuxaio_iocbpmax = 0;
143 linuxaio_submitcnt = 0; /* all pointers invalidated */
144} 145}
145 146
146static void 147static void
147linuxaio_modify (EV_P_ int fd, int oev, int nev) 148linuxaio_modify (EV_P_ int fd, int oev, int nev)
148{ 149{
149 array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp); 150 array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp);
150 struct aniocb *iocb = linuxaio_iocbps [fd]; 151 struct aniocb *iocb = linuxaio_iocbps [fd];
151 152
152 if (iocb->io.aio_buf) 153 if (iocb->io.aio_buf)
153 ev_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0); /* always fails in relevant kernels */ 154 ev_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0); /* always returns an error relevant kernels */
154 155
155 if (nev) 156 if (nev)
156 { 157 {
157 iocb->io.aio_data = fd; 158 iocb->io.aio_data = fd;
158 iocb->io.aio_fildes = fd; 159 iocb->io.aio_fildes = fd;
208 ECB_MEMORY_FENCE_ACQUIRE; 209 ECB_MEMORY_FENCE_ACQUIRE;
209 210
210 unsigned head = ring->head; 211 unsigned head = ring->head;
211 unsigned tail = *(volatile unsigned *)&ring->tail; 212 unsigned tail = *(volatile unsigned *)&ring->tail;
212 213
214 if (head == tail)
215 return 0;
216
213 if (ring->magic != AIO_RING_MAGIC 217 if (ecb_expect_false (ring->magic != AIO_RING_MAGIC)
214 || ring->incompat_features != AIO_RING_INCOMPAT_FEATURES 218 || ring->incompat_features != AIO_RING_INCOMPAT_FEATURES
215 || ring->header_length != sizeof (struct aio_ring) /* TODO: or use it to find io_event[0]? */ 219 || ring->header_length != sizeof (struct aio_ring)) /* TODO: or use it to find io_event[0]? */
216 || head == tail)
217 return 0; 220 return 0;
218 221
219 /* parse all available events, but only once, to avoid starvation */ 222 /* parse all available events, but only once, to avoid starvation */
220 if (tail > head) /* normal case around */ 223 if (tail > head) /* normal case around */
221 linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head); 224 linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head);
222 else
223 {
224 /* wrapped around */ 225 else /* wrapped around */
226 {
225 linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head); 227 linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head);
226 linuxaio_parse_events (EV_A_ ring->io_events, tail); 228 linuxaio_parse_events (EV_A_ ring->io_events, tail);
227 } 229 }
228 230
229 ring->head = tail; 231 ring->head = tail;
274 /* which allows us to pinpoint the errornous iocb */ 276 /* which allows us to pinpoint the errornous iocb */
275 for (submitted = 0; submitted < linuxaio_submitcnt; ) 277 for (submitted = 0; submitted < linuxaio_submitcnt; )
276 { 278 {
277 int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); 279 int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);
278 280
279 if (res < 0) 281 if (ecb_expect_false (res < 0))
280 if (errno == EAGAIN) 282 if (errno == EAGAIN)
281 { 283 {
282 /* This happens when the ring buffer is full, at least. I assume this means 284 /* This happens when the ring buffer is full, at least. I assume this means
283 * that the event was queued synchronously during io_submit, and thus 285 * that the event was queued synchronously during io_submit, and thus
284 * the buffer overflowd. 286 * the buffer overflowd.
285 * In this case, we just try next loop iteration. 287 * In this case, we just try next loop iteration.
286 */ 288 */
287 memcpy (linuxaio_submits, linuxaio_submits + submitted, (linuxaio_submitcnt - submitted) * sizeof (*linuxaio_submits)); 289 memmove (linuxaio_submits, linuxaio_submits + submitted, (linuxaio_submitcnt - submitted) * sizeof (*linuxaio_submits));
288 linuxaio_submitcnt -= submitted; 290 linuxaio_submitcnt -= submitted;
289 timeout = 0; 291 timeout = 0;
290 break; 292 break;
291 } 293 }
292 else 294 else
341inline_size 343inline_size
342void 344void
343linuxaio_fork (EV_P) 345linuxaio_fork (EV_P)
344{ 346{
345 /* TODO: verify and test */ 347 /* TODO: verify and test */
348
349 /* this frees all iocbs, which is very heavy-handed */
346 linuxaio_destroy (EV_A); 350 linuxaio_destroy (EV_A);
351 linuxaio_submitcnt = 0; /* all pointers were invalidated */
347 352
348 linuxaio_ctx = 0; 353 linuxaio_ctx = 0;
349 while (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0) 354 while (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0)
350 ev_syserr ("(libev) io_setup"); 355 ev_syserr ("(libev) io_setup");
351 356

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines