… | |
… | |
39 | |
39 | |
40 | #include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */ |
40 | #include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */ |
41 | #include <poll.h> |
41 | #include <poll.h> |
42 | #include <linux/aio_abi.h> |
42 | #include <linux/aio_abi.h> |
43 | |
43 | |
44 | /* we try to fill 4kn pages exactly. |
44 | /* we try to fill 4kB pages exactly. |
45 | * the ring buffer header is 32 bytes, every io event is 32 bytes. |
45 | * the ring buffer header is 32 bytes, every io event is 32 bytes. |
46 | * the kernel takes the io event number, doubles it, adds 2, adds the ring buffer |
46 | * the kernel takes the io event number, doubles it, adds 2, adds the ring buffer. |
47 | * so the calculation below will use "exactly" 8kB for the ring buffer |
47 | * therefore the calculation below will use "exactly" 8kB for the ring buffer |
48 | */ |
48 | */ |
49 | #define EV_LINUXAIO_DEPTH (256 / 2 - 2 - 1) /* max. number of io events per batch */ |
49 | #define EV_LINUXAIO_DEPTH (256 / 2 - 2 - 1) /* max. number of io events per batch */ |
50 | |
50 | |
51 | /*****************************************************************************/ |
51 | /*****************************************************************************/ |
52 | /* syscall wrapdadoop */ |
52 | /* syscall wrapdadoop */ |
… | |
… | |
72 | unsigned header_length; /* size of aio_ring */ |
72 | unsigned header_length; /* size of aio_ring */ |
73 | |
73 | |
74 | struct io_event io_events[0]; |
74 | struct io_event io_events[0]; |
75 | }; |
75 | }; |
76 | |
76 | |
77 | static int |
77 | inline_size |
|
|
78 | int |
78 | ev_io_setup (unsigned nr_events, aio_context_t *ctx_idp) |
79 | ev_io_setup (unsigned nr_events, aio_context_t *ctx_idp) |
79 | { |
80 | { |
80 | return syscall (SYS_io_setup, nr_events, ctx_idp); |
81 | return syscall (SYS_io_setup, nr_events, ctx_idp); |
81 | } |
82 | } |
82 | |
83 | |
83 | static int |
84 | inline_size |
|
|
85 | int |
84 | ev_io_destroy (aio_context_t ctx_id) |
86 | ev_io_destroy (aio_context_t ctx_id) |
85 | { |
87 | { |
86 | return syscall (SYS_io_destroy, ctx_id); |
88 | return syscall (SYS_io_destroy, ctx_id); |
87 | } |
89 | } |
88 | |
90 | |
89 | static int |
91 | inline_size |
|
|
92 | int |
90 | ev_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[]) |
93 | ev_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[]) |
91 | { |
94 | { |
92 | return syscall (SYS_io_submit, ctx_id, nr, cbp); |
95 | return syscall (SYS_io_submit, ctx_id, nr, cbp); |
93 | } |
96 | } |
94 | |
97 | |
95 | static int |
98 | inline_size |
|
|
99 | int |
96 | ev_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result) |
100 | ev_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result) |
97 | { |
101 | { |
98 | return syscall (SYS_io_cancel, ctx_id, cbp, result); |
102 | return syscall (SYS_io_cancel, ctx_id, cbp, result); |
99 | } |
103 | } |
100 | |
104 | |
101 | static int |
105 | inline_size |
|
|
106 | int |
102 | ev_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout) |
107 | ev_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout) |
103 | { |
108 | { |
104 | return syscall (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout); |
109 | return syscall (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout); |
105 | } |
110 | } |
106 | |
|
|
107 | typedef void (*ev_io_cb) (long nr, struct io_event *events); |
|
|
108 | |
111 | |
109 | /*****************************************************************************/ |
112 | /*****************************************************************************/ |
110 | /* actual backed implementation */ |
113 | /* actual backed implementation */ |
111 | |
114 | |
112 | /* two iocbs for every fd, one for read, one for write */ |
|
|
113 | typedef struct aniocb |
115 | typedef struct aniocb |
114 | { |
116 | { |
115 | struct iocb io; |
117 | struct iocb io; |
116 | /*int inuse;*/ |
118 | /*int inuse;*/ |
117 | } *ANIOCBP; |
119 | } *ANIOCBP; |
… | |
… | |
130 | (*base)->io.aio_lio_opcode = IOCB_CMD_POLL; |
132 | (*base)->io.aio_lio_opcode = IOCB_CMD_POLL; |
131 | ++base; |
133 | ++base; |
132 | } |
134 | } |
133 | } |
135 | } |
134 | |
136 | |
|
|
137 | ecb_cold |
135 | static void |
138 | static void |
136 | linuxaio_free_iocbp (EV_P) |
139 | linuxaio_free_iocbp (EV_P) |
137 | { |
140 | { |
138 | while (linuxaio_iocbpmax--) |
141 | while (linuxaio_iocbpmax--) |
139 | ev_free (linuxaio_iocbps [linuxaio_iocbpmax]); |
142 | ev_free (linuxaio_iocbps [linuxaio_iocbpmax]); |
140 | |
143 | |
141 | /* next resize will completely reallocate the array */ |
144 | linuxaio_iocbpmax = 0; /* next resize will completely reallocate the array, at some overhead */ |
142 | linuxaio_iocbpmax = 0; |
|
|
143 | linuxaio_submitcnt = 0; /* all pointers invalidated */ |
|
|
144 | } |
145 | } |
145 | |
146 | |
146 | static void |
147 | static void |
147 | linuxaio_modify (EV_P_ int fd, int oev, int nev) |
148 | linuxaio_modify (EV_P_ int fd, int oev, int nev) |
148 | { |
149 | { |
… | |
… | |
208 | ECB_MEMORY_FENCE_ACQUIRE; |
209 | ECB_MEMORY_FENCE_ACQUIRE; |
209 | |
210 | |
210 | unsigned head = ring->head; |
211 | unsigned head = ring->head; |
211 | unsigned tail = *(volatile unsigned *)&ring->tail; |
212 | unsigned tail = *(volatile unsigned *)&ring->tail; |
212 | |
213 | |
|
|
214 | if (head == tail) |
|
|
215 | return 0; |
|
|
216 | |
213 | if (ring->magic != AIO_RING_MAGIC |
217 | if (ecb_expect_false (ring->magic != AIO_RING_MAGIC) |
214 | || ring->incompat_features != AIO_RING_INCOMPAT_FEATURES |
218 | || ring->incompat_features != AIO_RING_INCOMPAT_FEATURES |
215 | || ring->header_length != sizeof (struct aio_ring) /* TODO: or use it to find io_event[0]? */ |
219 | || ring->header_length != sizeof (struct aio_ring)) /* TODO: or use it to find io_event[0]? */ |
216 | || head == tail) |
|
|
217 | return 0; |
220 | return 0; |
218 | |
221 | |
219 | /* parse all available events, but only once, to avoid starvation */ |
222 | /* parse all available events, but only once, to avoid starvation */ |
220 | if (tail > head) /* normal case around */ |
223 | if (tail > head) /* normal case around */ |
221 | linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head); |
224 | linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head); |
222 | else |
|
|
223 | { |
|
|
224 | /* wrapped around */ |
225 | else /* wrapped around */ |
|
|
226 | { |
225 | linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head); |
227 | linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head); |
226 | linuxaio_parse_events (EV_A_ ring->io_events, tail); |
228 | linuxaio_parse_events (EV_A_ ring->io_events, tail); |
227 | } |
229 | } |
228 | |
230 | |
229 | ring->head = tail; |
231 | ring->head = tail; |
… | |
… | |
274 | /* which allows us to pinpoint the errornous iocb */ |
276 | /* which allows us to pinpoint the errornous iocb */ |
275 | for (submitted = 0; submitted < linuxaio_submitcnt; ) |
277 | for (submitted = 0; submitted < linuxaio_submitcnt; ) |
276 | { |
278 | { |
277 | int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); |
279 | int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); |
278 | |
280 | |
279 | if (res < 0) |
281 | if (ecb_expect_false (res < 0)) |
280 | if (errno == EAGAIN) |
282 | if (errno == EAGAIN) |
281 | { |
283 | { |
282 | /* This happens when the ring buffer is full, at least. I assume this means |
284 | /* This happens when the ring buffer is full, at least. I assume this means |
283 | * that the event was queued synchronously during io_submit, and thus |
285 | * that the event was queued synchronously during io_submit, and thus |
284 | * the buffer overflowd. |
286 | * the buffer overflowd. |
… | |
… | |
341 | inline_size |
343 | inline_size |
342 | void |
344 | void |
343 | linuxaio_fork (EV_P) |
345 | linuxaio_fork (EV_P) |
344 | { |
346 | { |
345 | /* TODO: verify and test */ |
347 | /* TODO: verify and test */ |
|
|
348 | |
|
|
349 | /* this frees all iocbs, which is very heavy-handed */ |
346 | linuxaio_destroy (EV_A); |
350 | linuxaio_destroy (EV_A); |
|
|
351 | linuxaio_submitcnt = 0; /* all pointers were invalidated */ |
347 | |
352 | |
348 | linuxaio_ctx = 0; |
353 | linuxaio_ctx = 0; |
349 | while (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0) |
354 | while (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0) |
350 | ev_syserr ("(libev) io_setup"); |
355 | ev_syserr ("(libev) io_setup"); |
351 | |
356 | |