
Comparing libev/ev_linuxaio.c (file contents):
Revision 1.10 by root, Sun Jun 23 02:02:24 2019 UTC vs.
Revision 1.17 by root, Mon Jun 24 02:02:35 2019 UTC

198 linuxaio_iocbps [fd]->io.aio_buf = 0; 198 linuxaio_iocbps [fd]->io.aio_buf = 0;
199 anfds [fd].events = 0; 199 anfds [fd].events = 0;
200 fd_change (EV_A_ fd, 0); 200 fd_change (EV_A_ fd, 0);
201 201
202 /* feed events, we do not expect or handle POLLNVAL */ 202 /* feed events, we do not expect or handle POLLNVAL */
203 if (ecb_expect_false (res & POLLNVAL)) 203 if (expect_false (res & POLLNVAL))
204 fd_kill (EV_A_ fd); 204 fd_kill (EV_A_ fd);
205 else 205 else
206 fd_event ( 206 fd_event (
207 EV_A_ 207 EV_A_
208 fd, 208 fd,
219static int 219static int
220linuxaio_get_events_from_ring (EV_P) 220linuxaio_get_events_from_ring (EV_P)
221{ 221{
222 struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx; 222 struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;
223 223
224 unsigned head = ring->head; 224 /* the kernel reads and writes both of these variables, */
225 /* as a C extension, we assume that volatile use here */
226 /* both makes reads atomic and once-only */
227 unsigned head = *(volatile unsigned *)&ring->head;
225 unsigned tail = *(volatile unsigned *)&ring->tail; 228 unsigned tail = *(volatile unsigned *)&ring->tail;
226 229
227 if (head == tail) 230 if (head == tail)
228 return 0; 231 return 0;
229 232
230 /* bail out if the ring buffer doesn't match the expected layout */ 233 /* bail out if the ring buffer doesn't match the expected layout */
231 if (ecb_expect_false (ring->magic != AIO_RING_MAGIC) 234 if (expect_false (ring->magic != AIO_RING_MAGIC)
232 || ring->incompat_features != AIO_RING_INCOMPAT_FEATURES 235 || ring->incompat_features != AIO_RING_INCOMPAT_FEATURES
233 || ring->header_length != sizeof (struct aio_ring)) /* TODO: or use it to find io_event[0]? */ 236 || ring->header_length != sizeof (struct aio_ring)) /* TODO: or use it to find io_event[0]? */
234 return 0; 237 return 0;
235 238
239 /* make sure the events up to tail are visible */
236 ECB_MEMORY_FENCE_ACQUIRE; 240 ECB_MEMORY_FENCE_ACQUIRE;
237 241
238 /* parse all available events, but only once, to avoid starvation */ 242 /* parse all available events, but only once, to avoid starvation */
239 if (tail > head) /* normal case around */ 243 if (tail > head) /* normal case around */
240 linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head); 244 linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head);
241 else /* wrapped around? */ 245 else /* wrapped around? */
242 { 246 {
243 linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head); 247 linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head);
244 linuxaio_parse_events (EV_A_ ring->io_events, tail); 248 linuxaio_parse_events (EV_A_ ring->io_events, tail);
245 } 249 }
246 250
247 ring->head = tail; 251 /* TODO: we only need a compiler barrier here, not a read fence */
252 ECB_MEMORY_FENCE_RELEASE;
253 /* as an extension to C, we hope that the volatile will make this atomic and once-only */
254 *(volatile unsigned *)&ring->head = tail;
255 /* make sure kernel can see our new head value - probably not required */
256 ECB_MEMORY_FENCE_RELEASE;
248 257
249 return 1; 258 return 1;
250} 259}
251 260
252/* read at least one event from kernel, or timeout */ 261/* read at least one event from kernel, or timeout */
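The hunk above replaces plain reads of ring->head and ring->tail with volatile reads bracketed by memory fences before publishing the new head. A standalone sketch of that pattern, using C11 <stdatomic.h> fences instead of libev's ECB_MEMORY_FENCE_* macros; the struct mirrors the kernel's aio_ring layout, and process_event () is a hypothetical callback, not part of libev:

#include <stdatomic.h>
#include <linux/aio_abi.h>   /* struct io_event */

#define AIO_RING_MAGIC 0xa10a10a1

struct aio_ring
{
  unsigned id;
  unsigned nr;                 /* number of io_event slots */
  unsigned head;
  unsigned tail;
  unsigned magic;
  unsigned compat_features;
  unsigned incompat_features;
  unsigned header_length;
  struct io_event io_events[];
};

static int
ring_consume (struct aio_ring *ring, void (*process_event) (struct io_event *))
{
  /* volatile reads: ask the compiler for once-only, untorn loads */
  unsigned head = *(volatile unsigned *)&ring->head;
  unsigned tail = *(volatile unsigned *)&ring->tail;

  if (head == tail)
    return 0;

  /* bail out if the ring does not have the expected layout */
  if (ring->magic != AIO_RING_MAGIC)
    return 0;

  /* make sure the events up to tail are visible before reading them */
  atomic_thread_fence (memory_order_acquire);

  while (head != tail)
    {
      process_event (&ring->io_events [head]);
      head = head + 1 == ring->nr ? 0 : head + 1;
    }

  /* publish the new head only after we are done with the events */
  atomic_thread_fence (memory_order_release);
  *(volatile unsigned *)&ring->head = tail;

  return 1;
}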
311 /* which allows us to pinpoint the erroneous iocb */ 320 /* which allows us to pinpoint the erroneous iocb */
312 for (submitted = 0; submitted < linuxaio_submitcnt; ) 321 for (submitted = 0; submitted < linuxaio_submitcnt; )
313 { 322 {
314 int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); 323 int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);
315 324
316 if (ecb_expect_false (res < 0)) 325 if (expect_false (res < 0))
317 if (errno == EAGAIN) 326 if (errno == EAGAIN)
318 { 327 {
319 /* This happens when the ring buffer is full, at least. I assume this means 328 /* This happens when the ring buffer is full, at least. I assume this means
320 * that the event was queued synchronously during io_submit, and thus 329 * that the event was queued synchronously during io_submit, and thus
321 * the buffer overflowed. 330 * the buffer overflowed.
329 break; 338 break;
330 } 339 }
331#if EPOLL_FALLBACK 340#if EPOLL_FALLBACK
332 else if (errno == EINVAL) 341 else if (errno == EINVAL)
333 { 342 {
334 /* This hapüpens for unsupported fds, officially, but in my testing, 343 /* This happens for unsupported fds, officially, but in my testing,
335 * also randomly happens for supported fds. We fall back to good old 344 * also randomly happens for supported fds. We fall back to good old
336 * poll() here, under the assumption that this is a very rare case. 345 * poll() here, under the assumption that this is a very rare case.
346 * See https://lore.kernel.org/patchwork/patch/1047453/ for evidence
347 * that the problem is known, but ignored.
337 */ 348 */
338 struct iocb *iocb = linuxaio_submits [submitted]; 349 struct iocb *iocb = linuxaio_submits [submitted];
339 res = 1; /* skip this iocb */ 350 res = 1; /* skip this iocb */
340 351
341 linuxaio_rearm_epoll (EV_A_ iocb, EPOLL_CTL_ADD); 352 linuxaio_rearm_epoll (EV_A_ iocb, EPOLL_CTL_ADD);
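The hunk above extends the submission loop's error handling: EAGAIN stops submitting until completions drain the ring, while EINVAL (an fd the kernel's IOCB_CMD_POLL cannot handle) hands the offending iocb to the epoll fallback and skips it. A rough standalone sketch of that loop using the raw io_submit syscall; fallback () is a hypothetical stand-in for linuxaio_rearm_epoll ():

#define _GNU_SOURCE 1
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>

static void
submit_all (aio_context_t ctx, struct iocb **iocbs, int count,
            void (*fallback) (struct iocb *))
{
  int submitted = 0;

  while (submitted < count)
    {
      /* io_submit may accept only a prefix of the batch; on error,
       * errno refers to iocbs [submitted], the first one not accepted */
      int res = syscall (SYS_io_submit, ctx, (long)(count - submitted), iocbs + submitted);

      if (res < 0)
        {
          if (errno == EINVAL)
            {
              /* unsupported fd: poll this one via epoll instead, then skip it */
              fallback (iocbs [submitted]);
              res = 1;
            }
          else
            break; /* EAGAIN (ring full) or a real error: let the caller deal with it */
        }

      submitted += res;
    }
}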
365 for (;;) 376 for (;;)
366 { 377 {
367 int idx; 378 int idx;
368 int res = epoll_wait (backend_fd, events, sizeof (events) / sizeof (events [0]), 0); 379 int res = epoll_wait (backend_fd, events, sizeof (events) / sizeof (events [0]), 0);
369 380
370 if (ecb_expect_false (res < 0)) 381 if (expect_false (res < 0))
371 ev_syserr ("(libev) linuxaio epoll_wait"); 382 ev_syserr ("(libev) linuxaio epoll_wait");
372 else if (!res) 383 else if (!res)
373 break; 384 break;
374 385
375 for (idx = res; idx--; ) 386 for (idx = res; idx--; )
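The fallback's callback above drains epoll with a zero timeout so it never blocks the aio-based loop. The same pattern in isolation; handle () is a hypothetical per-event callback:

#include <sys/epoll.h>

static void
drain_epoll (int epfd, void (*handle) (struct epoll_event *))
{
  struct epoll_event events [16];

  for (;;)
    {
      int i;
      /* timeout 0: report what is pending right now, never block */
      int res = epoll_wait (epfd, events, sizeof (events) / sizeof (events [0]), 0);

      if (res <= 0)
        break; /* nothing pending, or an error the caller should check */

      for (i = 0; i < res; ++i)
        handle (&events [i]);
    }
}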
397int 408int
398linuxaio_init (EV_P_ int flags) 409linuxaio_init (EV_P_ int flags)
399{ 410{
400 /* would be great to have a nice test for IOCB_CMD_POLL instead */ 411 /* would be great to have a nice test for IOCB_CMD_POLL instead */
401 /* also: test some semi-common fd types, such as files and ttys in recommended_backends */ 412 /* also: test some semi-common fd types, such as files and ttys in recommended_backends */
402 if (ev_linux_version () < 0x041200) /* 4.18 introduced IOCB_CMD_POLL */ 413#if EPOLL_FALLBACK
414 /* 4.19 made epoll work */
415 if (ev_linux_version () < 0x041300)
403 return 0; 416 return 0;
417#else
418 /* 4.18 introduced IOCB_CMD_POLL */
419 if (ev_linux_version () < 0x041200)
420 return 0;
421#endif
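The constants above encode kernel versions one byte per component: 0x041200 is 4.18.0 (where IOCB_CMD_POLL appeared) and 0x041300 is 4.19.0 (where the epoll fallback became usable). A hypothetical helper that produces the same kind of value from uname () - this is not libev's ev_linux_version (), just a sketch of the encoding:

#include <stdio.h>
#include <sys/utsname.h>

static unsigned int
linux_version_hex (void)
{
  struct utsname u;
  unsigned int major = 0, minor = 0, micro = 0;

  if (uname (&u) < 0)
    return 0;

  /* e.g. "4.19.0-5-amd64" -> 0x041300 */
  sscanf (u.release, "%u.%u.%u", &major, &minor, &micro);

  return (major << 16) | (minor << 8) | micro;
}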
404 422
405 linuxaio_ctx = 0; 423 linuxaio_ctx = 0;
406 if (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0) 424 if (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0)
407 return 0; 425 return 0;
408 426
414 return 0; 432 return 0;
415 } 433 }
416 434
417 ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ); 435 ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
418 ev_io_start (EV_A_ &linuxaio_epoll_w); 436 ev_io_start (EV_A_ &linuxaio_epoll_w);
437 ev_unref (EV_A); /* watcher should not keep loop alive */
419#endif 438#endif
420 439
421 backend_modify = linuxaio_modify; 440 backend_modify = linuxaio_modify;
422 backend_poll = linuxaio_poll; 441 backend_poll = linuxaio_poll;
423 442
459 ev_syserr ("(libev) linuxaio epoll_create"); 478 ev_syserr ("(libev) linuxaio epoll_create");
460 479
461 ev_io_stop (EV_A_ &linuxaio_epoll_w); 480 ev_io_stop (EV_A_ &linuxaio_epoll_w);
462 ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ); 481 ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
463 ev_io_start (EV_A_ &linuxaio_epoll_w); 482 ev_io_start (EV_A_ &linuxaio_epoll_w);
464 ev_unref (EV_A); /* watcher should not keep loop alive */
465#endif 483#endif
466 484
467 fd_rearm_all (EV_A); 485 fd_rearm_all (EV_A);
468} 486}
469 487
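The ev_unref () calls added in linuxaio_init () and in the fork handler above follow libev's documented idiom for internal watchers: after starting a watcher the backend needs only for its own bookkeeping, drop the reference it holds on the loop so it cannot keep ev_run () from returning, and take the reference back before stopping it. A user-level sketch of the same idiom; watcher_cb and fd are placeholders:

#include <ev.h>

static ev_io internal_w;

static void
watcher_cb (EV_P_ ev_io *w, int revents)
{
  /* internal housekeeping only */
}

static void
start_internal_watcher (struct ev_loop *loop, int fd)
{
  ev_io_init (&internal_w, watcher_cb, fd, EV_READ);
  ev_io_start (loop, &internal_w);
  ev_unref (loop); /* watcher should not keep the loop alive */
}

static void
stop_internal_watcher (struct ev_loop *loop)
{
  ev_ref (loop);   /* undo the ev_unref before stopping */
  ev_io_stop (loop, &internal_w);
}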
