ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libev/ev_linuxaio.c
(Generate patch)

Comparing libev/ev_linuxaio.c (file contents):
Revision 1.12 by root, Sun Jun 23 23:02:09 2019 UTC vs.
Revision 1.17 by root, Mon Jun 24 02:02:35 2019 UTC

198 linuxaio_iocbps [fd]->io.aio_buf = 0; 198 linuxaio_iocbps [fd]->io.aio_buf = 0;
199 anfds [fd].events = 0; 199 anfds [fd].events = 0;
200 fd_change (EV_A_ fd, 0); 200 fd_change (EV_A_ fd, 0);
201 201
202 /* feed events, we do not expect or handle POLLNVAL */ 202 /* feed events, we do not expect or handle POLLNVAL */
203 if (ecb_expect_false (res & POLLNVAL)) 203 if (expect_false (res & POLLNVAL))
204 fd_kill (EV_A_ fd); 204 fd_kill (EV_A_ fd);
205 else 205 else
206 fd_event ( 206 fd_event (
207 EV_A_ 207 EV_A_
208 fd, 208 fd,
219static int 219static int
220linuxaio_get_events_from_ring (EV_P) 220linuxaio_get_events_from_ring (EV_P)
221{ 221{
222 struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx; 222 struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;
223 223
224 unsigned head = ring->head; 224 /* the kernel reads and writes both of these variables, */
225 /* as a C extension, we assume that volatile use here */
226 /* both makes reads atomic and once-only */
227 unsigned head = *(volatile unsigned *)&ring->head;
225 unsigned tail = *(volatile unsigned *)&ring->tail; 228 unsigned tail = *(volatile unsigned *)&ring->tail;
226 229
227 if (head == tail) 230 if (head == tail)
228 return 0; 231 return 0;
229 232
230 /* bail out if the ring buffer doesn't match the expected layout */ 233 /* bail out if the ring buffer doesn't match the expected layout */
231 if (ecb_expect_false (ring->magic != AIO_RING_MAGIC) 234 if (expect_false (ring->magic != AIO_RING_MAGIC)
232 || ring->incompat_features != AIO_RING_INCOMPAT_FEATURES 235 || ring->incompat_features != AIO_RING_INCOMPAT_FEATURES
233 || ring->header_length != sizeof (struct aio_ring)) /* TODO: or use it to find io_event[0]? */ 236 || ring->header_length != sizeof (struct aio_ring)) /* TODO: or use it to find io_event[0]? */
234 return 0; 237 return 0;
235 238
236 /* make sure the events up to tail are visible */ 239 /* make sure the events up to tail are visible */
237 ECB_MEMORY_FENCE_ACQUIRE; 240 ECB_MEMORY_FENCE_ACQUIRE;
238 241
243 { 246 {
244 linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head); 247 linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head);
245 linuxaio_parse_events (EV_A_ ring->io_events, tail); 248 linuxaio_parse_events (EV_A_ ring->io_events, tail);
246 } 249 }
247 250
251 /* TODO: we only need a compiler barrier here, not a read fence */
252 ECB_MEMORY_FENCE_RELEASE;
253 /* as an extension to C, we hope that the volatile will make this atomic and once-only */
248 *(volatile unsigned *)&ring->head = tail; 254 *(volatile unsigned *)&ring->head = tail;
249 /* make sure kernel can see our new head value - probably not required */ 255 /* make sure kernel can see our new head value - probably not required */
250 ECB_MEMORY_FENCE_RELEASE; 256 ECB_MEMORY_FENCE_RELEASE;
251 257
252 return 1; 258 return 1;
314 /* which allows us to pinpoint the errornous iocb */ 320 /* which allows us to pinpoint the errornous iocb */
315 for (submitted = 0; submitted < linuxaio_submitcnt; ) 321 for (submitted = 0; submitted < linuxaio_submitcnt; )
316 { 322 {
317 int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); 323 int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);
318 324
319 if (ecb_expect_false (res < 0)) 325 if (expect_false (res < 0))
320 if (errno == EAGAIN) 326 if (errno == EAGAIN)
321 { 327 {
322 /* This happens when the ring buffer is full, at least. I assume this means 328 /* This happens when the ring buffer is full, at least. I assume this means
323 * that the event was queued synchronously during io_submit, and thus 329 * that the event was queued synchronously during io_submit, and thus
324 * the buffer overflowed. 330 * the buffer overflowed.
332 break; 338 break;
333 } 339 }
334#if EPOLL_FALLBACK 340#if EPOLL_FALLBACK
335 else if (errno == EINVAL) 341 else if (errno == EINVAL)
336 { 342 {
337 /* This hapüpens for unsupported fds, officially, but in my testing, 343 /* This happens for unsupported fds, officially, but in my testing,
338 * also randomly happens for supported fds. We fall back to good old 344 * also randomly happens for supported fds. We fall back to good old
339 * poll() here, under the assumption that this is a very rare case. 345 * poll() here, under the assumption that this is a very rare case.
346 * See https://lore.kernel.org/patchwork/patch/1047453/ for evidence
347 * that the problem is known, but ignored.
340 */ 348 */
341 struct iocb *iocb = linuxaio_submits [submitted]; 349 struct iocb *iocb = linuxaio_submits [submitted];
342 res = 1; /* skip this iocb */ 350 res = 1; /* skip this iocb */
343 351
344 linuxaio_rearm_epoll (EV_A_ iocb, EPOLL_CTL_ADD); 352 linuxaio_rearm_epoll (EV_A_ iocb, EPOLL_CTL_ADD);
368 for (;;) 376 for (;;)
369 { 377 {
370 int idx; 378 int idx;
371 int res = epoll_wait (backend_fd, events, sizeof (events) / sizeof (events [0]), 0); 379 int res = epoll_wait (backend_fd, events, sizeof (events) / sizeof (events [0]), 0);
372 380
373 if (ecb_expect_false (res < 0)) 381 if (expect_false (res < 0))
374 ev_syserr ("(libev) linuxaio epoll_wait"); 382 ev_syserr ("(libev) linuxaio epoll_wait");
375 else if (!res) 383 else if (!res)
376 break; 384 break;
377 385
378 for (idx = res; idx--; ) 386 for (idx = res; idx--; )
400int 408int
401linuxaio_init (EV_P_ int flags) 409linuxaio_init (EV_P_ int flags)
402{ 410{
403 /* would be great to have a nice test for IOCB_CMD_POLL instead */ 411 /* would be great to have a nice test for IOCB_CMD_POLL instead */
404 /* also: test some semi-common fd types, such as files and ttys in recommended_backends */ 412 /* also: test some semi-common fd types, such as files and ttys in recommended_backends */
405 if (ev_linux_version () < 0x041200) /* 4.18 introduced IOCB_CMD_POLL */ 413#if EPOLL_FALLBACK
414 /* 4.19 made epoll work */
415 if (ev_linux_version () < 0x041300)
406 return 0; 416 return 0;
417#else
418 /* 4.18 introduced IOCB_CMD_POLL */
419 if (ev_linux_version () < 0x041200)
420 return 0;
421#endif
407 422
408 linuxaio_ctx = 0; 423 linuxaio_ctx = 0;
409 if (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0) 424 if (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0)
410 return 0; 425 return 0;
411 426
417 return 0; 432 return 0;
418 } 433 }
419 434
420 ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ); 435 ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
421 ev_io_start (EV_A_ &linuxaio_epoll_w); 436 ev_io_start (EV_A_ &linuxaio_epoll_w);
437 ev_unref (EV_A); /* watcher should not keep loop alive */
422#endif 438#endif
423 439
424 backend_modify = linuxaio_modify; 440 backend_modify = linuxaio_modify;
425 backend_poll = linuxaio_poll; 441 backend_poll = linuxaio_poll;
426 442
462 ev_syserr ("(libev) linuxaio epoll_create"); 478 ev_syserr ("(libev) linuxaio epoll_create");
463 479
464 ev_io_stop (EV_A_ &linuxaio_epoll_w); 480 ev_io_stop (EV_A_ &linuxaio_epoll_w);
465 ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ); 481 ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
466 ev_io_start (EV_A_ &linuxaio_epoll_w); 482 ev_io_start (EV_A_ &linuxaio_epoll_w);
467 ev_unref (EV_A); /* watcher should not keep loop alive */
468#endif 483#endif
469 484
470 fd_rearm_all (EV_A); 485 fd_rearm_all (EV_A);
471} 486}
472 487

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines