… | |
… | |
100 | * not only is this totally undocumented, not even the source code |
100 | * not only is this totally undocumented, not even the source code |
101 | * can tell you what the future semantics of compat_features and |
101 | * can tell you what the future semantics of compat_features and |
102 | * incompat_features are, or what header_length actually is for. |
102 | * incompat_features are, or what header_length actually is for. |
103 | */ |
103 | */ |
104 | #define AIO_RING_MAGIC 0xa10a10a1 |
104 | #define AIO_RING_MAGIC 0xa10a10a1 |
105 | #define AIO_RING_INCOMPAT_FEATURES 0 |
105 | #define EV_AIO_RING_INCOMPAT_FEATURES 0 |
106 | struct aio_ring |
106 | struct aio_ring |
107 | { |
107 | { |
108 | unsigned id; /* kernel internal index number */ |
108 | unsigned id; /* kernel internal index number */ |
109 | unsigned nr; /* number of io_events */ |
109 | unsigned nr; /* number of io_events */ |
110 | unsigned head; /* Written to by userland or by kernel. */ |
110 | unsigned head; /* Written to by userland or by kernel. */ |
… | |
… | |
372 | unsigned tail = *(volatile unsigned *)&ring->tail; |
372 | unsigned tail = *(volatile unsigned *)&ring->tail; |
373 | |
373 | |
374 | if (head == tail) |
374 | if (head == tail) |
375 | return 0; |
375 | return 0; |
376 | |
376 | |
377 | /* bail out if the ring buffer doesn't match the expected layout */ |
|
|
378 | if (expect_false (ring->magic != AIO_RING_MAGIC) |
|
|
379 | || ring->incompat_features != AIO_RING_INCOMPAT_FEATURES |
|
|
380 | || ring->header_length != sizeof (struct aio_ring)) /* TODO: or use it to find io_event[0]? */ |
|
|
381 | return 0; |
|
|
382 | |
|
|
383 | /* make sure the events up to tail are visible */ |
377 | /* make sure the events up to tail are visible */ |
384 | ECB_MEMORY_FENCE_ACQUIRE; |
378 | ECB_MEMORY_FENCE_ACQUIRE; |
385 | |
379 | |
386 | /* parse all available events, but only once, to avoid starvation */ |
380 | /* parse all available events, but only once, to avoid starvation */ |
387 | if (tail > head) /* normal case around */ |
381 | if (tail > head) /* normal case around */ |
… | |
… | |
397 | *(volatile unsigned *)&ring->head = tail; |
391 | *(volatile unsigned *)&ring->head = tail; |
398 | |
392 | |
399 | return 1; |
393 | return 1; |
400 | } |
394 | } |
401 | |
395 | |
|
|
396 | inline_size |
|
|
397 | int |
|
|
398 | linuxaio_ringbuf_valid (EV_P) |
|
|
399 | { |
|
|
400 | struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx; |
|
|
401 | |
|
|
402 | return expect_true (ring->magic == AIO_RING_MAGIC) |
|
|
403 | && ring->incompat_features == EV_AIO_RING_INCOMPAT_FEATURES |
|
|
404 | && ring->header_length == sizeof (struct aio_ring); /* TODO: or use it to find io_event[0]? */ |
|
|
405 | } |
|
|
406 | |
402 | /* read at least one event from kernel, or timeout */ |
407 | /* read at least one event from kernel, or timeout */ |
403 | inline_size |
408 | inline_size |
404 | void |
409 | void |
405 | linuxaio_get_events (EV_P_ ev_tstamp timeout) |
410 | linuxaio_get_events (EV_P_ ev_tstamp timeout) |
406 | { |
411 | { |
407 | struct timespec ts; |
412 | struct timespec ts; |
408 | struct io_event ioev[1]; |
413 | struct io_event ioev[8]; /* 256 octet stack space */ |
409 | int res; |
414 | int want = 1; /* how many events to request */ |
|
|
415 | int ringbuf_valid = linuxaio_ringbuf_valid (EV_A); |
410 | |
416 | |
|
|
417 | if (expect_true (ringbuf_valid)) |
|
|
418 | { |
|
|
419 | /* if the ring buffer has any events, we don't wait or call the kernel at all */ |
411 | if (linuxaio_get_events_from_ring (EV_A)) |
420 | if (linuxaio_get_events_from_ring (EV_A)) |
412 | return; |
421 | return; |
413 | |
422 | |
414 | /* no events, so wait for at least one, then poll ring buffer again */ |
423 | /* if the ring buffer is empty, and we don't have a timeout, then don't call the kernel */ |
415 | /* this degrades to one event per loop iteration */ |
424 | if (!timeout) |
416 | /* if the ring buffer changes layout, but so be it */ |
425 | return; |
|
|
426 | } |
|
|
427 | else |
|
|
428 | /* no ringbuffer, request slightly larger batch */ |
|
|
429 | want = sizeof (ioev) / sizeof (ioev [0]); |
417 | |
430 | |
|
|
431 | /* no events, so wait for some |
|
|
432 | * for fairness reasons, we do this in a loop, to fetch all events |
|
|
433 | */ |
|
|
434 | for (;;) |
|
|
435 | { |
|
|
436 | int res; |
|
|
437 | |
418 | EV_RELEASE_CB; |
438 | EV_RELEASE_CB; |
419 | |
439 | |
420 | ts.tv_sec = (long)timeout; |
440 | ts.tv_sec = (long)timeout; |
421 | ts.tv_nsec = (long)((timeout - ts.tv_sec) * 1e9); |
441 | ts.tv_nsec = (long)((timeout - ts.tv_sec) * 1e9); |
422 | |
442 | |
423 | res = evsys_io_getevents (linuxaio_ctx, 1, sizeof (ioev) / sizeof (ioev [0]), ioev, &ts); |
443 | res = evsys_io_getevents (linuxaio_ctx, 1, want, ioev, &ts); |
424 | |
444 | |
425 | EV_ACQUIRE_CB; |
445 | EV_ACQUIRE_CB; |
426 | |
446 | |
427 | if (res < 0) |
447 | if (res < 0) |
428 | if (errno == EINTR) |
448 | if (errno == EINTR) |
429 | /* ignored */; |
449 | /* ignored, retry */; |
430 | else |
450 | else |
431 | ev_syserr ("(libev) linuxaio io_getevents"); |
451 | ev_syserr ("(libev) linuxaio io_getevents"); |
432 | else if (res) |
452 | else if (res) |
433 | { |
453 | { |
434 | /* at least one event available, handle it and any remaining ones in the ring buffer */ |
454 | /* at least one event available, handle them */ |
435 | linuxaio_parse_events (EV_A_ ioev, res); |
455 | linuxaio_parse_events (EV_A_ ioev, res); |
|
|
456 | |
|
|
457 | if (expect_true (ringbuf_valid)) |
|
|
458 | { |
|
|
459 | /* if we have a ring buffer, handle any remaining events in it */ |
436 | linuxaio_get_events_from_ring (EV_A); |
460 | linuxaio_get_events_from_ring (EV_A); |
|
|
461 | |
|
|
462 | /* at this point, we should have handled all outstanding events */ |
|
|
463 | break; |
|
|
464 | } |
|
|
465 | else if (res < want) |
|
|
466 | /* otherwise, if there were fewer events than we wanted, we assume there are no more */ |
|
|
467 | break; |
|
|
468 | } |
|
|
469 | else |
|
|
470 | break; /* no events from the kernel, we are done */ |
|
|
471 | |
|
|
472 | timeout = 0; /* only wait in the first iteration */ |
437 | } |
473 | } |
438 | } |
474 | } |
439 | |
475 | |
440 | inline_size |
476 | inline_size |
441 | int |
477 | int |