ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libev/ev_iouring.c
(Generate patch)

Comparing libev/ev_iouring.c (file contents):
Revision 1.22 by root, Mon Jun 8 11:15:59 2020 UTC vs.
Revision 1.23 by root, Sun Jul 26 11:10:45 2020 UTC

118 __u32 timeout_flags; 118 __u32 timeout_flags;
119 __u32 accept_flags; 119 __u32 accept_flags;
120 __u32 cancel_flags; 120 __u32 cancel_flags;
121 __u32 open_flags; 121 __u32 open_flags;
122 __u32 statx_flags; 122 __u32 statx_flags;
123 __u32 fadvise_advice;
123 }; 124 };
124 __u64 user_data; 125 __u64 user_data;
125 union { 126 union {
126 __u16 buf_index; 127 __u16 buf_index;
128 __u16 personality;
127 __u64 __pad2[3]; 129 __u64 __pad2[3];
128 }; 130 };
129}; 131};
130 132
131struct io_uring_cqe 133struct io_uring_cqe
170 __u32 resv[4]; 172 __u32 resv[4];
171 struct io_sqring_offsets sq_off; 173 struct io_sqring_offsets sq_off;
172 struct io_cqring_offsets cq_off; 174 struct io_cqring_offsets cq_off;
173}; 175};
174 176
177#define IORING_FEAT_SINGLE_MMAP 0x00000001
178#define IORING_FEAT_NODROP 0x00000002
179#define IORING_FEAT_SUBMIT_STABLE 0x00000004
180
175#define IORING_SETUP_CQSIZE 0x00000008 181#define IORING_SETUP_CQSIZE 0x00000008
182#define IORING_SETUP_CLAMP 0x00000010
176 183
177#define IORING_OP_POLL_ADD 6 184#define IORING_OP_POLL_ADD 6
178#define IORING_OP_POLL_REMOVE 7 185#define IORING_OP_POLL_REMOVE 7
179#define IORING_OP_TIMEOUT 11 186#define IORING_OP_TIMEOUT 11
180#define IORING_OP_TIMEOUT_REMOVE 12 187#define IORING_OP_TIMEOUT_REMOVE 12
181 188
189#define IORING_REGISTER_EVENTFD 4
190#define IORING_REGISTER_EVENTFD_ASYNC 7
191#define IORING_REGISTER_PROBE 8
192
193#define IO_URING_OP_SUPPORTED 1
194
195struct io_uring_probe_op {
196 __u8 op;
197 __u8 resv;
198 __u16 flags;
199 __u32 resv2;
200};
201
202struct io_uring_probe
203{
204 __u8 last_op;
205 __u8 ops_len;
206 __u16 resv;
207 __u32 resv2[3];
208 struct io_uring_probe_op ops[0];
209};
210
182/* relative or absolute, reference clock is CLOCK_MONOTONIC */ 211/* relative or absolute, reference clock is CLOCK_MONOTONIC */
183struct iouring_kernel_timespec 212struct iouring_kernel_timespec
184{ 213{
185 int64_t tv_sec; 214 int64_t tv_sec;
186 long long tv_nsec; 215 long long tv_nsec;
189#define IORING_TIMEOUT_ABS 0x00000001 218#define IORING_TIMEOUT_ABS 0x00000001
190 219
191#define IORING_ENTER_GETEVENTS 0x01 220#define IORING_ENTER_GETEVENTS 0x01
192 221
193#define IORING_OFF_SQ_RING 0x00000000ULL 222#define IORING_OFF_SQ_RING 0x00000000ULL
194#define IORING_OFF_CQ_RING 0x08000000ULL
195#define IORING_OFF_SQES 0x10000000ULL 223#define IORING_OFF_SQES 0x10000000ULL
196 224
197#define IORING_FEAT_SINGLE_MMAP 0x00000001 225#define IORING_FEAT_SINGLE_MMAP 0x00000001
198#define IORING_FEAT_NODROP 0x00000002 226#define IORING_FEAT_NODROP 0x00000002
199#define IORING_FEAT_SUBMIT_STABLE 0x00000004 227#define IORING_FEAT_SUBMIT_STABLE 0x00000004
210evsys_io_uring_enter (int fd, unsigned to_submit, unsigned min_complete, unsigned flags, const sigset_t *sig, size_t sigsz) 238evsys_io_uring_enter (int fd, unsigned to_submit, unsigned min_complete, unsigned flags, const sigset_t *sig, size_t sigsz)
211{ 239{
212 return ev_syscall6 (SYS_io_uring_enter, fd, to_submit, min_complete, flags, sig, sigsz); 240 return ev_syscall6 (SYS_io_uring_enter, fd, to_submit, min_complete, flags, sig, sigsz);
213} 241}
214 242
243inline_size
244int
245evsys_io_uring_register (unsigned int fd, unsigned int opcode, void *arg, unsigned int nr_args)
246{
247 return ev_syscall4 (SYS_io_uring_register, fd, opcode, arg, nr_args);
248}
249
215/*****************************************************************************/ 250/*****************************************************************************/
216/* actual backed implementation */ 251/* actual backend implementation */
217 252
218/* we hope that volatile will make the compiler access this variables only once */ 253/* we hope that volatile will make the compiler access this variables only once */
219#define EV_SQ_VAR(name) *(volatile unsigned *)((char *)iouring_sq_ring + iouring_sq_ ## name)
220#define EV_CQ_VAR(name) *(volatile unsigned *)((char *)iouring_cq_ring + iouring_cq_ ## name) 254#define EV_SQ_VAR(name) *(volatile unsigned *)((char *)iouring_ring + iouring_sq_ ## name)
255#define EV_CQ_VAR(name) *(volatile unsigned *)((char *)iouring_ring + iouring_cq_ ## name)
221 256
222/* the index array */ 257/* the index array */
223#define EV_SQ_ARRAY ((unsigned *)((char *)iouring_sq_ring + iouring_sq_array)) 258#define EV_SQ_ARRAY ((unsigned *)((char *)iouring_ring + iouring_sq_array))
224 259
225/* the submit/completion queue entries */ 260/* the submit/completion queue entries */
226#define EV_SQES ((struct io_uring_sqe *) iouring_sqes) 261#define EV_SQES ((struct io_uring_sqe *) iouring_sqes)
227#define EV_CQES ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes)) 262#define EV_CQES ((struct io_uring_cqe *)((char *)iouring_ring + iouring_cq_cqes))
228 263
229inline_speed 264inline_speed
230int 265int
231iouring_enter (EV_P_ ev_tstamp timeout) 266iouring_enter (EV_P_ ev_tstamp timeout)
232{ 267{
289inline_size 324inline_size
290struct io_uring_sqe * 325struct io_uring_sqe *
291iouring_sqe_submit (EV_P_ struct io_uring_sqe *sqe) 326iouring_sqe_submit (EV_P_ struct io_uring_sqe *sqe)
292{ 327{
293 unsigned idx = sqe - EV_SQES; 328 unsigned idx = sqe - EV_SQES;
329
330 printf ("submit idx %d, op %d, fd %d, us5r %p, poll %d\n", idx, sqe->opcode, sqe->fd, sqe->user_data, sqe->poll_events);
294 331
295 EV_SQ_ARRAY [idx] = idx; 332 EV_SQ_ARRAY [idx] = idx;
296 ECB_MEMORY_FENCE_RELEASE; 333 ECB_MEMORY_FENCE_RELEASE;
297 ++EV_SQ_VAR (tail); 334 ++EV_SQ_VAR (tail);
298 /*ECB_MEMORY_FENCE_RELEASE; /* for the time being we assume this is not needed */ 335 /*ECB_MEMORY_FENCE_RELEASE; /* for the time being we assume this is not needed */
317iouring_internal_destroy (EV_P) 354iouring_internal_destroy (EV_P)
318{ 355{
319 close (iouring_tfd); 356 close (iouring_tfd);
320 close (iouring_fd); 357 close (iouring_fd);
321 358
322 if (iouring_sq_ring != MAP_FAILED) munmap (iouring_sq_ring, iouring_sq_ring_size); 359 if (iouring_ring != MAP_FAILED) munmap (iouring_ring, iouring_ring_size);
323 if (iouring_cq_ring != MAP_FAILED) munmap (iouring_cq_ring, iouring_cq_ring_size);
324 if (iouring_sqes != MAP_FAILED) munmap (iouring_sqes , iouring_sqes_size ); 360 if (iouring_sqes != MAP_FAILED) munmap (iouring_sqes, iouring_sqes_size);
325 361
326 if (ev_is_active (&iouring_tfd_w)) 362 if (ev_is_active (&iouring_tfd_w))
327 { 363 {
328 ev_ref (EV_A); 364 ev_ref (EV_A);
329 ev_io_stop (EV_A_ &iouring_tfd_w); 365 ev_io_stop (EV_A_ &iouring_tfd_w);
333ecb_cold 369ecb_cold
334static int 370static int
335iouring_internal_init (EV_P) 371iouring_internal_init (EV_P)
336{ 372{
337 struct io_uring_params params = { 0 }; 373 struct io_uring_params params = { 0 };
374 uint32_t sq_size, cq_size;
375
376 params.flags = IORING_SETUP_CLAMP;
338 377
339 iouring_to_submit = 0; 378 iouring_to_submit = 0;
340 379
341 iouring_tfd = -1; 380 iouring_tfd = -1;
342 iouring_sq_ring = MAP_FAILED; 381 iouring_ring = MAP_FAILED;
343 iouring_cq_ring = MAP_FAILED;
344 iouring_sqes = MAP_FAILED; 382 iouring_sqes = MAP_FAILED;
345 383
346 if (!have_monotonic) /* cannot really happen, but what if11 */ 384 if (!have_monotonic) /* cannot really happen, but what if11 */
347 return -1; 385 return -1;
348 386
349 for (;;)
350 {
351 iouring_fd = evsys_io_uring_setup (iouring_entries, &params); 387 iouring_fd = evsys_io_uring_setup (iouring_entries, &params);
352 388
353 if (iouring_fd >= 0) 389 if (iouring_fd < 0)
354 break; /* yippie */ 390 return -1;
355 391
356 if (errno != EINVAL)
357 return -1; /* we failed */
358
359#if TODO
360 if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEATURE_SINGLE_MMAP | IORING_FEAT_SUBMIT_STABLE)) 392 if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEAT_SINGLE_MMAP | IORING_FEAT_SUBMIT_STABLE))
361 return -1; /* we require the above features */ 393 return -1; /* we require the above features */
362#endif
363 394
364 /* EINVAL: lots of possible reasons, but maybe 395 /* TODO: remember somehow whether our queue size has been clamped */
365 * it is because we hit the unqueryable hardcoded size limit
366 */
367 396
368 /* we hit the limit already, give up */
369 if (iouring_max_entries)
370 return -1;
371
372 /* first time we hit EINVAL? assume we hit the limit, so go back and retry */
373 iouring_entries >>= 1;
374 iouring_max_entries = iouring_entries;
375 }
376
377 iouring_sq_ring_size = params.sq_off.array + params.sq_entries * sizeof (unsigned); 397 sq_size = params.sq_off.array + params.sq_entries * sizeof (unsigned);
378 iouring_cq_ring_size = params.cq_off.cqes + params.cq_entries * sizeof (struct io_uring_cqe); 398 cq_size = params.cq_off.cqes + params.cq_entries * sizeof (struct io_uring_cqe);
399
400 iouring_ring_size = sq_size > cq_size ? sq_size : cq_size;
379 iouring_sqes_size = params.sq_entries * sizeof (struct io_uring_sqe); 401 iouring_sqes_size = params.sq_entries * sizeof (struct io_uring_sqe);
380 402
381 iouring_sq_ring = mmap (0, iouring_sq_ring_size, PROT_READ | PROT_WRITE, 403 iouring_ring = mmap (0, iouring_ring_size, PROT_READ | PROT_WRITE,
382 MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQ_RING); 404 MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQ_RING);
383 iouring_cq_ring = mmap (0, iouring_cq_ring_size, PROT_READ | PROT_WRITE,
384 MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_CQ_RING);
385 iouring_sqes = mmap (0, iouring_sqes_size, PROT_READ | PROT_WRITE, 405 iouring_sqes = mmap (0, iouring_sqes_size, PROT_READ | PROT_WRITE,
386 MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQES); 406 MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQES);
387 407
388 if (iouring_sq_ring == MAP_FAILED || iouring_cq_ring == MAP_FAILED || iouring_sqes == MAP_FAILED) 408 if (iouring_ring == MAP_FAILED || iouring_sqes == MAP_FAILED)
389 return -1; 409 return -1;
390 410
391 iouring_sq_head = params.sq_off.head; 411 iouring_sq_head = params.sq_off.head;
392 iouring_sq_tail = params.sq_off.tail; 412 iouring_sq_tail = params.sq_off.tail;
393 iouring_sq_ring_mask = params.sq_off.ring_mask; 413 iouring_sq_ring_mask = params.sq_off.ring_mask;
401 iouring_cq_ring_mask = params.cq_off.ring_mask; 421 iouring_cq_ring_mask = params.cq_off.ring_mask;
402 iouring_cq_ring_entries = params.cq_off.ring_entries; 422 iouring_cq_ring_entries = params.cq_off.ring_entries;
403 iouring_cq_overflow = params.cq_off.overflow; 423 iouring_cq_overflow = params.cq_off.overflow;
404 iouring_cq_cqes = params.cq_off.cqes; 424 iouring_cq_cqes = params.cq_off.cqes;
405 425
426 iouring_tfd_to = EV_TSTAMP_HUGE;
427
406 iouring_tfd = timerfd_create (CLOCK_MONOTONIC, TFD_CLOEXEC); 428 iouring_tfd = timerfd_create (CLOCK_MONOTONIC, TFD_CLOEXEC);
407 429
408 if (iouring_tfd < 0) 430 if (iouring_tfd < 0)
409 return iouring_tfd; 431 return -1;
410
411 iouring_tfd_to = EV_TSTAMP_HUGE;
412 432
413 return 0; 433 return 0;
414} 434}
415 435
416ecb_cold 436ecb_cold

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines