#include "eio.h"
#include "xthread.h"

#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <limits.h>
#include <fcntl.h>
#include <assert.h>

#ifndef EIO_FINISH
# define EIO_FINISH(req)  ((req)->finish) && !EIO_CANCELLED (req) ? (req)->finish (req) : 0
#endif

#ifndef EIO_DESTROY
# define EIO_DESTROY(req) do { if ((req)->destroy) (req)->destroy (req); } while (0)
#endif

#ifndef EIO_FEED
# define EIO_FEED(req)    do { if ((req)->feed   ) (req)->feed   (req); } while (0)
#endif
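
/*
 * The three macros above are override hooks: an embedder can define them
 * before this file is compiled to change how requests are finished,
 * destroyed and fed.  A minimal sketch (my_finish_hook is hypothetical,
 * not part of this library):
 *
 *   #define EIO_FINISH(req) my_finish_hook (req)
 */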

#ifdef _WIN32

/*doh*/

#else

# include "config.h"
# include <sys/time.h>
# include <sys/select.h>
# include <unistd.h>
# include <utime.h>
# include <signal.h>
# include <dirent.h>

# ifndef EIO_STRUCT_DIRENT
#  define EIO_STRUCT_DIRENT struct dirent
# endif

#endif

#if HAVE_SENDFILE
# if __linux
#  include <sys/sendfile.h>
# elif __freebsd
#  include <sys/socket.h>
#  include <sys/uio.h>
# elif __hpux
#  include <sys/socket.h>
# elif __solaris /* not yet */
#  include <sys/sendfile.h>
# else
#  error sendfile support requested but not available
# endif
#endif

/* number of seconds after which idle threads exit */
#define IDLE_TIMEOUT 10

/* used for struct dirent, AIX doesn't provide it */
#ifndef NAME_MAX
# define NAME_MAX 4096
#endif

/* buffer size for various temporary buffers */
#define EIO_BUFSIZE 65536

#define dBUF                                   \
  char *eio_buf;                               \
  X_LOCK (wrklock);                            \
  self->dbuf = eio_buf = malloc (EIO_BUFSIZE); \
  X_UNLOCK (wrklock);                          \
  errno = ENOMEM;                              \
  if (!eio_buf)                                \
    return -1;

#define EIO_TICKS ((1000000 + 1023) >> 10)

static void (*want_poll_cb) (void);
static void (*done_poll_cb) (void);

static unsigned int max_poll_time = 0;
static unsigned int max_poll_reqs = 0;

/* calculate time difference in ~1/EIO_TICKS of a second */
static int tvdiff (struct timeval *tv1, struct timeval *tv2)
{
  return  (tv2->tv_sec  - tv1->tv_sec ) * EIO_TICKS
       + ((tv2->tv_usec - tv1->tv_usec) >> 10);
}
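
/*
 * Worked example: EIO_TICKS is (1000000 + 1023) >> 10 == 977, i.e. about
 * one tick per 1/1024 of a second.  For tv1 = 1.000000s and tv2 = 1.500000s,
 * tvdiff returns 0 * 977 + (500000 >> 10) == 488 ticks, roughly half a
 * second.  Using 1/1024s ticks instead of milliseconds turns the usec
 * conversion into a shift instead of a division.
 */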

static unsigned int started, idle, wanted = 4;

/* worker threads management */
static mutex_t wrklock = X_MUTEX_INIT;

typedef struct worker
{
  /* locked by wrklock */
  struct worker *prev, *next;

  thread_t tid;

  /* locked by reslock, reqlock or wrklock */
  eio_req *req; /* currently processed request */
  void *dbuf;
  DIR *dirp;
} worker;

static worker wrk_first = { &wrk_first, &wrk_first, 0 };

static void worker_clear (worker *wrk)
{
  if (wrk->dirp)
    {
      closedir (wrk->dirp);
      wrk->dirp = 0;
    }

  if (wrk->dbuf)
    {
      free (wrk->dbuf);
      wrk->dbuf = 0;
    }
}

static void worker_free (worker *wrk)
{
  wrk->next->prev = wrk->prev;
  wrk->prev->next = wrk->next;

  free (wrk);
}

static volatile unsigned int nreqs, nready, npending;
static volatile unsigned int max_idle = 4;

static mutex_t reslock = X_MUTEX_INIT;
static mutex_t reqlock = X_MUTEX_INIT;
static cond_t  reqwait = X_COND_INIT;

unsigned int eio_nreqs (void)
{
  return nreqs;
}

unsigned int eio_nready (void)
{
  unsigned int retval;

  if (WORDACCESS_UNSAFE) X_LOCK   (reqlock);
  retval = nready;
  if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);

  return retval;
}

unsigned int eio_npending (void)
{
  unsigned int retval;

  if (WORDACCESS_UNSAFE) X_LOCK   (reqlock);
  retval = npending;
  if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);

  return retval;
}

unsigned int eio_nthreads (void)
{
  unsigned int retval;

  if (WORDACCESS_UNSAFE) X_LOCK   (reqlock);
  retval = started;
  if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);

  return retval;
}

/*
 * a somewhat faster data structure might be nice, but
 * with 8 priorities this actually needs <20 insns
 * per shift, the most expensive operation.
 */
typedef struct {
  eio_req *qs[EIO_NUM_PRI], *qe[EIO_NUM_PRI]; /* qstart, qend */
  int size;
} reqq;

static reqq req_queue;
static reqq res_queue;

static int reqq_push (reqq *q, eio_req *req)
{
  int pri = req->pri;
  req->next = 0;

  if (q->qe[pri])
    {
      q->qe[pri]->next = req;
      q->qe[pri] = req;
    }
  else
    q->qe[pri] = q->qs[pri] = req;

  return q->size++;
}

static eio_req *reqq_shift (reqq *q)
{
  int pri;

  if (!q->size)
    return 0;

  --q->size;

  for (pri = EIO_NUM_PRI; pri--; )
    {
      eio_req *req = q->qs[pri];

      if (req)
        {
          if (!(q->qs[pri] = (eio_req *)req->next))
            q->qe[pri] = 0;

          return req;
        }
    }

  abort ();
}
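
/*
 * reqq_shift scans from the highest priority slot downwards and returns the
 * oldest request of the highest non-empty priority, so e.g. the EIO_QUIT
 * requests pushed by end_thread () at EIO_PRI_MAX jump ahead of any
 * default-priority work.  The abort () is unreachable unless q->size
 * disagrees with the queue contents.
 */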

static void grp_try_feed (eio_req *grp)
{
  while (grp->size < grp->int2 && !EIO_CANCELLED (grp))
    {
      int old_len = grp->size;

      EIO_FEED (grp);

      /* stop if no progress has been made */
      if (old_len == grp->size)
        {
          grp->feed = 0;
          break;
        }
    }
}

static int eio_finish (eio_req *req);

static int grp_dec (eio_req *grp)
{
  --grp->size;

  /* call feeder, if applicable */
  grp_try_feed (grp);

  /* finish, if done */
  if (!grp->size && grp->int1)
    return eio_finish (grp);
  else
    return 0;
}

void eio_destroy (eio_req *req)
{
  if ((req)->flags & EIO_FLAG_PTR1_FREE) free (req->ptr1);
  if ((req)->flags & EIO_FLAG_PTR2_FREE) free (req->ptr2);

  EIO_DESTROY (req);
}

static int eio_finish (eio_req *req)
{
  int res = EIO_FINISH (req);

  if (req->grp)
    {
      int res2;
      eio_req *grp = req->grp;

      /* unlink request */
      if (req->grp_next) req->grp_next->grp_prev = req->grp_prev;
      if (req->grp_prev) req->grp_prev->grp_next = req->grp_next;

      if (grp->grp_first == req)
        grp->grp_first = req->grp_next;

      res2 = grp_dec (grp);

      if (!res && res2)
        res = res2;
    }

  eio_destroy (req);

  return res;
}

void eio_grp_cancel (eio_req *grp)
{
  for (grp = grp->grp_first; grp; grp = grp->grp_next)
    eio_cancel (grp);
}

void eio_cancel (eio_req *req)
{
  X_LOCK (wrklock);
  req->flags |= EIO_FLAG_CANCELLED;
  X_UNLOCK (wrklock);

  eio_grp_cancel (req);
}

X_THREAD_PROC (eio_proc);

static void start_thread (void)
{
  worker *wrk = calloc (1, sizeof (worker));

  /*TODO*/
  assert (("unable to allocate worker thread data", wrk));

  X_LOCK (wrklock);

  if (thread_create (&wrk->tid, eio_proc, (void *)wrk))
    {
      wrk->prev = &wrk_first;
      wrk->next = wrk_first.next;
      wrk_first.next->prev = wrk;
      wrk_first.next = wrk;
      ++started;
    }
  else
    free (wrk);

  X_UNLOCK (wrklock);
}

static void maybe_start_thread (void)
{
  if (eio_nthreads () >= wanted)
    return;

  /* todo: maybe use idle here, but might be less exact */
  if (0 <= (int)eio_nthreads () + (int)eio_npending () - (int)eio_nreqs ())
    return;

  start_thread ();
}

void eio_submit (eio_req *req)
{
  ++nreqs;

  X_LOCK (reqlock);
  ++nready;
  reqq_push (&req_queue, req);
  X_COND_SIGNAL (reqwait);
  X_UNLOCK (reqlock);

  maybe_start_thread ();
}

static void end_thread (void)
{
  eio_req *req = calloc (1, sizeof (eio_req));

  req->type = EIO_QUIT;
  req->pri  = EIO_PRI_MAX + EIO_PRI_BIAS;

  X_LOCK (reqlock);
  reqq_push (&req_queue, req);
  X_COND_SIGNAL (reqwait);
  X_UNLOCK (reqlock);

  X_LOCK (wrklock);
  --started;
  X_UNLOCK (wrklock);
}

void eio_set_max_poll_time (double nseconds)
{
  if (WORDACCESS_UNSAFE) X_LOCK   (reqlock);
  max_poll_time = nseconds * EIO_TICKS; /* tvdiff () measures in EIO_TICKS units */
  if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
}

void eio_set_max_poll_reqs (unsigned int maxreqs)
{
  if (WORDACCESS_UNSAFE) X_LOCK   (reqlock);
  max_poll_reqs = maxreqs;
  if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
}

void eio_set_max_idle (unsigned int nthreads)
{
  if (WORDACCESS_UNSAFE) X_LOCK   (reqlock);
  max_idle = nthreads <= 0 ? 1 : nthreads;
  if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
}

void eio_set_min_parallel (unsigned int nthreads)
{
  if (wanted < nthreads)
    wanted = nthreads;
}

void eio_set_max_parallel (unsigned int nthreads)
{
  if (wanted > nthreads)
    wanted = nthreads;

  while (started > wanted)
    end_thread ();
}

int eio_poll (void)
{
  int maxreqs = max_poll_reqs;
  struct timeval tv_start, tv_now;
  eio_req *req;

  if (max_poll_time)
    gettimeofday (&tv_start, 0);

  for (;;)
    {
      maybe_start_thread ();

      X_LOCK (reslock);
      req = reqq_shift (&res_queue);

      if (req)
        {
          --npending;

          if (!res_queue.size && done_poll_cb)
            done_poll_cb ();
        }

      X_UNLOCK (reslock);

      if (!req)
        return 0;

      --nreqs;

      if (req->type == EIO_GROUP && req->size)
        {
          req->int1 = 1; /* mark request as delayed */
          continue;
        }
      else
        {
          int res = eio_finish (req);
          if (res)
            return res;
        }

      if (maxreqs && !--maxreqs)
        break;

      if (max_poll_time)
        {
          gettimeofday (&tv_now, 0);

          if (tvdiff (&tv_start, &tv_now) >= max_poll_time)
            break;
        }
    }

  errno = EAGAIN;
  return -1;
}
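
/*
 * Typical integration sketch (not part of this file; pipefd and both
 * callbacks are hypothetical): want_poll is called from a worker thread
 * when the first result arrives, so it should only wake the event loop,
 * e.g. via a self-pipe; the loop then calls eio_poll () until it stops
 * returning -1/EAGAIN:
 *
 *   static int pipefd[2];
 *   static void want_poll (void) { char c = 0; write (pipefd[1], &c, 1); }
 *   static void done_poll (void) { char c;     read  (pipefd[0], &c, 1); }
 *
 *   // in the loop, when pipefd[0] becomes readable:
 *   while (eio_poll () == -1 && errno == EAGAIN)
 *     ; // hit max_poll_time/max_poll_reqs, results still pending
 */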

/*****************************************************************************/
/* work around various missing functions */

#if !HAVE_PREADWRITE
# define pread  eio__pread
# define pwrite eio__pwrite

/*
 * make our pread/pwrite safe against themselves, but not against
 * normal read/write by using a mutex. slows down execution a lot,
 * but that's your problem, not mine.
 */
static mutex_t preadwritelock = X_MUTEX_INIT;

static ssize_t
eio__pread (int fd, void *buf, size_t count, off_t offset)
{
  ssize_t res;
  off_t ooffset;

  X_LOCK (preadwritelock);
  ooffset = lseek (fd, 0, SEEK_CUR);
  lseek (fd, offset, SEEK_SET);
  res = read (fd, buf, count);
  lseek (fd, ooffset, SEEK_SET);
  X_UNLOCK (preadwritelock);

  return res;
}

static ssize_t
eio__pwrite (int fd, void *buf, size_t count, off_t offset)
{
  ssize_t res;
  off_t ooffset;

  X_LOCK (preadwritelock);
  ooffset = lseek (fd, 0, SEEK_CUR);
  lseek (fd, offset, SEEK_SET);
  res = write (fd, buf, count);
  lseek (fd, ooffset, SEEK_SET); /* restore the original file offset, as eio__pread does */
  X_UNLOCK (preadwritelock);

  return res;
}
#endif

#ifndef HAVE_FUTIMES

# define utimes(path,times)  eio__utimes  (path, times)
# define futimes(fd,times)   eio__futimes (fd, times)

static int
eio__utimes (const char *filename, const struct timeval times[2])
{
  if (times)
    {
      struct utimbuf buf;

      buf.actime  = times[0].tv_sec;
      buf.modtime = times[1].tv_sec;

      return utime (filename, &buf);
    }
  else
    return utime (filename, 0);
}

static int eio__futimes (int fd, const struct timeval tv[2])
{
  errno = ENOSYS;
  return -1;
}

#endif

#if !HAVE_FDATASYNC
# define fdatasync fsync
#endif

#if !HAVE_READAHEAD
# define readahead(fd,offset,count) eio__readahead (fd, offset, count, self)

static ssize_t
eio__readahead (int fd, off_t offset, size_t count, worker *self)
{
  size_t todo = count;
  dBUF;

  while (todo > 0)
    {
      size_t len = todo < EIO_BUFSIZE ? todo : EIO_BUFSIZE;

      pread (fd, eio_buf, len, offset);
      offset += len;
      todo   -= len;
    }

  errno = 0;
  return count;
}

#endif

#if !HAVE_READDIR_R
# define readdir_r eio__readdir_r

static mutex_t readdirlock = X_MUTEX_INIT;

static int
eio__readdir_r (DIR *dirp, EIO_STRUCT_DIRENT *ent, EIO_STRUCT_DIRENT **res)
{
  EIO_STRUCT_DIRENT *e;
  int errorno;

  X_LOCK (readdirlock);

  e = readdir (dirp);
  errorno = errno;

  if (e)
    {
      *res = ent;
      strcpy (ent->d_name, e->d_name);
    }
  else
    *res = 0;

  X_UNLOCK (readdirlock);

  errno = errorno;
  return e ? 0 : -1;
}
#endif

/* sendfile always needs emulation */
static ssize_t
eio__sendfile (int ofd, int ifd, off_t offset, size_t count, worker *self)
{
  ssize_t res;

  if (!count)
    return 0;

#if HAVE_SENDFILE
# if __linux
  res = sendfile (ofd, ifd, &offset, count);

# elif __freebsd
  /*
   * Of course, the freebsd sendfile is a dire hack with no thoughts
   * wasted on making it similar to other I/O functions.
   */
  {
    off_t sbytes;
    res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0);

    if (res < 0 && sbytes)
      /* maybe only on EAGAIN: as usual, the manpage leaves you guessing */
      res = sbytes;
  }

# elif __hpux
  res = sendfile (ofd, ifd, offset, count, 0, 0);

# elif __solaris
  {
    struct sendfilevec vec;
    size_t sbytes;

    vec.sfv_fd   = ifd;
    vec.sfv_flag = 0;
    vec.sfv_off  = offset;
    vec.sfv_len  = count;

    res = sendfilev (ofd, &vec, 1, &sbytes);

    if (res < 0 && sbytes)
      res = sbytes;
  }

# endif
#else
  res = -1;
  errno = ENOSYS;
#endif

  if (res < 0
      && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK
#if __solaris
          || errno == EAFNOSUPPORT || errno == EPROTOTYPE
#endif
         )
     )
    {
      /* emulate sendfile. this is a major pain in the ass */
      dBUF;

      res = 0;

      while (count)
        {
          ssize_t cnt;

          cnt = pread (ifd, eio_buf, count > EIO_BUFSIZE ? EIO_BUFSIZE : count, offset);

          if (cnt <= 0)
            {
              if (cnt && !res) res = -1;
              break;
            }

          cnt = write (ofd, eio_buf, cnt);

          if (cnt <= 0)
            {
              if (cnt && !res) res = -1;
              break;
            }

          offset += cnt;
          res    += cnt;
          count  -= cnt;
        }
    }

  return res;
}

/* read a full directory */
static void
eio__scandir (eio_req *req, worker *self)
{
  DIR *dirp;
  union
  {
    EIO_STRUCT_DIRENT d;
    char b [offsetof (EIO_STRUCT_DIRENT, d_name) + NAME_MAX + 1];
  } *u;
  EIO_STRUCT_DIRENT *entp;
  char *name, *names;
  int memlen = 4096;
  int memofs = 0;
  int res = 0;

  X_LOCK (wrklock);
  self->dirp = dirp = opendir (req->ptr1);
  self->dbuf = u = malloc (sizeof (*u));
  req->flags |= EIO_FLAG_PTR2_FREE;
  req->ptr2 = names = malloc (memlen);
  X_UNLOCK (wrklock);

  if (dirp && u && names)
    for (;;)
      {
        errno = 0;
        readdir_r (dirp, &u->d, &entp);

        if (!entp)
          break;

        name = entp->d_name;

        if (name [0] != '.' || (name [1] && (name [1] != '.' || name [2])))
          {
            int len = strlen (name) + 1;

            res++;

            while (memofs + len > memlen)
              {
                memlen *= 2;
                X_LOCK (wrklock);
                req->ptr2 = names = realloc (names, memlen);
                X_UNLOCK (wrklock);

                if (!names)
                  break;
              }

            /* realloc failed: stop scanning, errno is still set */
            if (!names)
              break;

            memcpy (names + memofs, name, len);
            memofs += len;
          }
      }

  if (errno)
    res = -1;

  req->result = res;
}
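
/*
 * On success, req->result holds the number of entries and req->ptr2 the
 * names packed back to back as NUL-terminated strings: for entries "a"
 * and "bc" the buffer contains "a\0bc\0" and result is 2.  "." and ".."
 * are skipped by the check above.
 */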

/*****************************************************************************/

#define ALLOC(len)                              \
  if (!req->ptr2)                               \
    {                                           \
      X_LOCK (wrklock);                         \
      req->flags |= EIO_FLAG_PTR2_FREE;         \
      X_UNLOCK (wrklock);                       \
      req->ptr2 = malloc (len);                 \
      if (!req->ptr2)                           \
        {                                       \
          errno = ENOMEM;                       \
          req->result = -1;                     \
          break;                                \
        }                                       \
    }

X_THREAD_PROC (eio_proc)
{
  eio_req *req;
  struct timespec ts;
  worker *self = (worker *)thr_arg;

  /* try to distribute timeouts somewhat randomly */
  ts.tv_nsec = ((unsigned long)self & 1023UL) * (1000000000UL / 1024UL);

  for (;;)
    {
      ts.tv_sec = time (0) + IDLE_TIMEOUT;

      X_LOCK (reqlock);

      for (;;)
        {
          self->req = req = reqq_shift (&req_queue);

          if (req)
            break;

          ++idle;

          if (X_COND_TIMEDWAIT (reqwait, reqlock, ts) == ETIMEDOUT)
            {
              if (idle > max_idle)
                {
                  --idle;
                  X_UNLOCK (reqlock);
                  X_LOCK (wrklock);
                  --started;
                  X_UNLOCK (wrklock);
                  goto quit;
                }

              /* we are allowed to idle, so do so without any timeout */
              X_COND_WAIT (reqwait, reqlock);
              ts.tv_sec = time (0) + IDLE_TIMEOUT;
            }

          --idle;
        }

      --nready;

      X_UNLOCK (reqlock);

      errno = 0; /* strictly unnecessary */

      if (!EIO_CANCELLED (req))
        switch (req->type)
          {
            case EIO_READ:      ALLOC (req->size);
                                req->result = req->offs >= 0
                                            ? pread     (req->int1, req->ptr2, req->size, req->offs)
                                            : read      (req->int1, req->ptr2, req->size); break;
            case EIO_WRITE:     req->result = req->offs >= 0
                                            ? pwrite    (req->int1, req->ptr2, req->size, req->offs)
                                            : write     (req->int1, req->ptr2, req->size); break;

            case EIO_READAHEAD: req->result = readahead     (req->int1, req->offs, req->size); break;
            case EIO_SENDFILE:  req->result = eio__sendfile (req->int1, req->int2, req->offs, req->size, self); break;

            case EIO_STAT:      ALLOC (sizeof (EIO_STRUCT_STAT));
                                req->result = stat      (req->ptr1, (EIO_STRUCT_STAT *)req->ptr2); break;
            case EIO_LSTAT:     ALLOC (sizeof (EIO_STRUCT_STAT));
                                req->result = lstat     (req->ptr1, (EIO_STRUCT_STAT *)req->ptr2); break;
            case EIO_FSTAT:     ALLOC (sizeof (EIO_STRUCT_STAT));
                                req->result = fstat     (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break;

            case EIO_CHOWN:     req->result = chown     (req->ptr1, req->int2, req->int3); break;
            case EIO_FCHOWN:    req->result = fchown    (req->int1, req->int2, req->int3); break;
            case EIO_CHMOD:     req->result = chmod     (req->ptr1, (mode_t)req->int2); break;
            case EIO_FCHMOD:    req->result = fchmod    (req->int1, (mode_t)req->int2); break;
            case EIO_TRUNCATE:  req->result = truncate  (req->ptr1, req->offs); break;
            case EIO_FTRUNCATE: req->result = ftruncate (req->int1, req->offs); break;

            case EIO_OPEN:      req->result = open      (req->ptr1, req->int1, (mode_t)req->int2); break;
            case EIO_CLOSE:     req->result = close     (req->int1); break;
            case EIO_DUP2:      req->result = dup2      (req->int1, req->int2); break;
            case EIO_UNLINK:    req->result = unlink    (req->ptr1); break;
            case EIO_RMDIR:     req->result = rmdir     (req->ptr1); break;
            case EIO_MKDIR:     req->result = mkdir     (req->ptr1, (mode_t)req->int2); break;
            case EIO_RENAME:    req->result = rename    (req->ptr1, req->ptr2); break;
            case EIO_LINK:      req->result = link      (req->ptr1, req->ptr2); break;
            case EIO_SYMLINK:   req->result = symlink   (req->ptr1, req->ptr2); break;
            case EIO_MKNOD:     req->result = mknod     (req->ptr1, (mode_t)req->int2, (dev_t)req->offs); break;

            case EIO_READLINK:  ALLOC (NAME_MAX);
                                req->result = readlink  (req->ptr1, req->ptr2, NAME_MAX); break;

            case EIO_SYNC:      req->result = 0; sync (); break;
            case EIO_FSYNC:     req->result = fsync     (req->int1); break;
            case EIO_FDATASYNC: req->result = fdatasync (req->int1); break;

            case EIO_READDIR:   eio__scandir (req, self); break;

            case EIO_BUSY:
#ifdef _WIN32
              Sleep (req->nv1 * 1000.);
#else
              {
                struct timeval tv;

                tv.tv_sec  = req->nv1;
                tv.tv_usec = (req->nv1 - tv.tv_sec) * 1000000.;

                req->result = select (0, 0, 0, 0, &tv);
              }
#endif
              break;

            case EIO_UTIME:
            case EIO_FUTIME:
              {
                struct timeval tv[2];
                struct timeval *times;

                if (req->nv1 != -1. || req->nv2 != -1.)
                  {
                    tv[0].tv_sec  = req->nv1;
                    tv[0].tv_usec = (req->nv1 - tv[0].tv_sec) * 1000000.;
                    tv[1].tv_sec  = req->nv2;
                    tv[1].tv_usec = (req->nv2 - tv[1].tv_sec) * 1000000.;

                    times = tv;
                  }
                else
                  times = 0;

                req->result = req->type == EIO_FUTIME
                              ? futimes (req->int1, times)
                              : utimes  (req->ptr1, times);
              }
              break;

            case EIO_GROUP:
            case EIO_NOP:
              req->result = 0;
              break;

            case EIO_QUIT:
              goto quit;

            default:
              req->result = -1;
              break;
          }

      req->errorno = errno;

      X_LOCK (reslock);

      ++npending;

      if (!reqq_push (&res_queue, req) && want_poll_cb)
        want_poll_cb ();

      self->req = 0;
      worker_clear (self);

      X_UNLOCK (reslock);
    }

quit:
  X_LOCK (wrklock);
  worker_free (self);
  X_UNLOCK (wrklock);

  return 0;
}

/*****************************************************************************/

static void eio_atfork_prepare (void)
{
  X_LOCK (wrklock);
  X_LOCK (reqlock);
  X_LOCK (reslock);
#if !HAVE_PREADWRITE
  X_LOCK (preadwritelock);
#endif
#if !HAVE_READDIR_R
  X_LOCK (readdirlock);
#endif
}

static void eio_atfork_parent (void)
{
#if !HAVE_READDIR_R
  X_UNLOCK (readdirlock);
#endif
#if !HAVE_PREADWRITE
  X_UNLOCK (preadwritelock);
#endif
  X_UNLOCK (reslock);
  X_UNLOCK (reqlock);
  X_UNLOCK (wrklock);
}

static void eio_atfork_child (void)
{
  eio_req *prv;

  while ((prv = reqq_shift (&req_queue)))
    eio_destroy (prv);

  while ((prv = reqq_shift (&res_queue)))
    eio_destroy (prv);

  while (wrk_first.next != &wrk_first)
    {
      worker *wrk = wrk_first.next;

      if (wrk->req)
        eio_destroy (wrk->req);

      worker_clear (wrk);
      worker_free (wrk);
    }

  started  = 0;
  idle     = 0;
  nreqs    = 0;
  nready   = 0;
  npending = 0;

  eio_atfork_parent ();
}

int eio_init (void (*want_poll)(void), void (*done_poll)(void))
{
  want_poll_cb = want_poll;
  done_poll_cb = done_poll;

#ifdef _WIN32
  X_MUTEX_CHECK (wrklock);
  X_MUTEX_CHECK (reslock);
  X_MUTEX_CHECK (reqlock);
  X_MUTEX_CHECK (reqwait);
  X_MUTEX_CHECK (preadwritelock);
  X_MUTEX_CHECK (readdirlock);

  X_COND_CHECK  (reqwait);
#endif

  X_THREAD_ATFORK (eio_atfork_prepare, eio_atfork_parent, eio_atfork_child);

  return 0;
}
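
/*
 * Minimal usage sketch (assuming the self-pipe want_poll/done_poll pair
 * sketched above eio_poll; open_cb is a hypothetical eio_cb):
 *
 *   if (eio_init (want_poll, done_poll))
 *     abort ();
 *
 *   eio_open ("/etc/passwd", O_RDONLY, 0, open_cb);
 *
 *   // when the event loop signals that results are ready:
 *   while (eio_poll () == -1 && errno == EAGAIN)
 *     ;
 */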

static void eio_api_destroy (eio_req *req)
{
  free (req);
}

#define REQ(rtype)                                    \
  eio_req *req;                                       \
                                                      \
  req = (eio_req *)calloc (1, sizeof *req);           \
  if (!req)                                           \
    return 0;                                         \
                                                      \
  req->type    = rtype;                               \
  req->pri     = EIO_DEFAULT_PRI + EIO_PRI_BIAS;      \
  req->finish  = cb;                                  \
  req->destroy = eio_api_destroy;

#define SEND eio_submit (req); return req

#define PATH                                          \
  req->flags |= EIO_FLAG_PTR1_FREE;                   \
  req->ptr1 = strdup (path);                          \
  if (!req->ptr1)                                     \
    {                                                 \
      eio_api_destroy (req);                          \
      return 0;                                       \
    }
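
/*
 * The wrappers below are mechanical: REQ allocates and types the request,
 * PATH strdup()s the path into ptr1 (freed on destroy via
 * EIO_FLAG_PTR1_FREE), and SEND submits and returns it.  eio_unlink, for
 * example, boils down to "allocate req, duplicate path, eio_submit,
 * return req", with a 0 return meaning allocation failed.
 */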

eio_req *eio_nop (eio_cb cb)
{
  REQ (EIO_NOP); SEND;
}

eio_req *eio_busy (double delay, eio_cb cb)
{
  REQ (EIO_BUSY); req->nv1 = delay; SEND;
}

eio_req *eio_sync (eio_cb cb)
{
  REQ (EIO_SYNC); SEND;
}

eio_req *eio_fsync (int fd, eio_cb cb)
{
  REQ (EIO_FSYNC); req->int1 = fd; SEND;
}

eio_req *eio_fdatasync (int fd, eio_cb cb)
{
  REQ (EIO_FDATASYNC); req->int1 = fd; SEND;
}

eio_req *eio_close (int fd, eio_cb cb)
{
  REQ (EIO_CLOSE); req->int1 = fd; SEND;
}

eio_req *eio_readahead (int fd, off_t offset, size_t length, eio_cb cb)
{
  REQ (EIO_READAHEAD); req->int1 = fd; req->offs = offset; req->size = length; SEND;
}

eio_req *eio_read (int fd, void *data, size_t length, off_t offset, eio_cb cb)
{
  REQ (EIO_READ); req->int1 = fd; req->offs = offset; req->size = length; req->ptr2 = data; SEND;
}

eio_req *eio_write (int fd, void *data, size_t length, off_t offset, eio_cb cb)
{
  REQ (EIO_WRITE); req->int1 = fd; req->offs = offset; req->size = length; req->ptr2 = data; SEND;
}

eio_req *eio_fstat (int fd, eio_cb cb)
{
  REQ (EIO_FSTAT); req->int1 = fd; SEND;
}

eio_req *eio_futime (int fd, double atime, double mtime, eio_cb cb)
{
  REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND;
}

eio_req *eio_ftruncate (int fd, off_t offset, eio_cb cb)
{
  REQ (EIO_FTRUNCATE); req->int1 = fd; req->offs = offset; SEND;
}

eio_req *eio_fchmod (int fd, mode_t mode, eio_cb cb)
{
  REQ (EIO_FCHMOD); req->int1 = fd; req->int2 = (long)mode; SEND;
}

eio_req *eio_fchown (int fd, uid_t uid, gid_t gid, eio_cb cb)
{
  REQ (EIO_FCHOWN); req->int1 = fd; req->int2 = (long)uid; req->int3 = (long)gid; SEND;
}

eio_req *eio_dup2 (int fd, int fd2, eio_cb cb)
{
  REQ (EIO_DUP2); req->int1 = fd; req->int2 = fd2; SEND;
}

eio_req *eio_sendfile (int out_fd, int in_fd, off_t in_offset, size_t length, eio_cb cb)
{
  REQ (EIO_SENDFILE); req->int1 = out_fd; req->int2 = in_fd; req->offs = in_offset; req->size = length; SEND;
}

eio_req *eio_open (const char *path, int flags, mode_t mode, eio_cb cb)
{
  REQ (EIO_OPEN); PATH; req->int1 = flags; req->int2 = (long)mode; SEND;
}

eio_req *eio_utime (const char *path, double atime, double mtime, eio_cb cb)
{
  REQ (EIO_UTIME); PATH; req->nv1 = atime; req->nv2 = mtime; SEND;
}

eio_req *eio_truncate (const char *path, off_t offset, eio_cb cb)
{
  REQ (EIO_TRUNCATE); PATH; req->offs = offset; SEND;
}

eio_req *eio_chown (const char *path, uid_t uid, gid_t gid, eio_cb cb)
{
  REQ (EIO_CHOWN); PATH; req->int2 = (long)uid; req->int3 = (long)gid; SEND;
}

eio_req *eio_chmod (const char *path, mode_t mode, eio_cb cb)
{
  REQ (EIO_CHMOD); PATH; req->int2 = (long)mode; SEND;
}

eio_req *eio_mkdir (const char *path, mode_t mode, eio_cb cb)
{
  REQ (EIO_MKDIR); PATH; req->int2 = (long)mode; SEND;
}

static eio_req *
eio__1path (int type, const char *path, eio_cb cb)
{
  REQ (type); PATH; SEND;
}

eio_req *eio_readlink (const char *path, eio_cb cb)
{
  return eio__1path (EIO_READLINK, path, cb);
}

eio_req *eio_stat (const char *path, eio_cb cb)
{
  return eio__1path (EIO_STAT, path, cb);
}

eio_req *eio_lstat (const char *path, eio_cb cb)
{
  return eio__1path (EIO_LSTAT, path, cb);
}

eio_req *eio_unlink (const char *path, eio_cb cb)
{
  return eio__1path (EIO_UNLINK, path, cb);
}

eio_req *eio_rmdir (const char *path, eio_cb cb)
{
  return eio__1path (EIO_RMDIR, path, cb);
}

eio_req *eio_readdir (const char *path, eio_cb cb)
{
  return eio__1path (EIO_READDIR, path, cb);
}

eio_req *eio_mknod (const char *path, mode_t mode, dev_t dev, eio_cb cb)
{
  REQ (EIO_MKNOD); PATH; req->int2 = (long)mode; req->offs = (off_t)dev; SEND;
}

static eio_req *
eio__2path (int type, const char *path, const char *new_path, eio_cb cb)
{
  REQ (type); PATH;

  req->flags |= EIO_FLAG_PTR2_FREE;
  req->ptr2 = strdup (new_path);
  if (!req->ptr2)
    {
      eio_api_destroy (req);
      return 0;
    }

  SEND;
}

eio_req *eio_link (const char *path, const char *new_path, eio_cb cb)
{
  return eio__2path (EIO_LINK, path, new_path, cb);
}

eio_req *eio_symlink (const char *path, const char *new_path, eio_cb cb)
{
  return eio__2path (EIO_SYMLINK, path, new_path, cb);
}

eio_req *eio_rename (const char *path, const char *new_path, eio_cb cb)
{
  return eio__2path (EIO_RENAME, path, new_path, cb);
}

#undef REQ
#undef PATH
#undef SEND

/*****************************************************************************/
/* grp functions */

void eio_grp_feed (eio_req *grp, void (*feed)(eio_req *req), int limit)
{
  grp->int2 = limit;
  grp->feed = feed;

  grp_try_feed (grp);
}

void eio_grp_limit (eio_req *grp, int limit)
{
  grp->int2 = limit;

  grp_try_feed (grp);
}

void eio_grp_add (eio_req *grp, eio_req *req)
{
  assert (("cannot add requests to IO::AIO::GRP after the group finished", grp->int1 != 2));

  ++grp->size;
  req->grp = grp;

  req->grp_prev = 0;
  req->grp_next = grp->grp_first;

  if (grp->grp_first)
    grp->grp_first->grp_prev = req;

  grp->grp_first = req;
}
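
/*
 * Group usage sketch (hypothetical names throughout; assumes a group
 * constructor such as eio_grp () declared elsewhere, e.g. in eio.h): the
 * feeder keeps at most "limit" subrequests in flight, and the group
 * finishes once its size drops to zero:
 *
 *   static void feed_cb (eio_req *grp)
 *   {
 *     const char *path = next_path ();  // hypothetical work source
 *     if (path)
 *       eio_grp_add (grp, eio_stat (path, stat_cb));
 *   }
 *
 *   eio_req *grp = eio_grp (grp_cb, 0);
 *   eio_grp_feed (grp, feed_cb, 4);
 */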

/*****************************************************************************/
/* misc garbage */

ssize_t eio_sendfile_sync (int ofd, int ifd, off_t offset, size_t count)
{
  ssize_t res;
  worker wrk;

  wrk.dbuf = 0;

  res = eio__sendfile (ofd, ifd, offset, count, &wrk);

  if (wrk.dbuf)
    free (wrk.dbuf);

  return res;
}