1 | /* |
1 | /* |
2 | * libeio implementation |
2 | * libeio implementation |
3 | * |
3 | * |
4 | * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libeio@schmorp.de> |
4 | * Copyright (c) 2007,2008,2009,2010 Marc Alexander Lehmann <libeio@schmorp.de> |
5 | * All rights reserved. |
5 | * All rights reserved. |
6 | * |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without modifica- |
7 | * Redistribution and use in source and binary forms, with or without modifica- |
8 | * tion, are permitted provided that the following conditions are met: |
8 | * tion, are permitted provided that the following conditions are met: |
9 | * |
9 | * |
… | |
… | |
49 | #include <stdlib.h> |
49 | #include <stdlib.h> |
50 | #include <string.h> |
50 | #include <string.h> |
51 | #include <errno.h> |
51 | #include <errno.h> |
52 | #include <sys/types.h> |
52 | #include <sys/types.h> |
53 | #include <sys/stat.h> |
53 | #include <sys/stat.h> |
|
|
54 | #include <sys/statvfs.h> |
54 | #include <limits.h> |
55 | #include <limits.h> |
55 | #include <fcntl.h> |
56 | #include <fcntl.h> |
56 | #include <assert.h> |
57 | #include <assert.h> |
57 | |
58 | |
58 | #ifndef EIO_FINISH |
59 | #ifndef EIO_FINISH |
… | |
… | |
80 | # include <utime.h> |
81 | # include <utime.h> |
81 | # include <signal.h> |
82 | # include <signal.h> |
82 | # include <dirent.h> |
83 | # include <dirent.h> |
83 | |
84 | |
84 | /* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */ |
85 | /* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */ |
85 | # if __freebsd || defined __NetBSD__ || defined __OpenBSD__ |
86 | # if __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ |
86 | # define _DIRENT_HAVE_D_TYPE /* sigh */ |
87 | # define _DIRENT_HAVE_D_TYPE /* sigh */ |
87 | # define D_INO(de) (de)->d_fileno |
88 | # define D_INO(de) (de)->d_fileno |
88 | # define D_NAMLEN(de) (de)->d_namlen |
89 | # define D_NAMLEN(de) (de)->d_namlen |
89 | # elif __linux || defined d_ino || _XOPEN_SOURCE >= 600 |
90 | # elif __linux || defined d_ino || _XOPEN_SOURCE >= 600 |
90 | # define D_INO(de) (de)->d_ino |
91 | # define D_INO(de) (de)->d_ino |
… | |
… | |
106 | #endif |
107 | #endif |
107 | |
108 | |
108 | #if HAVE_SENDFILE |
109 | #if HAVE_SENDFILE |
109 | # if __linux |
110 | # if __linux |
110 | # include <sys/sendfile.h> |
111 | # include <sys/sendfile.h> |
111 | # elif __freebsd || defined __APPLE__ |
112 | # elif __FreeBSD__ || defined __APPLE__ |
112 | # include <sys/socket.h> |
113 | # include <sys/socket.h> |
113 | # include <sys/uio.h> |
114 | # include <sys/uio.h> |
114 | # elif __hpux |
115 | # elif __hpux |
115 | # include <sys/socket.h> |
116 | # include <sys/socket.h> |
116 | # elif __solaris |
117 | # elif __solaris |
… | |
… | |
134 | #define IDLE_TIMEOUT 10 |
135 | #define IDLE_TIMEOUT 10 |
135 | |
136 | |
136 | /* used for struct dirent, AIX doesn't provide it */ |
137 | /* used for struct dirent, AIX doesn't provide it */ |
137 | #ifndef NAME_MAX |
138 | #ifndef NAME_MAX |
138 | # define NAME_MAX 4096 |
139 | # define NAME_MAX 4096 |
|
|
140 | #endif |
|
|
141 | |
|
|
142 | /* used for readlink etc. */ |
|
|
143 | #ifndef PATH_MAX |
|
|
144 | # define PATH_MAX 4096 |
139 | #endif |
145 | #endif |
140 | |
146 | |
141 | /* buffer size for various temporary buffers */ |
147 | /* buffer size for various temporary buffers */ |
142 | #define EIO_BUFSIZE 65536 |
148 | #define EIO_BUFSIZE 65536 |
143 | |
149 | |
… | |
… | |
216 | static volatile unsigned int nreqs; /* reqlock */ |
222 | static volatile unsigned int nreqs; /* reqlock */ |
217 | static volatile unsigned int nready; /* reqlock */ |
223 | static volatile unsigned int nready; /* reqlock */ |
218 | static volatile unsigned int npending; /* reqlock */ |
224 | static volatile unsigned int npending; /* reqlock */ |
219 | static volatile unsigned int max_idle = 4; |
225 | static volatile unsigned int max_idle = 4; |
220 | |
226 | |
221 | static mutex_t wrklock = X_MUTEX_INIT; |
227 | static xmutex_t wrklock = X_MUTEX_INIT; |
222 | static mutex_t reslock = X_MUTEX_INIT; |
228 | static xmutex_t reslock = X_MUTEX_INIT; |
223 | static mutex_t reqlock = X_MUTEX_INIT; |
229 | static xmutex_t reqlock = X_MUTEX_INIT; |
224 | static cond_t reqwait = X_COND_INIT; |
230 | static xcond_t reqwait = X_COND_INIT; |
225 | |
231 | |
226 | #if !HAVE_PREADWRITE |
232 | #if !HAVE_PREADWRITE |
227 | /* |
233 | /* |
228 | * make our pread/pwrite emulation safe against themselves, but not against |
234 | * make our pread/pwrite emulation safe against themselves, but not against |
229 | * normal read/write by using a mutex. slows down execution a lot, |
235 | * normal read/write by using a mutex. slows down execution a lot, |
230 | * but that's your problem, not mine. |
236 | * but that's your problem, not mine. |
231 | */ |
237 | */ |
232 | static mutex_t preadwritelock = X_MUTEX_INIT; |
238 | static xmutex_t preadwritelock = X_MUTEX_INIT; |
233 | #endif |
239 | #endif |
234 | |
240 | |
235 | typedef struct etp_worker |
241 | typedef struct etp_worker |
236 | { |
242 | { |
237 | /* locked by wrklock */ |
243 | /* locked by wrklock */ |
238 | struct etp_worker *prev, *next; |
244 | struct etp_worker *prev, *next; |
239 | |
245 | |
240 | thread_t tid; |
246 | xthread_t tid; |
241 | |
247 | |
242 | /* locked by reslock, reqlock or wrklock */ |
248 | /* locked by reslock, reqlock or wrklock */ |
243 | ETP_REQ *req; /* currently processed request */ |
249 | ETP_REQ *req; /* currently processed request */ |
244 | |
250 | |
245 | ETP_WORKER_COMMON |
251 | ETP_WORKER_COMMON |
… | |
… | |
909 | |
915 | |
910 | #if HAVE_SENDFILE |
916 | #if HAVE_SENDFILE |
911 | # if __linux |
917 | # if __linux |
912 | res = sendfile (ofd, ifd, &offset, count); |
918 | res = sendfile (ofd, ifd, &offset, count); |
913 | |
919 | |
914 | # elif __freebsd |
920 | # elif __FreeBSD__ |
915 | /* |
921 | /* |
916 | * Of course, the freebsd sendfile is a dire hack with no thoughts |
922 | * Of course, the freebsd sendfile is a dire hack with no thoughts |
917 | * wasted on making it similar to other I/O functions. |
923 | * wasted on making it similar to other I/O functions. |
918 | */ |
924 | */ |
919 | { |
925 | { |
920 | off_t sbytes; |
926 | off_t sbytes; |
921 | res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0); |
927 | res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0); |
922 | |
928 | |
923 | if (res < 0 && sbytes) |
929 | #if 0 /* according to the manpage, this is correct, but broken behaviour */ |
924 | /* maybe only on EAGAIN: as usual, the manpage leaves you guessing */ |
930 | /* freebsd' sendfile will return 0 on success */ |
|
|
931 | /* freebsd 8 documents it as only setting *sbytes on EINTR and EAGAIN, but */ |
|
|
932 | /* not on e.g. EIO or EPIPE - sounds broken */ |
|
|
933 | if ((res < 0 && (errno == EAGAIN || errno == EINTR) && sbytes) || res == 0) |
|
|
934 | res = sbytes; |
|
|
935 | #endif |
|
|
936 | |
|
|
937 | /* according to source inspection, this is correct, and useful behaviour */ |
|
|
938 | if (sbytes) |
925 | res = sbytes; |
939 | res = sbytes; |
926 | } |
940 | } |
927 | |
941 | |
928 | # elif defined (__APPLE__) |
942 | # elif defined (__APPLE__) |
929 | |
943 | |
930 | { |
944 | { |
931 | off_t sbytes = count; |
945 | off_t sbytes = count; |
932 | res = sendfile (ifd, ofd, offset, &sbytes, 0, 0); |
946 | res = sendfile (ifd, ofd, offset, &sbytes, 0, 0); |
933 | |
947 | |
934 | if (res < 0 && errno == EAGAIN && sbytes) |
948 | /* according to the manpage, sbytes is always valid */ |
|
|
949 | if (sbytes) |
935 | res = sbytes; |
950 | res = sbytes; |
936 | } |
951 | } |
937 | |
952 | |
938 | # elif __hpux |
953 | # elif __hpux |
939 | res = sendfile (ofd, ifd, offset, count, 0, 0); |
954 | res = sendfile (ofd, ifd, offset, count, 0, 0); |
… | |
… | |
970 | errno = ENOSYS; |
985 | errno = ENOSYS; |
971 | #endif |
986 | #endif |
972 | |
987 | |
973 | if (res < 0 |
988 | if (res < 0 |
974 | && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK |
989 | && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK |
|
|
990 | /* BSDs */ |
|
|
991 | #ifdef ENOTSUP /* sigh, if the steenking pile called openbsd would only try to at least compile posix code... */ |
|
|
992 | || errno == ENOTSUP |
|
|
993 | #endif |
|
|
994 | || errno == EOPNOTSUPP /* BSDs */ |
975 | #if __solaris |
995 | #if __solaris |
976 | || errno == EAFNOSUPPORT || errno == EPROTOTYPE |
996 | || errno == EAFNOSUPPORT || errno == EPROTOTYPE |
977 | #endif |
997 | #endif |
978 | || errno == ENOTSUP || errno == EOPNOTSUPP |
|
|
979 | ) |
998 | ) |
980 | ) |
999 | ) |
981 | { |
1000 | { |
982 | /* emulate sendfile. this is a major pain in the ass */ |
1001 | /* emulate sendfile. this is a major pain in the ass */ |
983 | dBUF; |
1002 | dBUF; |
… | |
… | |
1374 | } |
1393 | } |
1375 | } |
1394 | } |
1376 | } |
1395 | } |
1377 | |
1396 | |
1378 | #if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO) |
1397 | #if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO) |
1379 | # undef msync |
|
|
1380 | # define msync(a,b,c) ((errno = ENOSYS), -1) |
1398 | # define eio__msync(a,b,c) ((errno = ENOSYS), -1) |
|
|
1399 | #else |
|
|
1400 | |
|
|
1401 | int |
|
|
1402 | eio__msync (void *mem, size_t len, int flags) |
|
|
1403 | { |
|
|
1404 | if (EIO_MS_ASYNC != MS_SYNC |
|
|
1405 | || EIO_MS_INVALIDATE != MS_INVALIDATE |
|
|
1406 | || EIO_MS_SYNC != MS_SYNC) |
|
|
1407 | { |
|
|
1408 | flags = 0 |
|
|
1409 | | (flags & EIO_MS_ASYNC ? MS_ASYNC : 0) |
|
|
1410 | | (flags & EIO_MS_INVALIDATE ? MS_INVALIDATE : 0) |
|
|
1411 | | (flags & EIO_MS_SYNC ? MS_SYNC : 0); |
|
|
1412 | } |
|
|
1413 | |
|
|
1414 | return msync (mem, len, flags); |
|
|
1415 | } |
|
|
1416 | |
1381 | #endif |
1417 | #endif |
1382 | |
1418 | |
1383 | int |
1419 | int |
1384 | eio__mtouch (void *mem, size_t len, int flags) |
1420 | eio__mtouch (void *mem, size_t len, int flags) |
1385 | { |
1421 | { |
… | |
… | |
1396 | |
1432 | |
1397 | /* round down to start of page, although this is probably useless */ |
1433 | /* round down to start of page, although this is probably useless */ |
1398 | addr &= ~(page - 1); /* assume page size is always a power of two */ |
1434 | addr &= ~(page - 1); /* assume page size is always a power of two */ |
1399 | |
1435 | |
1400 | if (addr < end) |
1436 | if (addr < end) |
1401 | if (flags) /* modify */ |
1437 | if (flags & EIO_MT_MODIFY) /* modify */ |
1402 | do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len); |
1438 | do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len); |
1403 | else |
1439 | else |
1404 | do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len); |
1440 | do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len); |
1405 | |
1441 | |
1406 | return 0; |
1442 | return 0; |
… | |
… | |
1554 | case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); |
1590 | case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); |
1555 | req->result = lstat (req->ptr1, (EIO_STRUCT_STAT *)req->ptr2); break; |
1591 | req->result = lstat (req->ptr1, (EIO_STRUCT_STAT *)req->ptr2); break; |
1556 | case EIO_FSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); |
1592 | case EIO_FSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); |
1557 | req->result = fstat (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break; |
1593 | req->result = fstat (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break; |
1558 | |
1594 | |
|
|
1595 | case EIO_STATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); |
|
|
1596 | req->result = statvfs (req->ptr1, (EIO_STRUCT_STATVFS *)req->ptr2); break; |
|
|
1597 | case EIO_FSTATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); |
|
|
1598 | req->result = fstatvfs (req->int1, (EIO_STRUCT_STATVFS *)req->ptr2); break; |
|
|
1599 | |
1559 | case EIO_CHOWN: req->result = chown (req->ptr1, req->int2, req->int3); break; |
1600 | case EIO_CHOWN: req->result = chown (req->ptr1, req->int2, req->int3); break; |
1560 | case EIO_FCHOWN: req->result = fchown (req->int1, req->int2, req->int3); break; |
1601 | case EIO_FCHOWN: req->result = fchown (req->int1, req->int2, req->int3); break; |
1561 | case EIO_CHMOD: req->result = chmod (req->ptr1, (mode_t)req->int2); break; |
1602 | case EIO_CHMOD: req->result = chmod (req->ptr1, (mode_t)req->int2); break; |
1562 | case EIO_FCHMOD: req->result = fchmod (req->int1, (mode_t)req->int2); break; |
1603 | case EIO_FCHMOD: req->result = fchmod (req->int1, (mode_t)req->int2); break; |
1563 | case EIO_TRUNCATE: req->result = truncate (req->ptr1, req->offs); break; |
1604 | case EIO_TRUNCATE: req->result = truncate (req->ptr1, req->offs); break; |
… | |
… | |
1572 | case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break; |
1613 | case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break; |
1573 | case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break; |
1614 | case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break; |
1574 | case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break; |
1615 | case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break; |
1575 | case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->int3); break; |
1616 | case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->int3); break; |
1576 | |
1617 | |
1577 | case EIO_READLINK: ALLOC (NAME_MAX); |
1618 | case EIO_READLINK: ALLOC (PATH_MAX); |
1578 | req->result = readlink (req->ptr1, req->ptr2, NAME_MAX); break; |
1619 | req->result = readlink (req->ptr1, req->ptr2, PATH_MAX); break; |
1579 | |
1620 | |
1580 | case EIO_SYNC: req->result = 0; sync (); break; |
1621 | case EIO_SYNC: req->result = 0; sync (); break; |
1581 | case EIO_FSYNC: req->result = fsync (req->int1); break; |
1622 | case EIO_FSYNC: req->result = fsync (req->int1); break; |
1582 | case EIO_FDATASYNC: req->result = fdatasync (req->int1); break; |
1623 | case EIO_FDATASYNC: req->result = fdatasync (req->int1); break; |
1583 | case EIO_MSYNC: req->result = msync (req->ptr2, req->size, req->int1); break; |
1624 | case EIO_MSYNC: req->result = eio__msync (req->ptr2, req->size, req->int1); break; |
1584 | case EIO_MTOUCH: req->result = eio__mtouch (req->ptr2, req->size, req->int1); break; |
1625 | case EIO_MTOUCH: req->result = eio__mtouch (req->ptr2, req->size, req->int1); break; |
1585 | case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break; |
1626 | case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break; |
1586 | |
1627 | |
1587 | case EIO_READDIR: eio__scandir (req, self); break; |
1628 | case EIO_READDIR: eio__scandir (req, self); break; |
1588 | |
1629 | |
… | |
… | |
1710 | eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data) |
1751 | eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data) |
1711 | { |
1752 | { |
1712 | REQ (EIO_FSTAT); req->int1 = fd; SEND; |
1753 | REQ (EIO_FSTAT); req->int1 = fd; SEND; |
1713 | } |
1754 | } |
1714 | |
1755 | |
|
|
1756 | eio_req *eio_fstatvfs (int fd, int pri, eio_cb cb, void *data) |
|
|
1757 | { |
|
|
1758 | REQ (EIO_FSTATVFS); req->int1 = fd; SEND; |
|
|
1759 | } |
|
|
1760 | |
1715 | eio_req *eio_futime (int fd, double atime, double mtime, int pri, eio_cb cb, void *data) |
1761 | eio_req *eio_futime (int fd, double atime, double mtime, int pri, eio_cb cb, void *data) |
1716 | { |
1762 | { |
1717 | REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND; |
1763 | REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND; |
1718 | } |
1764 | } |
1719 | |
1765 | |
… | |
… | |
1789 | } |
1835 | } |
1790 | |
1836 | |
1791 | eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data) |
1837 | eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data) |
1792 | { |
1838 | { |
1793 | return eio__1path (EIO_LSTAT, path, pri, cb, data); |
1839 | return eio__1path (EIO_LSTAT, path, pri, cb, data); |
|
|
1840 | } |
|
|
1841 | |
|
|
1842 | eio_req *eio_statvfs (const char *path, int pri, eio_cb cb, void *data) |
|
|
1843 | { |
|
|
1844 | return eio__1path (EIO_STATVFS, path, pri, cb, data); |
1794 | } |
1845 | } |
1795 | |
1846 | |
1796 | eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data) |
1847 | eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data) |
1797 | { |
1848 | { |
1798 | return eio__1path (EIO_UNLINK, path, pri, cb, data); |
1849 | return eio__1path (EIO_UNLINK, path, pri, cb, data); |