1 | /* |
1 | /* |
2 | * libeio implementation |
2 | * libeio implementation |
3 | * |
3 | * |
4 | * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libeio@schmorp.de> |
4 | * Copyright (c) 2007,2008,2009,2010 Marc Alexander Lehmann <libeio@schmorp.de> |
5 | * All rights reserved. |
5 | * All rights reserved. |
6 | * |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without modifica- |
7 | * Redistribution and use in source and binary forms, with or without modifica- |
8 | * tion, are permitted provided that the following conditions are met: |
8 | * tion, are permitted provided that the following conditions are met: |
9 | * |
9 | * |
… | |
… | |
49 | #include <stdlib.h> |
49 | #include <stdlib.h> |
50 | #include <string.h> |
50 | #include <string.h> |
51 | #include <errno.h> |
51 | #include <errno.h> |
52 | #include <sys/types.h> |
52 | #include <sys/types.h> |
53 | #include <sys/stat.h> |
53 | #include <sys/stat.h> |
|
|
54 | #include <sys/statvfs.h> |
54 | #include <limits.h> |
55 | #include <limits.h> |
55 | #include <fcntl.h> |
56 | #include <fcntl.h> |
56 | #include <assert.h> |
57 | #include <assert.h> |
57 | |
58 | |
58 | #ifndef EIO_FINISH |
59 | #ifndef EIO_FINISH |
… | |
… | |
80 | # include <utime.h> |
81 | # include <utime.h> |
81 | # include <signal.h> |
82 | # include <signal.h> |
82 | # include <dirent.h> |
83 | # include <dirent.h> |
83 | |
84 | |
84 | /* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */ |
85 | /* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */ |
85 | # if __freebsd || defined __NetBSD__ || defined __OpenBSD__ |
86 | # if __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ |
86 | # define _DIRENT_HAVE_D_TYPE /* sigh */ |
87 | # define _DIRENT_HAVE_D_TYPE /* sigh */ |
87 | # define D_INO(de) (de)->d_fileno |
88 | # define D_INO(de) (de)->d_fileno |
88 | # define D_NAMLEN(de) (de)->d_namlen |
89 | # define D_NAMLEN(de) (de)->d_namlen |
89 | # elif __linux || defined d_ino || _XOPEN_SOURCE >= 600 |
90 | # elif __linux || defined d_ino || _XOPEN_SOURCE >= 600 |
90 | # define D_INO(de) (de)->d_ino |
91 | # define D_INO(de) (de)->d_ino |
… | |
… | |
106 | #endif |
107 | #endif |
107 | |
108 | |
108 | #if HAVE_SENDFILE |
109 | #if HAVE_SENDFILE |
109 | # if __linux |
110 | # if __linux |
110 | # include <sys/sendfile.h> |
111 | # include <sys/sendfile.h> |
111 | # elif __freebsd || defined __APPLE__ |
112 | # elif __FreeBSD__ || defined __APPLE__ |
112 | # include <sys/socket.h> |
113 | # include <sys/socket.h> |
113 | # include <sys/uio.h> |
114 | # include <sys/uio.h> |
114 | # elif __hpux |
115 | # elif __hpux |
115 | # include <sys/socket.h> |
116 | # include <sys/socket.h> |
116 | # elif __solaris |
117 | # elif __solaris |
117 | # include <sys/sendfile.h> |
118 | # include <sys/sendfile.h> |
118 | # elif defined _WIN32 |
|
|
119 | # else |
119 | # else |
120 | # error sendfile support requested but not available |
120 | # error sendfile support requested but not available |
121 | # endif |
121 | # endif |
122 | #endif |
122 | #endif |
123 | |
123 | |
… | |
… | |
135 | #define IDLE_TIMEOUT 10 |
135 | #define IDLE_TIMEOUT 10 |
136 | |
136 | |
/*
 * Fallback limits. Each macro is defined as 4096 only when it is not already
 * defined at this point (<limits.h> is included earlier in this listing).
 * The PATH_MAX fallback appears only in the newer-revision rows (142-145);
 * the older revision defines NAME_MAX alone.
 */
137 | /* used for struct dirent, AIX doesn't provide it */ |
137 | /* used for struct dirent, AIX doesn't provide it */ |
138 | #ifndef NAME_MAX |
138 | #ifndef NAME_MAX |
139 | # define NAME_MAX 4096 |
139 | # define NAME_MAX 4096 |


140 | #endif |


141 | |


142 | /* used for readlink etc. */ |


143 | #ifndef PATH_MAX |


144 | # define PATH_MAX 4096 |
140 | #endif |
145 | #endif |
141 | |
146 | |
142 | /* buffer size for various temporary buffers */ |
147 | /* buffer size for various temporary buffers */ |
143 | #define EIO_BUFSIZE 65536 |
148 | #define EIO_BUFSIZE 65536 |
144 | |
149 | |
… | |
… | |
/*
 * File-scope state of the thread pool.
 *
 * nreqs, nready and npending are unsigned counters; their trailing
 * "reqlock" comments mark reqlock as the mutex associated with them.
 * max_idle starts at 4.
 *
 * wrklock, reslock and reqlock are statically initialised mutexes
 * (X_MUTEX_INIT) and reqwait a statically initialised condition variable
 * (X_COND_INIT). The only difference between the two revisions in these
 * rows is the type spelling: mutex_t/cond_t on the old side,
 * xmutex_t/xcond_t on the new side.
 *
 * preadwritelock exists only when HAVE_PREADWRITE is false; see the
 * original comment below for its purpose.
 */
217 | static volatile unsigned int nreqs; /* reqlock */ |
222 | static volatile unsigned int nreqs; /* reqlock */ |
218 | static volatile unsigned int nready; /* reqlock */ |
223 | static volatile unsigned int nready; /* reqlock */ |
219 | static volatile unsigned int npending; /* reqlock */ |
224 | static volatile unsigned int npending; /* reqlock */ |
220 | static volatile unsigned int max_idle = 4; |
225 | static volatile unsigned int max_idle = 4; |
221 | |
226 | |
222 | static mutex_t wrklock = X_MUTEX_INIT; |
227 | static xmutex_t wrklock = X_MUTEX_INIT; |
223 | static mutex_t reslock = X_MUTEX_INIT; |
228 | static xmutex_t reslock = X_MUTEX_INIT; |
224 | static mutex_t reqlock = X_MUTEX_INIT; |
229 | static xmutex_t reqlock = X_MUTEX_INIT; |
225 | static cond_t reqwait = X_COND_INIT; |
230 | static xcond_t reqwait = X_COND_INIT; |
226 | |
231 | |
227 | #if !HAVE_PREADWRITE |
232 | #if !HAVE_PREADWRITE |
228 | /* |
233 | /* |
229 | * make our pread/pwrite emulation safe against themselves, but not against |
234 | * make our pread/pwrite emulation safe against themselves, but not against |
230 | * normal read/write by using a mutex. slows down execution a lot, |
235 | * normal read/write by using a mutex. slows down execution a lot, |
231 | * but that's your problem, not mine. |
236 | * but that's your problem, not mine. |
232 | */ |
237 | */ |
233 | static mutex_t preadwritelock = X_MUTEX_INIT; |
238 | static xmutex_t preadwritelock = X_MUTEX_INIT; |
234 | #endif |
239 | #endif |
235 | |
240 | |
236 | typedef struct etp_worker |
241 | typedef struct etp_worker |
237 | { |
242 | { |
238 | /* locked by wrklock */ |
243 | /* locked by wrklock */ |
239 | struct etp_worker *prev, *next; |
244 | struct etp_worker *prev, *next; |
240 | |
245 | |
241 | thread_t tid; |
246 | xthread_t tid; |
242 | |
247 | |
243 | /* locked by reslock, reqlock or wrklock */ |
248 | /* locked by reslock, reqlock or wrklock */ |
244 | ETP_REQ *req; /* currently processed request */ |
249 | ETP_REQ *req; /* currently processed request */ |
245 | |
250 | |
246 | ETP_WORKER_COMMON |
251 | ETP_WORKER_COMMON |
… | |
… | |
910 | |
915 | |
911 | #if HAVE_SENDFILE |
916 | #if HAVE_SENDFILE |
912 | # if __linux |
917 | # if __linux |
913 | res = sendfile (ofd, ifd, &offset, count); |
918 | res = sendfile (ofd, ifd, &offset, count); |
914 | |
919 | |
915 | # elif __freebsd |
920 | # elif __FreeBSD__ |
916 | /* |
921 | /* |
917 | * Of course, the freebsd sendfile is a dire hack with no thoughts |
922 | * Of course, the freebsd sendfile is a dire hack with no thoughts |
918 | * wasted on making it similar to other I/O functions. |
923 | * wasted on making it similar to other I/O functions. |
919 | */ |
924 | */ |
920 | { |
925 | { |
921 | off_t sbytes; |
926 | off_t sbytes; |
922 | res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0); |
927 | res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0); |
923 | |
928 | |
924 | if (res < 0 && sbytes) |
929 | #if 0 /* according to the manpage, this is correct, but broken behaviour */ |
925 | /* maybe only on EAGAIN: as usual, the manpage leaves you guessing */ |
930 | /* freebsd' sendfile will return 0 on success */ |
|
|
931 | /* freebsd 8 documents it as only setting *sbytes on EINTR and EAGAIN, but */ |
|
|
932 | /* not on e.g. EIO or EPIPE - sounds broken */ |
|
|
933 | if ((res < 0 && (errno == EAGAIN || errno == EINTR) && sbytes) || res == 0) |
|
|
934 | res = sbytes; |
|
|
935 | #endif |
|
|
936 | |
|
|
937 | /* according to source inspection, this is correct, and useful behaviour */ |
|
|
938 | if (sbytes) |
926 | res = sbytes; |
939 | res = sbytes; |
927 | } |
940 | } |
928 | |
941 | |
929 | # elif defined __APPLE__ |
942 | # elif defined (__APPLE__) |
930 | |
943 | |
931 | { |
944 | { |
932 | off_t bytes = count; |
945 | off_t sbytes = count; |
933 | res = sendfile (ifd, ofd, offset, &bytes, 0, 0); |
946 | res = sendfile (ifd, ofd, offset, &sbytes, 0, 0); |
934 | |
947 | |
935 | if (res < 0 && errno == EAGAIN && bytes) |
948 | /* according to the manpage, sbytes is always valid */ |
|
|
949 | if (sbytes) |
936 | res = sbytes; |
950 | res = sbytes; |
937 | } |
951 | } |
938 | |
952 | |
939 | # elif __hpux |
953 | # elif __hpux |
940 | res = sendfile (ofd, ifd, offset, count, 0, 0); |
954 | res = sendfile (ofd, ifd, offset, count, 0, 0); |
… | |
… | |
954 | if (res < 0 && sbytes) |
968 | if (res < 0 && sbytes) |
955 | res = sbytes; |
969 | res = sbytes; |
956 | } |
970 | } |
957 | |
971 | |
958 | # endif |
972 | # endif |
|
|
973 | |
959 | #elif defined _WIN32 |
974 | #elif defined (_WIN32) |
960 | |
975 | |
961 | /* does not work, just for documentation of what would need to be done */ |
976 | /* does not work, just for documentation of what would need to be done */ |
962 | { |
977 | { |
963 | HANDLE h = TO_SOCKET (ifd); |
978 | HANDLE h = TO_SOCKET (ifd); |
964 | SetFilePointer (h, offset, 0, FILE_BEGIN); |
979 | SetFilePointer (h, offset, 0, FILE_BEGIN); |
… | |
… | |
970 | errno = ENOSYS; |
985 | errno = ENOSYS; |
971 | #endif |
986 | #endif |
972 | |
987 | |
973 | if (res < 0 |
988 | if (res < 0 |
974 | && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK |
989 | && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK |
|
|
990 | /* BSDs */ |
|
|
991 | #ifdef ENOTSUP /* sigh, if the steenking pile called openbsd would only try to at least compile posix code... */ |
|
|
992 | || errno == ENOTSUP |
|
|
993 | #endif |
|
|
994 | || errno == EOPNOTSUPP /* BSDs */ |
975 | #if __solaris |
995 | #if __solaris |
976 | || errno == EAFNOSUPPORT || errno == EPROTOTYPE |
996 | || errno == EAFNOSUPPORT || errno == EPROTOTYPE |
977 | #endif |
997 | #endif |
978 | ) |
998 | ) |
979 | ) |
999 | ) |
… | |
… | |
1373 | } |
1393 | } |
1374 | } |
1394 | } |
1375 | } |
1395 | } |
1376 | |
1396 | |
1377 | #if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO) |
1397 | #if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO) |
1378 | # undef msync |
|
|
1379 | # define msync(a,b,c) ((errno = ENOSYS), -1) |
1398 | # define eio__msync(a,b,c) ((errno = ENOSYS), -1) |
|
|
1399 | #else |
|
|
1400 | |
|
|
/*
 * eio__msync: msync() wrapper that accepts libeio's EIO_MS_* flag bits.
 *
 * mem, len - passed to msync() unchanged.
 * flags    - combination of EIO_MS_ASYNC, EIO_MS_INVALIDATE, EIO_MS_SYNC.
 *
 * If any EIO_MS_* constant differs from the corresponding system MS_*
 * constant, the flags are rebuilt bit by bit from the MS_* values;
 * otherwise they are handed to msync() as they are.
 * Returns the result of msync().
 *
 * Review fix (row 1404): EIO_MS_ASYNC was compared with MS_SYNC; it must be
 * compared with MS_ASYNC, like the two sibling comparisons, so that the
 * pass-through case is actually reachable when all constants match.
 */
1401 | int |


1402 | eio__msync (void *mem, size_t len, int flags) |


1403 | { |


1404 | if (EIO_MS_ASYNC != MS_ASYNC |


1405 | || EIO_MS_INVALIDATE != MS_INVALIDATE |


1406 | || EIO_MS_SYNC != MS_SYNC) |


1407 | { |


1408 | flags = 0 |


1409 | | (flags & EIO_MS_ASYNC ? MS_ASYNC : 0) |


1410 | | (flags & EIO_MS_INVALIDATE ? MS_INVALIDATE : 0) |


1411 | | (flags & EIO_MS_SYNC ? MS_SYNC : 0); |


1412 | } |


1413 | |


1414 | return msync (mem, len, flags); |


1415 | } |
|
|
1416 | |
1380 | #endif |
1417 | #endif |
1381 | |
1418 | |
1382 | int |
1419 | int |
1383 | eio__mtouch (void *mem, size_t len, int flags) |
1420 | eio__mtouch (void *mem, size_t len, int flags) |
1384 | { |
1421 | { |
… | |
… | |
1395 | |
1432 | |
1396 | /* round down to start of page, although this is probably useless */ |
1433 | /* round down to start of page, although this is probably useless */ |
1397 | addr &= ~(page - 1); /* assume page size is always a power of two */ |
1434 | addr &= ~(page - 1); /* assume page size is always a power of two */ |
1398 | |
1435 | |
1399 | if (addr < end) |
1436 | if (addr < end) |
1400 | if (flags) /* modify */ |
1437 | if (flags & EIO_MT_MODIFY) /* modify */ |
1401 | do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len); |
1438 | do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len); |
1402 | else |
1439 | else |
1403 | do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len); |
1440 | do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len); |
1404 | |
1441 | |
1405 | return 0; |
1442 | return 0; |
… | |
… | |
1553 | case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); |
1590 | case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); |
1554 | req->result = lstat (req->ptr1, (EIO_STRUCT_STAT *)req->ptr2); break; |
1591 | req->result = lstat (req->ptr1, (EIO_STRUCT_STAT *)req->ptr2); break; |
1555 | case EIO_FSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); |
1592 | case EIO_FSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); |
1556 | req->result = fstat (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break; |
1593 | req->result = fstat (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break; |
1557 | |
1594 | |
|
|
1595 | case EIO_STATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); |
|
|
1596 | req->result = statvfs (req->ptr1, (EIO_STRUCT_STATVFS *)req->ptr2); break; |
|
|
1597 | case EIO_FSTATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); |
|
|
1598 | req->result = fstatvfs (req->int1, (EIO_STRUCT_STATVFS *)req->ptr2); break; |
|
|
1599 | |
1558 | case EIO_CHOWN: req->result = chown (req->ptr1, req->int2, req->int3); break; |
1600 | case EIO_CHOWN: req->result = chown (req->ptr1, req->int2, req->int3); break; |
1559 | case EIO_FCHOWN: req->result = fchown (req->int1, req->int2, req->int3); break; |
1601 | case EIO_FCHOWN: req->result = fchown (req->int1, req->int2, req->int3); break; |
1560 | case EIO_CHMOD: req->result = chmod (req->ptr1, (mode_t)req->int2); break; |
1602 | case EIO_CHMOD: req->result = chmod (req->ptr1, (mode_t)req->int2); break; |
1561 | case EIO_FCHMOD: req->result = fchmod (req->int1, (mode_t)req->int2); break; |
1603 | case EIO_FCHMOD: req->result = fchmod (req->int1, (mode_t)req->int2); break; |
1562 | case EIO_TRUNCATE: req->result = truncate (req->ptr1, req->offs); break; |
1604 | case EIO_TRUNCATE: req->result = truncate (req->ptr1, req->offs); break; |
… | |
… | |
1571 | case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break; |
1613 | case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break; |
1572 | case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break; |
1614 | case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break; |
1573 | case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break; |
1615 | case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break; |
1574 | case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->int3); break; |
1616 | case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->int3); break; |
1575 | |
1617 | |
1576 | case EIO_READLINK: ALLOC (NAME_MAX); |
1618 | case EIO_READLINK: ALLOC (PATH_MAX); |
1577 | req->result = readlink (req->ptr1, req->ptr2, NAME_MAX); break; |
1619 | req->result = readlink (req->ptr1, req->ptr2, PATH_MAX); break; |
1578 | |
1620 | |
1579 | case EIO_SYNC: req->result = 0; sync (); break; |
1621 | case EIO_SYNC: req->result = 0; sync (); break; |
1580 | case EIO_FSYNC: req->result = fsync (req->int1); break; |
1622 | case EIO_FSYNC: req->result = fsync (req->int1); break; |
1581 | case EIO_FDATASYNC: req->result = fdatasync (req->int1); break; |
1623 | case EIO_FDATASYNC: req->result = fdatasync (req->int1); break; |
1582 | case EIO_MSYNC: req->result = msync (req->ptr2, req->size, req->int1); break; |
1624 | case EIO_MSYNC: req->result = eio__msync (req->ptr2, req->size, req->int1); break; |
1583 | case EIO_MTOUCH: req->result = eio__mtouch (req->ptr2, req->size, req->int1); break; |
1625 | case EIO_MTOUCH: req->result = eio__mtouch (req->ptr2, req->size, req->int1); break; |
1584 | case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break; |
1626 | case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break; |
1585 | |
1627 | |
1586 | case EIO_READDIR: eio__scandir (req, self); break; |
1628 | case EIO_READDIR: eio__scandir (req, self); break; |
1587 | |
1629 | |
… | |
… | |
/*
 * eio_fstat: create an EIO_FSTAT request for file descriptor fd.
 * The descriptor is stored in req->int1; the EIO_FSTAT case shown earlier in
 * this listing later calls fstat (req->int1, ...) on it.
 * REQ and SEND are macros defined outside this excerpt - presumably they
 * build the request from pri/cb/data and submit/return it (confirm there).
 * Identical in both revisions.
 */
1709 | eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data) |
1751 | eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data) |
1710 | { |
1752 | { |
1711 | REQ (EIO_FSTAT); req->int1 = fd; SEND; |
1753 | REQ (EIO_FSTAT); req->int1 = fd; SEND; |
1712 | } |
1754 | } |
1713 | |
1755 | |
|
|
/*
 * eio_fstatvfs (new-revision rows only): create an EIO_FSTATVFS request for
 * file descriptor fd, stored in req->int1. The EIO_FSTATVFS case shown
 * earlier in this listing calls fstatvfs (req->int1, ...) for it.
 * REQ and SEND are macros defined outside this excerpt - presumably they
 * build the request from pri/cb/data and submit/return it (confirm there).
 */
1756 | eio_req *eio_fstatvfs (int fd, int pri, eio_cb cb, void *data) |


1757 | { |


1758 | REQ (EIO_FSTATVFS); req->int1 = fd; SEND; |


1759 | } |
|
|
1760 | |
/*
 * eio_futime: create an EIO_FUTIME request for file descriptor fd.
 * fd goes into req->int1, atime into req->nv1 and mtime into req->nv2.
 * REQ and SEND are macros defined outside this excerpt - presumably they
 * build the request from pri/cb/data and submit/return it (confirm there).
 * Identical in both revisions.
 */
1714 | eio_req *eio_futime (int fd, double atime, double mtime, int pri, eio_cb cb, void *data) |
1761 | eio_req *eio_futime (int fd, double atime, double mtime, int pri, eio_cb cb, void *data) |
1715 | { |
1762 | { |
1716 | REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND; |
1763 | REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND; |
1717 | } |
1764 | } |
1718 | |
1765 | |
… | |
… | |
1788 | } |
1835 | } |
1789 | |
1836 | |
/*
 * eio_lstat: path-based request; forwards path, pri, cb and data to
 * eio__1path with request type EIO_LSTAT and returns its result.
 * eio__1path is defined outside this excerpt. Present in both revisions.
 */
1790 | eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data) |
1837 | eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data) |
1791 | { |
1838 | { |
1792 | return eio__1path (EIO_LSTAT, path, pri, cb, data); |
1839 | return eio__1path (EIO_LSTAT, path, pri, cb, data); |


1840 | } |


1841 | |


/*
 * eio_statvfs (new-revision rows only): same shape as eio_lstat, but with
 * request type EIO_STATVFS. Row 1793 below is the old revision's closing
 * brace of eio_lstat, shown here because of the side-by-side alignment.
 */
1842 | eio_req *eio_statvfs (const char *path, int pri, eio_cb cb, void *data) |


1843 | { |


1844 | return eio__1path (EIO_STATVFS, path, pri, cb, data); |
1793 | } |
1845 | } |
1794 | |
1846 | |
1795 | eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data) |
1847 | eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data) |
1796 | { |
1848 | { |
1797 | return eio__1path (EIO_UNLINK, path, pri, cb, data); |
1849 | return eio__1path (EIO_UNLINK, path, pri, cb, data); |