ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libeio/eio.c
(Generate patch)

Comparing libeio/eio.c (file contents):
Revision 1.44 by root, Thu Nov 26 05:25:35 2009 UTC vs.
Revision 1.56 by root, Sun Sep 12 03:36:28 2010 UTC

1/* 1/*
2 * libeio implementation 2 * libeio implementation
3 * 3 *
4 * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libeio@schmorp.de> 4 * Copyright (c) 2007,2008,2009,2010 Marc Alexander Lehmann <libeio@schmorp.de>
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without modifica- 7 * Redistribution and use in source and binary forms, with or without modifica-
8 * tion, are permitted provided that the following conditions are met: 8 * tion, are permitted provided that the following conditions are met:
9 * 9 *
49#include <stdlib.h> 49#include <stdlib.h>
50#include <string.h> 50#include <string.h>
51#include <errno.h> 51#include <errno.h>
52#include <sys/types.h> 52#include <sys/types.h>
53#include <sys/stat.h> 53#include <sys/stat.h>
54#include <sys/statvfs.h>
54#include <limits.h> 55#include <limits.h>
55#include <fcntl.h> 56#include <fcntl.h>
56#include <assert.h> 57#include <assert.h>
57 58
58#ifndef EIO_FINISH 59#ifndef EIO_FINISH
73#else 74#else
74 75
75# include "config.h" 76# include "config.h"
76# include <sys/time.h> 77# include <sys/time.h>
77# include <sys/select.h> 78# include <sys/select.h>
78# include <sys/mman.h>
79# include <unistd.h> 79# include <unistd.h>
80# include <utime.h> 80# include <utime.h>
81# include <signal.h> 81# include <signal.h>
82# include <dirent.h> 82# include <dirent.h>
83 83
84#if _POSIX_MEMLOCK || _POSIX_MAPPED_FILES
85# include <sys/mman.h>
86#endif
87
84/* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */ 88/* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */
85# if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) 89# if __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__
86# define _DIRENT_HAVE_D_TYPE /* sigh */ 90# define _DIRENT_HAVE_D_TYPE /* sigh */
87# define D_INO(de) (de)->d_fileno 91# define D_INO(de) (de)->d_fileno
88# define D_NAMLEN(de) (de)->d_namlen 92# define D_NAMLEN(de) (de)->d_namlen
89# elif defined(__linux) || defined(d_ino) || _XOPEN_SOURCE >= 600 93# elif __linux || defined d_ino || _XOPEN_SOURCE >= 600
90# define D_INO(de) (de)->d_ino 94# define D_INO(de) (de)->d_ino
91# endif 95# endif
92 96
93#ifdef _D_EXACT_NAMLEN 97#ifdef _D_EXACT_NAMLEN
94# undef D_NAMLEN 98# undef D_NAMLEN
106#endif 110#endif
107 111
108#if HAVE_SENDFILE 112#if HAVE_SENDFILE
109# if __linux 113# if __linux
110# include <sys/sendfile.h> 114# include <sys/sendfile.h>
111# elif __freebsd 115# elif __FreeBSD__ || defined __APPLE__
112# include <sys/socket.h> 116# include <sys/socket.h>
113# include <sys/uio.h> 117# include <sys/uio.h>
114# elif __hpux 118# elif __hpux
115# include <sys/socket.h> 119# include <sys/socket.h>
116# elif __solaris /* not yet */ 120# elif __solaris
117# include <sys/sendfile.h> 121# include <sys/sendfile.h>
118# else 122# else
119# error sendfile support requested but not available 123# error sendfile support requested but not available
120# endif 124# endif
121#endif 125#endif
134#define IDLE_TIMEOUT 10 138#define IDLE_TIMEOUT 10
135 139
136/* used for struct dirent, AIX doesn't provide it */ 140/* used for struct dirent, AIX doesn't provide it */
137#ifndef NAME_MAX 141#ifndef NAME_MAX
138# define NAME_MAX 4096 142# define NAME_MAX 4096
143#endif
144
145/* used for readlink etc. */
146#ifndef PATH_MAX
147# define PATH_MAX 4096
139#endif 148#endif
140 149
141/* buffer size for various temporary buffers */ 150/* buffer size for various temporary buffers */
142#define EIO_BUFSIZE 65536 151#define EIO_BUFSIZE 65536
143 152
216static volatile unsigned int nreqs; /* reqlock */ 225static volatile unsigned int nreqs; /* reqlock */
217static volatile unsigned int nready; /* reqlock */ 226static volatile unsigned int nready; /* reqlock */
218static volatile unsigned int npending; /* reqlock */ 227static volatile unsigned int npending; /* reqlock */
219static volatile unsigned int max_idle = 4; 228static volatile unsigned int max_idle = 4;
220 229
221static mutex_t wrklock = X_MUTEX_INIT; 230static xmutex_t wrklock = X_MUTEX_INIT;
222static mutex_t reslock = X_MUTEX_INIT; 231static xmutex_t reslock = X_MUTEX_INIT;
223static mutex_t reqlock = X_MUTEX_INIT; 232static xmutex_t reqlock = X_MUTEX_INIT;
224static cond_t reqwait = X_COND_INIT; 233static xcond_t reqwait = X_COND_INIT;
225 234
226#if !HAVE_PREADWRITE 235#if !HAVE_PREADWRITE
227/* 236/*
228 * make our pread/pwrite emulation safe against themselves, but not against 237 * make our pread/pwrite emulation safe against themselves, but not against
229 * normal read/write by using a mutex. slows down execution a lot, 238 * normal read/write by using a mutex. slows down execution a lot,
230 * but that's your problem, not mine. 239 * but that's your problem, not mine.
231 */ 240 */
232static mutex_t preadwritelock = X_MUTEX_INIT; 241static xmutex_t preadwritelock = X_MUTEX_INIT;
233#endif 242#endif
234 243
235typedef struct etp_worker 244typedef struct etp_worker
236{ 245{
237 /* locked by wrklock */ 246 /* locked by wrklock */
238 struct etp_worker *prev, *next; 247 struct etp_worker *prev, *next;
239 248
240 thread_t tid; 249 xthread_t tid;
241 250
242 /* locked by reslock, reqlock or wrklock */ 251 /* locked by reslock, reqlock or wrklock */
243 ETP_REQ *req; /* currently processed request */ 252 ETP_REQ *req; /* currently processed request */
244 253
245 ETP_WORKER_COMMON 254 ETP_WORKER_COMMON
909 918
910#if HAVE_SENDFILE 919#if HAVE_SENDFILE
911# if __linux 920# if __linux
912 res = sendfile (ofd, ifd, &offset, count); 921 res = sendfile (ofd, ifd, &offset, count);
913 922
914# elif __freebsd 923# elif __FreeBSD__
915 /* 924 /*
916 * Of course, the freebsd sendfile is a dire hack with no thoughts 925 * Of course, the freebsd sendfile is a dire hack with no thoughts
917 * wasted on making it similar to other I/O functions. 926 * wasted on making it similar to other I/O functions.
918 */ 927 */
919 { 928 {
920 off_t sbytes; 929 off_t sbytes;
921 res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0); 930 res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0);
922 931
923 if (res < 0 && sbytes) 932 #if 0 /* according to the manpage, this is correct, but broken behaviour */
924 /* maybe only on EAGAIN: as usual, the manpage leaves you guessing */ 933 /* freebsd' sendfile will return 0 on success */
934 /* freebsd 8 documents it as only setting *sbytes on EINTR and EAGAIN, but */
935 /* not on e.g. EIO or EPIPE - sounds broken */
936 if ((res < 0 && (errno == EAGAIN || errno == EINTR) && sbytes) || res == 0)
937 res = sbytes;
938 #endif
939
940 /* according to source inspection, this is correct, and useful behaviour */
941 if (sbytes)
942 res = sbytes;
943 }
944
945# elif defined (__APPLE__)
946
947 {
948 off_t sbytes = count;
949 res = sendfile (ifd, ofd, offset, &sbytes, 0, 0);
950
951 /* according to the manpage, sbytes is always valid */
952 if (sbytes)
925 res = sbytes; 953 res = sbytes;
926 } 954 }
927 955
928# elif __hpux 956# elif __hpux
929 res = sendfile (ofd, ifd, offset, count, 0, 0); 957 res = sendfile (ofd, ifd, offset, count, 0, 0);
943 if (res < 0 && sbytes) 971 if (res < 0 && sbytes)
944 res = sbytes; 972 res = sbytes;
945 } 973 }
946 974
947# endif 975# endif
976
977#elif defined (_WIN32)
978
979 /* does not work, just for documentation of what would need to be done */
980 {
981 HANDLE h = TO_SOCKET (ifd);
982 SetFilePointer (h, offset, 0, FILE_BEGIN);
983 res = TransmitFile (TO_SOCKET (ofd), h, count, 0, 0, 0, 0);
984 }
985
948#else 986#else
949 res = -1; 987 res = -1;
950 errno = ENOSYS; 988 errno = ENOSYS;
951#endif 989#endif
952 990
953 if (res < 0 991 if (res < 0
954 && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK 992 && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK
993 /* BSDs */
994#ifdef ENOTSUP /* sigh, if the steenking pile called openbsd would only try to at least compile posix code... */
995 || errno == ENOTSUP
996#endif
997 || errno == EOPNOTSUPP /* BSDs */
955#if __solaris 998#if __solaris
956 || errno == EAFNOSUPPORT || errno == EPROTOTYPE 999 || errno == EAFNOSUPPORT || errno == EPROTOTYPE
957#endif 1000#endif
958 ) 1001 )
959 ) 1002 )
1352 break; 1395 break;
1353 } 1396 }
1354 } 1397 }
1355} 1398}
1356 1399
/*
 * eio_pagesize: yields the page size used for address/length alignment.
 *
 * If the compile-time constant PAGESIZE is defined, eio_pagesize () is a
 * macro that expands to it. Otherwise it is a function that asks
 * sysconf (_SC_PAGESIZE) on first use and keeps the answer in a
 * function-local static for later calls.
 *
 * NOTE(review): the cached value is written without any visible locking,
 * and a failing sysconf (-1) would be cached and returned unchanged.
 */
1400#ifdef PAGESIZE
1401# define eio_pagesize() PAGESIZE
1402#else
1403static intptr_t
1404eio_pagesize (void)
1405{
1406 static intptr_t page;
1407
/* zero means "not queried yet" */
1408 if (!page)
1409 page = sysconf (_SC_PAGESIZE);
1410
1411 return page;
1412}
1413#endif
1414
/*
 * eio_page_align: widens the range [*addr, *addr + *length) to whole pages.
 *
 * On return *addr has been rounded down to a multiple of the page size and
 * *length has been increased by the amount *addr moved and then rounded up
 * to a multiple of the page size. Both arguments are updated in place.
 *
 * The mask arithmetic only works when eio_pagesize () is a power of two.
 */
1415static void
1416eio_page_align (void **addr, size_t *length)
1417{
1418 intptr_t mask = eio_pagesize () - 1;
1419
1420 /* round down addr */
1421 intptr_t adj = mask & (intptr_t)*addr;
1422
1423 *addr = (void *)((intptr_t)*addr - adj);
/* keep the end of the range where it was */
1424 *length += adj;
1425
1426 /* round up length */
1427 *length = (*length + mask) & ~mask;
1428}
1429
1430#if !_POSIX_MEMLOCK
1431# define eio__mlock(a,b) ((errno = ENOSYS), -1)
1432# define eio__mlockall(a) ((errno = ENOSYS), -1)
1433#else
1434
/*
 * eio__mlock: lock the pages covering [addr, addr + length) into memory.
 *
 * The range is first widened to whole pages with eio_page_align, then
 * passed to mlock.
 *
 * Returns the result of mlock (0 on success, -1 with errno set on failure).
 * The request dispatcher stores this value in req->result, so it must be
 * returned explicitly; falling off the end of a non-void function leaves
 * the result undefined.
 */
static int
eio__mlock (void *addr, size_t length)
{
  eio_page_align (&addr, &length);

  return mlock (addr, length);
}
1442
1443static int
1444eio__mlockall (int flags)
1445{
1446 #if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 7
1447 extern int mallopt (int, int);
1448 mallopt (-6, 238); /* http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=473812 */
1449 #endif
1450
1451 if (EIO_MCL_CURRENT != MCL_CURRENT
1452 || EIO_MCL_FUTURE != MCL_FUTURE)
1453 {
1454 flags = 0
1455 | (flags & EIO_MCL_CURRENT ? MCL_CURRENT : 0)
1456 | (flags & EIO_MCL_FUTURE ? MCL_FUTURE : 0);
1457 }
1458
1459 mlockall (flags);
1460}
1461#endif
1462
1357#if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO) 1463#if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO)
1358# undef msync
1359# define msync(a,b,c) ((errno = ENOSYS), -1) 1464# define eio__msync(a,b,c) ((errno = ENOSYS), -1)
1465#else
1466
1467int
1468eio__msync (void *mem, size_t len, int flags)
1469{
1470 eio_page_align (&mem, &len);
1471
1472 if (EIO_MS_ASYNC != MS_SYNC
1473 || EIO_MS_INVALIDATE != MS_INVALIDATE
1474 || EIO_MS_SYNC != MS_SYNC)
1475 {
1476 flags = 0
1477 | (flags & EIO_MS_ASYNC ? MS_ASYNC : 0)
1478 | (flags & EIO_MS_INVALIDATE ? MS_INVALIDATE : 0)
1479 | (flags & EIO_MS_SYNC ? MS_SYNC : 0);
1480 }
1481
1482 return msync (mem, len, flags);
1483}
1484
1360#endif 1485#endif
1361 1486
1362int 1487int
1363eio__mtouch (void *mem, size_t len, int flags) 1488eio__mtouch (void *mem, size_t len, int flags)
1364{ 1489{
1490 eio_page_align (&mem, &len);
1491
1492 {
1365 intptr_t addr = (intptr_t)mem; 1493 intptr_t addr = (intptr_t)mem;
1366 intptr_t end = addr + len; 1494 intptr_t end = addr + len;
1367#ifdef PAGESIZE 1495 intptr_t page = eio_pagesize ();
1368 const intptr_t page = PAGESIZE;
1369#else
1370 static intptr_t page;
1371 1496
1372 if (!page)
1373 page = sysconf (_SC_PAGESIZE);
1374#endif
1375
1376 addr &= ~(page - 1); /* assume page size is always a power of two */
1377
1378 if (addr < end) 1497 if (addr < end)
1379 if (flags) /* modify */ 1498 if (flags & EIO_MT_MODIFY) /* modify */
1380 do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len); 1499 do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len);
1381 else 1500 else
1382 do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len); 1501 do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len);
1502 }
1383 1503
1384 return 0; 1504 return 0;
1385} 1505}
1386 1506
1387/*****************************************************************************/ 1507/*****************************************************************************/
1532 case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); 1652 case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT));
1533 req->result = lstat (req->ptr1, (EIO_STRUCT_STAT *)req->ptr2); break; 1653 req->result = lstat (req->ptr1, (EIO_STRUCT_STAT *)req->ptr2); break;
1534 case EIO_FSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); 1654 case EIO_FSTAT: ALLOC (sizeof (EIO_STRUCT_STAT));
1535 req->result = fstat (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break; 1655 req->result = fstat (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break;
1536 1656
1657 case EIO_STATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS));
1658 req->result = statvfs (req->ptr1, (EIO_STRUCT_STATVFS *)req->ptr2); break;
1659 case EIO_FSTATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS));
1660 req->result = fstatvfs (req->int1, (EIO_STRUCT_STATVFS *)req->ptr2); break;
1661
1537 case EIO_CHOWN: req->result = chown (req->ptr1, req->int2, req->int3); break; 1662 case EIO_CHOWN: req->result = chown (req->ptr1, req->int2, req->int3); break;
1538 case EIO_FCHOWN: req->result = fchown (req->int1, req->int2, req->int3); break; 1663 case EIO_FCHOWN: req->result = fchown (req->int1, req->int2, req->int3); break;
1539 case EIO_CHMOD: req->result = chmod (req->ptr1, (mode_t)req->int2); break; 1664 case EIO_CHMOD: req->result = chmod (req->ptr1, (mode_t)req->int2); break;
1540 case EIO_FCHMOD: req->result = fchmod (req->int1, (mode_t)req->int2); break; 1665 case EIO_FCHMOD: req->result = fchmod (req->int1, (mode_t)req->int2); break;
1541 case EIO_TRUNCATE: req->result = truncate (req->ptr1, req->offs); break; 1666 case EIO_TRUNCATE: req->result = truncate (req->ptr1, req->offs); break;
1550 case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break; 1675 case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break;
1551 case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break; 1676 case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break;
1552 case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break; 1677 case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break;
1553 case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->int3); break; 1678 case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->int3); break;
1554 1679
1555 case EIO_READLINK: ALLOC (NAME_MAX); 1680 case EIO_READLINK: ALLOC (PATH_MAX);
1556 req->result = readlink (req->ptr1, req->ptr2, NAME_MAX); break; 1681 req->result = readlink (req->ptr1, req->ptr2, PATH_MAX); break;
1557 1682
1558 case EIO_SYNC: req->result = 0; sync (); break; 1683 case EIO_SYNC: req->result = 0; sync (); break;
1559 case EIO_FSYNC: req->result = fsync (req->int1); break; 1684 case EIO_FSYNC: req->result = fsync (req->int1); break;
1560 case EIO_FDATASYNC: req->result = fdatasync (req->int1); break; 1685 case EIO_FDATASYNC: req->result = fdatasync (req->int1); break;
1561 case EIO_MSYNC: req->result = msync (req->ptr2, req->size, req->int1); break; 1686 case EIO_MSYNC: req->result = eio__msync (req->ptr2, req->size, req->int1); break;
1562 case EIO_MTOUCH: req->result = eio__mtouch (req->ptr2, req->size, req->int1); break; 1687 case EIO_MTOUCH: req->result = eio__mtouch (req->ptr2, req->size, req->int1); break;
1688 case EIO_MLOCK: req->result = eio__mlock (req->ptr2, req->size); break;
1689 case EIO_MLOCKALL: req->result = eio__mlockall (req->int1); break;
1563 case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break; 1690 case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break;
1564 1691
1565 case EIO_READDIR: eio__scandir (req, self); break; 1692 case EIO_READDIR: eio__scandir (req, self); break;
1566 1693
1567 case EIO_BUSY: 1694 case EIO_BUSY:
1653eio_req *eio_mtouch (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data) 1780eio_req *eio_mtouch (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data)
1654{ 1781{
1655 REQ (EIO_MTOUCH); req->ptr2 = addr; req->size = length; req->int1 = flags; SEND; 1782 REQ (EIO_MTOUCH); req->ptr2 = addr; req->size = length; req->int1 = flags; SEND;
1656} 1783}
1657 1784
/*
 * eio_mlock: builds an EIO_MLOCK request, storing addr in req->ptr2 and
 * length in req->size. The request dispatcher in this file passes exactly
 * those two fields to eio__mlock.
 *
 * REQ and SEND are macros defined outside this excerpt; presumably REQ sets
 * up req from pri/cb/data and SEND submits and returns it - confirm there.
 */
1785eio_req *eio_mlock (void *addr, size_t length, int pri, eio_cb cb, void *data)
1786{
1787 REQ (EIO_MLOCK); req->ptr2 = addr; req->size = length; SEND;
1788}
1789
/*
 * eio_mlockall: builds an EIO_MLOCKALL request, storing flags in req->int1.
 * The request dispatcher in this file passes req->int1 to eio__mlockall,
 * which reads it as a combination of EIO_MCL_CURRENT / EIO_MCL_FUTURE.
 *
 * REQ and SEND are macros defined outside this excerpt; presumably REQ sets
 * up req from pri/cb/data and SEND submits and returns it - confirm there.
 */
1790eio_req *eio_mlockall (int flags, int pri, eio_cb cb, void *data)
1791{
1792 REQ (EIO_MLOCKALL); req->int1 = flags; SEND;
1793}
1794
1658eio_req *eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data) 1795eio_req *eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data)
1659{ 1796{
1660 REQ (EIO_SYNC_FILE_RANGE); req->int1 = fd; req->offs = offset; req->size = nbytes; req->int2 = flags; SEND; 1797 REQ (EIO_SYNC_FILE_RANGE); req->int1 = fd; req->offs = offset; req->size = nbytes; req->int2 = flags; SEND;
1661} 1798}
1662 1799
1686} 1823}
1687 1824
1688eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data) 1825eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data)
1689{ 1826{
1690 REQ (EIO_FSTAT); req->int1 = fd; SEND; 1827 REQ (EIO_FSTAT); req->int1 = fd; SEND;
1828}
1829
/*
 * eio_fstatvfs: builds an EIO_FSTATVFS request, storing fd in req->int1.
 * The request dispatcher in this file calls fstatvfs (req->int1, ...) with
 * req->ptr2 as the EIO_STRUCT_STATVFS result buffer.
 *
 * REQ and SEND are macros defined outside this excerpt; presumably REQ sets
 * up req from pri/cb/data and SEND submits and returns it - confirm there.
 */
1830eio_req *eio_fstatvfs (int fd, int pri, eio_cb cb, void *data)
1831{
1832 REQ (EIO_FSTATVFS); req->int1 = fd; SEND;
1691} 1833}
1692 1834
1693eio_req *eio_futime (int fd, double atime, double mtime, int pri, eio_cb cb, void *data) 1835eio_req *eio_futime (int fd, double atime, double mtime, int pri, eio_cb cb, void *data)
1694{ 1836{
1695 REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND; 1837 REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND;
1767} 1909}
1768 1910
1769eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data) 1911eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data)
1770{ 1912{
1771 return eio__1path (EIO_LSTAT, path, pri, cb, data); 1913 return eio__1path (EIO_LSTAT, path, pri, cb, data);
1914}
1915
/*
 * eio_statvfs: path-based statvfs request. Delegates to eio__1path with
 * request type EIO_STATVFS and returns whatever that helper returns.
 * The request dispatcher in this file calls statvfs (req->ptr1, ...) with
 * req->ptr2 as the EIO_STRUCT_STATVFS result buffer.
 *
 * eio__1path is defined outside this excerpt; presumably it stores path in
 * req->ptr1 - confirm there.
 */
1916eio_req *eio_statvfs (const char *path, int pri, eio_cb cb, void *data)
1917{
1918 return eio__1path (EIO_STATVFS, path, pri, cb, data);
1772} 1919}
1773 1920
1774eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data) 1921eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data)
1775{ 1922{
1776 return eio__1path (EIO_UNLINK, path, pri, cb, data); 1923 return eio__1path (EIO_UNLINK, path, pri, cb, data);

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines