--- libeio/eio.c 2010/01/02 14:24:32 1.49
+++ libeio/eio.c 2010/09/12 03:36:28 1.56
@@ -51,6 +51,7 @@
 #include
 #include
 #include
+#include /* NOTE(review): header name missing in this copy of the patch - TODO restore */
 #include
 #include
 #include
@@ -75,14 +76,17 @@
 # include "config.h"
 # include
 # include
-# include
 # include
 # include
 # include
 # include
 
+#if _POSIX_MEMLOCK || _POSIX_MAPPED_FILES
+# include /* NOTE(review): header name missing in this copy - presumably <sys/mman.h>; confirm */
+#endif
+
 /* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */
-# if __freebsd || defined __NetBSD__ || defined __OpenBSD__
+# if __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__
 # define _DIRENT_HAVE_D_TYPE /* sigh */
 # define D_INO(de) (de)->d_fileno
 # define D_NAMLEN(de) (de)->d_namlen
@@ -108,7 +112,7 @@
 #if HAVE_SENDFILE
 # if __linux
 # include
-# elif __freebsd || defined __APPLE__
+# elif __FreeBSD__ || defined __APPLE__
 # include
 # include
 # elif __hpux
@@ -138,6 +142,11 @@
 # define NAME_MAX 4096
 #endif
 
+/* used for readlink etc. */
+#ifndef PATH_MAX
+# define PATH_MAX 4096
+#endif
+
 /* buffer size for various temporary buffers */
 #define EIO_BUFSIZE 65536
 
@@ -218,10 +227,10 @@
 static volatile unsigned int npending; /* reqlock */
 static volatile unsigned int max_idle = 4;
 
-static mutex_t wrklock = X_MUTEX_INIT;
-static mutex_t reslock = X_MUTEX_INIT;
-static mutex_t reqlock = X_MUTEX_INIT;
-static cond_t reqwait = X_COND_INIT;
+static xmutex_t wrklock = X_MUTEX_INIT;
+static xmutex_t reslock = X_MUTEX_INIT;
+static xmutex_t reqlock = X_MUTEX_INIT;
+static xcond_t reqwait = X_COND_INIT;
 
 #if !HAVE_PREADWRITE
 /*
@@ -229,7 +238,7 @@
  * normal read/write by using a mutex. slows down execution a lot,
  * but that's your problem, not mine.
  */
-static mutex_t preadwritelock = X_MUTEX_INIT;
+static xmutex_t preadwritelock = X_MUTEX_INIT;
 #endif
 
 typedef struct etp_worker
@@ -237,7 +246,7 @@
   /* locked by wrklock */
   struct etp_worker *prev, *next;
 
-  thread_t tid;
+  xthread_t tid;
 
   /* locked by reslock, reqlock or wrklock */
   ETP_REQ *req; /* currently processed request */
@@ -911,7 +920,7 @@
 # if __linux
       res = sendfile (ofd, ifd, &offset, count);
 
-# elif __freebsd
+# elif __FreeBSD__
       /*
        * Of course, the freebsd sendfile is a dire hack with no thoughts
        * wasted on making it similar to other I/O functions.
@@ -920,8 +929,16 @@
         off_t sbytes;
         res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0);
 
-        if (res < 0 && sbytes)
-          /* maybe only on EAGAIN: as usual, the manpage leaves you guessing */
+        #if 0 /* according to the manpage, this is correct, but broken behaviour */
+        /* freebsd' sendfile will return 0 on success */
+        /* freebsd 8 documents it as only setting *sbytes on EINTR and EAGAIN, but */
+        /* not on e.g. EIO or EPIPE - sounds broken */
+        if ((res < 0 && (errno == EAGAIN || errno == EINTR) && sbytes) || res == 0)
+          res = sbytes;
+        #endif
+
+        /* according to source inspection, this is correct, and useful behaviour */
+        if (sbytes)
           res = sbytes;
       }
 
@@ -931,7 +948,8 @@
         off_t sbytes = count;
         res = sendfile (ifd, ofd, offset, &sbytes, 0, 0);
 
-        if (res < 0 && errno == EAGAIN && sbytes)
+        /* according to the manpage, sbytes is always valid */
+        if (sbytes)
           res = sbytes;
       }
 
@@ -1379,6 +1397,69 @@
     }
 }
 
+#ifdef PAGESIZE
+# define eio_pagesize() PAGESIZE
+#else /* no compile-time PAGESIZE: ask sysconf once and cache the answer */
+static intptr_t
+eio_pagesize (void)
+{
+  static intptr_t page; /* 0 until the first call has stored the sysconf result */
+
+  if (!page)
+    page = sysconf (_SC_PAGESIZE);
+
+  return page;
+}
+#endif
+/* widen [*addr, *addr + *length) to whole pages, in place; mask arithmetic assumes a power-of-two page size */
+static void
+eio_page_align (void **addr, size_t *length)
+{
+  intptr_t mask = eio_pagesize () - 1;
+
+  /* round down addr */
+  intptr_t adj = mask & (intptr_t)*addr;
+
+  *addr = (void *)((intptr_t)*addr - adj);
+  *length += adj; /* keep the original end covered after moving the start down */
+
+  /* round up length */
+  *length = (*length + mask) & ~mask;
+}
+
+#if !_POSIX_MEMLOCK
+# define eio__mlock(a,b) ((errno = ENOSYS), -1)
+# define eio__mlockall(a) ((errno = ENOSYS), -1)
+#else
+/* page-align the region, then mlock it; returns mlock's result (stored in req->result by the caller) */
+static int
+eio__mlock (void *addr, size_t length)
+{
+  eio_page_align (&addr, &length);
+
+  return mlock (addr, length); /* must be returned: falling off the end of an int function whose value is used is undefined */
+}
+/* translate EIO_MCL_* bits to the system's MCL_* bits when they differ, then mlockall; returns its result */
+static int
+eio__mlockall (int flags)
+{
+  #if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 7
+    extern int mallopt (int, int);
+    mallopt (-6, 238); /* http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=473812 */
+  #endif
+
+  if (EIO_MCL_CURRENT != MCL_CURRENT
+      || EIO_MCL_FUTURE != MCL_FUTURE)
+    {
+      flags = 0
+         | (flags & EIO_MCL_CURRENT ? MCL_CURRENT : 0)
+         | (flags & EIO_MCL_FUTURE ? MCL_FUTURE : 0);
+    }
+
+  return mlockall (flags); /* propagate success/failure to the caller */
+}
+#endif
+
 #if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO)
 # define eio__msync(a,b,c) ((errno = ENOSYS), -1)
 #else
@@ -1386,6 +1467,8 @@
 int
 eio__msync (void *mem, size_t len, int flags)
 {
+  eio_page_align (&mem, &len);
+
   if (EIO_MS_ASYNC != MS_SYNC
       || EIO_MS_INVALIDATE != MS_INVALIDATE
       || EIO_MS_SYNC != MS_SYNC)
@@ -1404,25 +1487,19 @@
 int
 eio__mtouch (void *mem, size_t len, int flags)
 {
-  intptr_t addr = (intptr_t)mem;
-  intptr_t end = addr + len;
-#ifdef PAGESIZE
-  const intptr_t page = PAGESIZE;
-#else
-  static intptr_t page;
-
-  if (!page)
-    page = sysconf (_SC_PAGESIZE);
-#endif
+  eio_page_align (&mem, &len);
 
-  /* round down to start of page, although this is probably useless */
-  addr &= ~(page - 1); /* assume page size is always a power of two */
-
-  if (addr < end)
-    if (flags & EIO_MT_MODIFY) /* modify */
-      do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len);
-    else
-      do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len);
+  {
+    intptr_t addr = (intptr_t)mem;
+    intptr_t end = addr + len; /* first address past the page-aligned region; this is the loop bound */
+    intptr_t page = eio_pagesize ();
+
+    if (addr < end)
+      if (flags & EIO_MT_MODIFY) /* modify */
+        do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < end);
+      else
+        do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < end);
+  }
 
   return 0;
 }
@@ -1577,6 +1654,11 @@
       case EIO_FSTAT: ALLOC (sizeof (EIO_STRUCT_STAT));
                       req->result = fstat (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break;
 
+      case EIO_STATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); /* presumably ALLOC leaves the buffer in req->ptr2, as used below - confirm against the ALLOC macro */
+                        req->result = statvfs (req->ptr1, (EIO_STRUCT_STATVFS *)req->ptr2); break; /* path comes from req->ptr1 */
+      case EIO_FSTATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS));
+                         req->result = fstatvfs (req->int1, (EIO_STRUCT_STATVFS *)req->ptr2); break; /* descriptor comes from req->int1 */
+
       case EIO_CHOWN: req->result = chown (req->ptr1, req->int2, req->int3); break;
       case EIO_FCHOWN: req->result = fchown (req->int1, req->int2, req->int3); break;
       case EIO_CHMOD: req->result = chmod (req->ptr1, (mode_t)req->int2); break;
@@ -1595,14 +1677,16 @@
       case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break;
       case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->int3); break;
 
-      case EIO_READLINK: ALLOC (NAME_MAX);
-                         req->result = readlink (req->ptr1, req->ptr2, NAME_MAX); break;
+      case EIO_READLINK: ALLOC (PATH_MAX); /* same PATH_MAX is passed to ALLOC and as the readlink size limit */
+                         req->result = readlink (req->ptr1, req->ptr2, PATH_MAX); break;
 
       case EIO_SYNC: req->result = 0; sync (); break;
       case EIO_FSYNC: req->result = fsync (req->int1); break;
       case EIO_FDATASYNC: req->result = fdatasync (req->int1); break;
       case EIO_MSYNC: req->result = eio__msync (req->ptr2, req->size, req->int1); break;
       case EIO_MTOUCH: req->result = eio__mtouch (req->ptr2, req->size, req->int1); break;
+      case EIO_MLOCK: req->result = eio__mlock (req->ptr2, req->size); break; /* address in ptr2, length in size */
+      case EIO_MLOCKALL: req->result = eio__mlockall (req->int1); break; /* flags in int1 */
       case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break;
 
       case EIO_READDIR: eio__scandir (req, self); break;
@@ -1698,6 +1782,16 @@
   REQ (EIO_MTOUCH); req->ptr2 = addr; req->size = length; req->int1 = flags; SEND;
 }
 
+eio_req *eio_mlock (void *addr, size_t length, int pri, eio_cb cb, void *data) /* request wrapper for EIO_MLOCK */
+{
+  REQ (EIO_MLOCK); req->ptr2 = addr; req->size = length; SEND; /* REQ and SEND are macros defined outside this view */
+}
+
+eio_req *eio_mlockall (int flags, int pri, eio_cb cb, void *data) /* request wrapper for EIO_MLOCKALL */
+{
+  REQ (EIO_MLOCKALL); req->int1 = flags; SEND; /* flags travel in req->int1 */
+}
+
 eio_req *eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data)
 {
   REQ (EIO_SYNC_FILE_RANGE); req->int1 = fd; req->offs = offset; req->size = nbytes; req->int2 = flags; SEND;
@@ -1733,6 +1827,11 @@
   REQ (EIO_FSTAT); req->int1 = fd; SEND;
 }
 
+eio_req *eio_fstatvfs (int fd, int pri, eio_cb cb, void *data) /* request wrapper for EIO_FSTATVFS */
+{
+  REQ (EIO_FSTATVFS); req->int1 = fd; SEND; /* descriptor travels in req->int1 */
+}
+
 eio_req *eio_futime (int fd, double atime, double mtime, int pri, eio_cb cb, void *data)
 {
   REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND;
@@ -1814,6 +1913,11 @@
   return eio__1path (EIO_LSTAT, path, pri, cb, data);
 }
 
+eio_req *eio_statvfs (const char *path, int pri, eio_cb cb, void *data) /* request wrapper for EIO_STATVFS */
+{
+  return eio__1path (EIO_STATVFS, path, pri, cb, data); /* delegates to the single-path helper eio__1path, like the neighbouring wrappers */
+}
+
 eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data)
 {
   return eio__1path (EIO_UNLINK, path, pri, cb, data);