ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libeio/eio.c
(Generate patch)

Comparing libeio/eio.c (file contents):
Revision 1.56 by root, Sun Sep 12 03:36:28 2010 UTC vs.
Revision 1.64 by root, Thu May 26 04:05:18 2011 UTC

1/* 1/*
2 * libeio implementation 2 * libeio implementation
3 * 3 *
4 * Copyright (c) 2007,2008,2009,2010 Marc Alexander Lehmann <libeio@schmorp.de> 4 * Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libeio@schmorp.de>
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without modifica- 7 * Redistribution and use in source and binary forms, with or without modifica-
8 * tion, are permitted provided that the following conditions are met: 8 * tion, are permitted provided that the following conditions are met:
9 * 9 *
79# include <unistd.h> 79# include <unistd.h>
80# include <utime.h> 80# include <utime.h>
81# include <signal.h> 81# include <signal.h>
82# include <dirent.h> 82# include <dirent.h>
83 83
84#if _POSIX_MEMLOCK || _POSIX_MAPPED_FILES 84#if _POSIX_MEMLOCK || _POSIX_MEMLOCK_RANGE || _POSIX_MAPPED_FILES
85# include <sys/mman.h> 85# include <sys/mman.h>
86#endif 86#endif
87 87
88/* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */ 88/* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */
89# if __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ 89# if __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__
132#endif 132#endif
133#ifndef D_NAMLEN 133#ifndef D_NAMLEN
134# define D_NAMLEN(de) strlen ((de)->d_name) 134# define D_NAMLEN(de) strlen ((de)->d_name)
135#endif 135#endif
136 136
137/* number of seconds after which an idle thread exits */
138#define IDLE_TIMEOUT 10
139
140/* used for struct dirent, AIX doesn't provide it */ 137/* used for struct dirent, AIX doesn't provide it */
141#ifndef NAME_MAX 138#ifndef NAME_MAX
142# define NAME_MAX 4096 139# define NAME_MAX 4096
143#endif 140#endif
144 141
223static unsigned int max_poll_reqs; /* reslock */ 220static unsigned int max_poll_reqs; /* reslock */
224 221
225static volatile unsigned int nreqs; /* reqlock */ 222static volatile unsigned int nreqs; /* reqlock */
226static volatile unsigned int nready; /* reqlock */ 223static volatile unsigned int nready; /* reqlock */
227static volatile unsigned int npending; /* reqlock */ 224static volatile unsigned int npending; /* reqlock */
228static volatile unsigned int max_idle = 4; 225static volatile unsigned int max_idle = 4; /* maximum number of threads that can idle indefinitely */
226static volatile unsigned int idle_timeout = 10; /* number of seconds after which an idle thread exits */
229 227
230static xmutex_t wrklock = X_MUTEX_INIT; 228static xmutex_t wrklock;
231static xmutex_t reslock = X_MUTEX_INIT; 229static xmutex_t reslock;
232static xmutex_t reqlock = X_MUTEX_INIT; 230static xmutex_t reqlock;
233static xcond_t reqwait = X_COND_INIT; 231static xcond_t reqwait;
234 232
235#if !HAVE_PREADWRITE 233#if !HAVE_PREADWRITE
236/* 234/*
237 * make our pread/pwrite emulation safe against themselves, but not against 235 * make our pread/pwrite emulation safe against themselves, but not against
238 * normal read/write by using a mutex. slows down execution a lot, 236 * normal read/write by using a mutex. slows down execution a lot,
368 } 366 }
369 367
370 abort (); 368 abort ();
371} 369}
372 370
371static void etp_thread_init (void)
372{
373 X_MUTEX_CREATE (wrklock);
374 X_MUTEX_CREATE (reslock);
375 X_MUTEX_CREATE (reqlock);
376 X_COND_CREATE (reqwait);
377}
378
373static void etp_atfork_prepare (void) 379static void etp_atfork_prepare (void)
374{ 380{
375 X_LOCK (wrklock); 381 X_LOCK (wrklock);
376 X_LOCK (reqlock); 382 X_LOCK (reqlock);
377 X_LOCK (reslock); 383 X_LOCK (reslock);
415 idle = 0; 421 idle = 0;
416 nreqs = 0; 422 nreqs = 0;
417 nready = 0; 423 nready = 0;
418 npending = 0; 424 npending = 0;
419 425
420 etp_atfork_parent (); 426 etp_thread_init ();
421} 427}
422 428
423static void 429static void
424etp_once_init (void) 430etp_once_init (void)
425{ 431{
432 etp_thread_init ();
426 X_THREAD_ATFORK (etp_atfork_prepare, etp_atfork_parent, etp_atfork_child); 433 X_THREAD_ATFORK (etp_atfork_prepare, etp_atfork_parent, etp_atfork_child);
427} 434}
428 435
429static int 436static int
430etp_init (void (*want_poll)(void), void (*done_poll)(void)) 437etp_init (void (*want_poll)(void), void (*done_poll)(void))
621} 628}
622 629
623static void etp_set_max_idle (unsigned int nthreads) 630static void etp_set_max_idle (unsigned int nthreads)
624{ 631{
625 if (WORDACCESS_UNSAFE) X_LOCK (reqlock); 632 if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
626 max_idle = nthreads <= 0 ? 1 : nthreads; 633 max_idle = nthreads;
634 if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
635}
636
637static void etp_set_idle_timeout (unsigned int seconds)
638{
639 if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
640 idle_timeout = seconds;
627 if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); 641 if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
628} 642}
629 643
630static void etp_set_min_parallel (unsigned int nthreads) 644static void etp_set_min_parallel (unsigned int nthreads)
631{ 645{
759void eio_set_max_idle (unsigned int nthreads) 773void eio_set_max_idle (unsigned int nthreads)
760{ 774{
761 etp_set_max_idle (nthreads); 775 etp_set_max_idle (nthreads);
762} 776}
763 777
778void eio_set_idle_timeout (unsigned int seconds)
779{
780 etp_set_idle_timeout (seconds);
781}
782
764void eio_set_min_parallel (unsigned int nthreads) 783void eio_set_min_parallel (unsigned int nthreads)
765{ 784{
766 etp_set_min_parallel (nthreads); 785 etp_set_min_parallel (nthreads);
767} 786}
768 787
816 835
817 return res; 836 return res;
818} 837}
819#endif 838#endif
820 839
821#ifndef HAVE_FUTIMES 840#ifndef HAVE_UTIMES
822 841
823# undef utimes 842# undef utimes
824# undef futimes
825# define utimes(path,times) eio__utimes (path, times) 843# define utimes(path,times) eio__utimes (path, times)
826# define futimes(fd,times) eio__futimes (fd, times)
827 844
828static int 845static int
829eio__utimes (const char *filename, const struct timeval times[2]) 846eio__utimes (const char *filename, const struct timeval times[2])
830{ 847{
831 if (times) 848 if (times)
838 return utime (filename, &buf); 855 return utime (filename, &buf);
839 } 856 }
840 else 857 else
841 return utime (filename, 0); 858 return utime (filename, 0);
842} 859}
860
861#endif
862
863#ifndef HAVE_FUTIMES
864
865# undef futimes
866# define futimes(fd,times) eio__futimes (fd, times)
843 867
844static int eio__futimes (int fd, const struct timeval tv[2]) 868static int eio__futimes (int fd, const struct timeval tv[2])
845{ 869{
846 errno = ENOSYS; 870 errno = ENOSYS;
847 return -1; 871 return -1;
1036} 1060}
1037 1061
1038static signed char 1062static signed char
1039eio_dent_cmp (const eio_dirent *a, const eio_dirent *b) 1063eio_dent_cmp (const eio_dirent *a, const eio_dirent *b)
1040{ 1064{
1041 return a->score - b->score ? a->score - b->score /* works because our signed char is always 0..100 */ 1065 return a->score - b->score ? a->score - b->score /* works because our signed char is always 0..100 */
1042 : a->inode < b->inode ? -1 : a->inode > b->inode ? 1 : 0; 1066 : a->inode < b->inode ? -1
1067 : a->inode > b->inode ? 1
1068 : 0;
1043} 1069}
1044 1070
1045#define EIO_DENT_CMP(i,op,j) eio_dent_cmp (&i, &j) op 0 1071#define EIO_DENT_CMP(i,op,j) eio_dent_cmp (&i, &j) op 0
1046 1072
1047#define EIO_SORT_CUTOFF 30 /* quite high, but performs well on many filesystems */ 1073#define EIO_SORT_CUTOFF 30 /* quite high, but performs well on many filesystems */
1053 unsigned char bits [9 + sizeof (ino_t) * 8]; 1079 unsigned char bits [9 + sizeof (ino_t) * 8];
1054 unsigned char *bit = bits; 1080 unsigned char *bit = bits;
1055 1081
1056 assert (CHAR_BIT == 8); 1082 assert (CHAR_BIT == 8);
1057 assert (sizeof (eio_dirent) * 8 < 256); 1083 assert (sizeof (eio_dirent) * 8 < 256);
1058 assert (offsetof (eio_dirent, inode)); /* we use 0 as sentinel */ 1084 assert (offsetof (eio_dirent, inode)); /* we use bit #0 as sentinel */
1059 assert (offsetof (eio_dirent, score)); /* we use 0 as sentinel */ 1085 assert (offsetof (eio_dirent, score)); /* we use bit #0 as sentinel */
1060 1086
1061 if (size <= EIO_SORT_FAST) 1087 if (size <= EIO_SORT_FAST)
1062 return; 1088 return;
1063 1089
1064 /* first prepare an array of bits to test in our radix sort */ 1090 /* first prepare an array of bits to test in our radix sort */
1219 flags &= ~(EIO_READDIR_DIRS_FIRST | EIO_READDIR_STAT_ORDER); 1245 flags &= ~(EIO_READDIR_DIRS_FIRST | EIO_READDIR_STAT_ORDER);
1220 1246
1221 X_LOCK (wrklock); 1247 X_LOCK (wrklock);
1222 /* the corresponding closedir is in ETP_WORKER_CLEAR */ 1248 /* the corresponding closedir is in ETP_WORKER_CLEAR */
1223 self->dirp = dirp = opendir (req->ptr1); 1249 self->dirp = dirp = opendir (req->ptr1);
1250
1224 req->flags |= EIO_FLAG_PTR1_FREE | EIO_FLAG_PTR2_FREE; 1251 req->flags |= EIO_FLAG_PTR1_FREE | EIO_FLAG_PTR2_FREE;
1225 req->ptr1 = dents = flags ? malloc (dentalloc * sizeof (eio_dirent)) : 0; 1252 req->ptr1 = dents = flags ? malloc (dentalloc * sizeof (eio_dirent)) : 0;
1226 req->ptr2 = names = malloc (namesalloc); 1253 req->ptr2 = names = malloc (namesalloc);
1227 X_UNLOCK (wrklock); 1254 X_UNLOCK (wrklock);
1228 1255
1240 /* sort etc. */ 1267 /* sort etc. */
1241 req->int1 = flags; 1268 req->int1 = flags;
1242 req->result = dentoffs; 1269 req->result = dentoffs;
1243 1270
1244 if (flags & EIO_READDIR_STAT_ORDER) 1271 if (flags & EIO_READDIR_STAT_ORDER)
1245 eio_dent_sort (dents, dentoffs, 0, inode_bits); /* sort by inode exclusively */ 1272 eio_dent_sort (dents, dentoffs, flags & EIO_READDIR_DIRS_FIRST ? 7 : 0, inode_bits);
1246 else if (flags & EIO_READDIR_DIRS_FIRST) 1273 else if (flags & EIO_READDIR_DIRS_FIRST)
1247 if (flags & EIO_READDIR_FOUND_UNKNOWN) 1274 if (flags & EIO_READDIR_FOUND_UNKNOWN)
1248 eio_dent_sort (dents, dentoffs, 7, inode_bits); /* sort by score and inode */ 1275 eio_dent_sort (dents, dentoffs, 7, inode_bits); /* sort by score and inode */
1249 else 1276 else
1250 { 1277 {
1252 eio_dirent *oth = dents + dentoffs; 1279 eio_dirent *oth = dents + dentoffs;
1253 eio_dirent *dir = dents; 1280 eio_dirent *dir = dents;
1254 1281
1255 /* now partition dirs to the front, and non-dirs to the back */ 1282 /* now partition dirs to the front, and non-dirs to the back */
1256 /* by walking from both sides and swapping if necessary */ 1283 /* by walking from both sides and swapping if necessary */
1257 /* also clear score, so it doesn't influence sorting */
1258 while (oth > dir) 1284 while (oth > dir)
1259 { 1285 {
1260 if (dir->type == EIO_DT_DIR) 1286 if (dir->type == EIO_DT_DIR)
1261 ++dir; 1287 ++dir;
1262 else if ((--oth)->type == EIO_DT_DIR) 1288 else if ((--oth)->type == EIO_DT_DIR)
1265 1291
1266 ++dir; 1292 ++dir;
1267 } 1293 }
1268 } 1294 }
1269 1295
1270 /* now sort the dirs only */ 1296 /* now sort the dirs only (dirs all have the same score) */
1271 eio_dent_sort (dents, dir - dents, 0, inode_bits); 1297 eio_dent_sort (dents, dir - dents, 0, inode_bits);
1272 } 1298 }
1273 1299
1274 break; 1300 break;
1275 } 1301 }
1426 /* round up length */ 1452 /* round up length */
1427 *length = (*length + mask) & ~mask; 1453 *length = (*length + mask) & ~mask;
1428} 1454}
1429 1455
1430#if !_POSIX_MEMLOCK 1456#if !_POSIX_MEMLOCK
1431# define eio__mlock(a,b) ((errno = ENOSYS), -1)
1432# define eio__mlockall(a) ((errno = ENOSYS), -1) 1457# define eio__mlockall(a) ((errno = ENOSYS), -1)
1433#else 1458#else
1434
1435static int
1436eio__mlock (void *addr, size_t length)
1437{
1438 eio_page_align (&addr, &length);
1439
1440 mlock (addr, length);
1441}
1442 1459
1443static int 1460static int
1444eio__mlockall (int flags) 1461eio__mlockall (int flags)
1445{ 1462{
1446 #if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 7 1463 #if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 7
1454 flags = 0 1471 flags = 0
1455 | (flags & EIO_MCL_CURRENT ? MCL_CURRENT : 0) 1472 | (flags & EIO_MCL_CURRENT ? MCL_CURRENT : 0)
1456 | (flags & EIO_MCL_FUTURE ? MCL_FUTURE : 0); 1473 | (flags & EIO_MCL_FUTURE ? MCL_FUTURE : 0);
1457 } 1474 }
1458 1475
1459 mlockall (flags); 1476 return mlockall (flags);
1460} 1477}
1478#endif
1479
1480#if !_POSIX_MEMLOCK_RANGE
1481# define eio__mlock(a,b) ((errno = ENOSYS), -1)
1482#else
1483
1484static int
1485eio__mlock (void *addr, size_t length)
1486{
1487 eio_page_align (&addr, &length);
1488
1489 return mlock (addr, length);
1490}
1491
1461#endif 1492#endif
1462 1493
1463#if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO) 1494#if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO)
1464# define eio__msync(a,b,c) ((errno = ENOSYS), -1) 1495# define eio__msync(a,b,c) ((errno = ENOSYS), -1)
1465#else 1496#else
1541 if (req) 1572 if (req)
1542 break; 1573 break;
1543 1574
1544 ++idle; 1575 ++idle;
1545 1576
1546 ts.tv_sec = time (0) + IDLE_TIMEOUT; 1577 ts.tv_sec = time (0) + idle_timeout;
1547 if (X_COND_TIMEDWAIT (reqwait, reqlock, ts) == ETIMEDOUT) 1578 if (X_COND_TIMEDWAIT (reqwait, reqlock, ts) == ETIMEDOUT)
1548 { 1579 {
1549 if (idle > max_idle) 1580 if (idle > max_idle)
1550 { 1581 {
1551 --idle; 1582 --idle;
1630 return 0; \ 1661 return 0; \
1631 } 1662 }
1632 1663
1633static void eio_execute (etp_worker *self, eio_req *req) 1664static void eio_execute (etp_worker *self, eio_req *req)
1634{ 1665{
1635 errno = 0;
1636
1637 switch (req->type) 1666 switch (req->type)
1638 { 1667 {
1639 case EIO_READ: ALLOC (req->size); 1668 case EIO_READ: ALLOC (req->size);
1640 req->result = req->offs >= 0 1669 req->result = req->offs >= 0
1641 ? pread (req->int1, req->ptr2, req->size, req->offs) 1670 ? pread (req->int1, req->ptr2, req->size, req->offs)
1673 case EIO_RMDIR: req->result = rmdir (req->ptr1); break; 1702 case EIO_RMDIR: req->result = rmdir (req->ptr1); break;
1674 case EIO_MKDIR: req->result = mkdir (req->ptr1, (mode_t)req->int2); break; 1703 case EIO_MKDIR: req->result = mkdir (req->ptr1, (mode_t)req->int2); break;
1675 case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break; 1704 case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break;
1676 case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break; 1705 case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break;
1677 case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break; 1706 case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break;
1678 case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->int3); break; 1707 case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->offs); break;
1679 1708
1680 case EIO_READLINK: ALLOC (PATH_MAX); 1709 case EIO_READLINK: ALLOC (PATH_MAX);
1681 req->result = readlink (req->ptr1, req->ptr2, PATH_MAX); break; 1710 req->result = readlink (req->ptr1, req->ptr2, PATH_MAX); break;
1682 1711
1683 case EIO_SYNC: req->result = 0; sync (); break; 1712 case EIO_SYNC: req->result = 0; sync (); break;
1691 1720
1692 case EIO_READDIR: eio__scandir (req, self); break; 1721 case EIO_READDIR: eio__scandir (req, self); break;
1693 1722
1694 case EIO_BUSY: 1723 case EIO_BUSY:
1695#ifdef _WIN32 1724#ifdef _WIN32
1696 Sleep (req->nv1 * 1000.); 1725 Sleep (req->nv1 * 1e3);
1697#else 1726#else
1698 { 1727 {
1699 struct timeval tv; 1728 struct timeval tv;
1700 1729
1701 tv.tv_sec = req->nv1; 1730 tv.tv_sec = req->nv1;
1702 tv.tv_usec = (req->nv1 - tv.tv_sec) * 1000000.; 1731 tv.tv_usec = (req->nv1 - tv.tv_sec) * 1e6;
1703 1732
1704 req->result = select (0, 0, 0, 0, &tv); 1733 req->result = select (0, 0, 0, 0, &tv);
1705 } 1734 }
1706#endif 1735#endif
1707 break; 1736 break;
1722 times = tv; 1751 times = tv;
1723 } 1752 }
1724 else 1753 else
1725 times = 0; 1754 times = 0;
1726 1755
1727
1728 req->result = req->type == EIO_FUTIME 1756 req->result = req->type == EIO_FUTIME
1729 ? futimes (req->int1, times) 1757 ? futimes (req->int1, times)
1730 : utimes (req->ptr1, times); 1758 : utimes (req->ptr1, times);
1731 } 1759 }
1732 break; 1760 break;
1741 case EIO_CUSTOM: 1769 case EIO_CUSTOM:
1742 ((void (*)(eio_req *))req->feed) (req); 1770 ((void (*)(eio_req *))req->feed) (req);
1743 break; 1771 break;
1744 1772
1745 default: 1773 default:
1774 errno = ENOSYS;
1746 req->result = -1; 1775 req->result = -1;
1747 break; 1776 break;
1748 } 1777 }
1749 1778
1750 req->errorno = errno; 1779 req->errorno = errno;
1933 REQ (EIO_READDIR); PATH; req->int1 = flags; SEND; 1962 REQ (EIO_READDIR); PATH; req->int1 = flags; SEND;
1934} 1963}
1935 1964
1936eio_req *eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data) 1965eio_req *eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data)
1937{ 1966{
1938 REQ (EIO_MKNOD); PATH; req->int2 = (long)mode; req->int3 = (long)dev; SEND; 1967 REQ (EIO_MKNOD); PATH; req->int2 = (long)mode; req->offs = (off_t)dev; SEND;
1939} 1968}
1940 1969
1941static eio_req * 1970static eio_req *
1942eio__2path (int type, const char *path, const char *new_path, int pri, eio_cb cb, void *data) 1971eio__2path (int type, const char *path, const char *new_path, int pri, eio_cb cb, void *data)
1943{ 1972{

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines