ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/libeio/eio.c
(Generate patch)

Comparing cvsroot/libeio/eio.c (file contents):
Revision 1.59 by root, Wed Dec 1 07:30:39 2010 UTC vs.
Revision 1.67 by root, Sun Jun 5 20:25:00 2011 UTC

1/* 1/*
2 * libeio implementation 2 * libeio implementation
3 * 3 *
4 * Copyright (c) 2007,2008,2009,2010 Marc Alexander Lehmann <libeio@schmorp.de> 4 * Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libeio@schmorp.de>
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without modifica- 7 * Redistribution and use in source and binary forms, with or without modifica-
8 * tion, are permitted provided that the following conditions are met: 8 * tion, are permitted provided that the following conditions are met:
9 * 9 *
35 * and other provisions required by the GPL. If you do not delete the 35 * and other provisions required by the GPL. If you do not delete the
36 * provisions above, a recipient may use your version of this file under 36 * provisions above, a recipient may use your version of this file under
37 * either the BSD or the GPL. 37 * either the BSD or the GPL.
38 */ 38 */
39 39
40#ifndef _WIN32
41# include "config.h"
42#endif
43
40#include "eio.h" 44#include "eio.h"
41 45
42#ifdef EIO_STACKSIZE 46#ifdef EIO_STACKSIZE
43# define XTHREAD_STACKSIZE EIO_STACKSIZE 47# define XTHREAD_STACKSIZE EIO_STACKSIZE
44#endif 48#endif
54#include <sys/statvfs.h> 58#include <sys/statvfs.h>
55#include <limits.h> 59#include <limits.h>
56#include <fcntl.h> 60#include <fcntl.h>
57#include <assert.h> 61#include <assert.h>
58 62
63/* intptr_t comes from unistd.h, says POSIX/UNIX/tradition */
64/* intptr_t only comes form stdint.h, says idiot openbsd coder */
65#if HAVE_STDINT_H
66# include <stdint.h>
67#endif
68
59#ifndef EIO_FINISH 69#ifndef EIO_FINISH
60# define EIO_FINISH(req) ((req)->finish) && !EIO_CANCELLED (req) ? (req)->finish (req) : 0 70# define EIO_FINISH(req) ((req)->finish) && !EIO_CANCELLED (req) ? (req)->finish (req) : 0
61#endif 71#endif
62 72
63#ifndef EIO_DESTROY 73#ifndef EIO_DESTROY
71#ifdef _WIN32 81#ifdef _WIN32
72 82
73 /*doh*/ 83 /*doh*/
74#else 84#else
75 85
76# include "config.h"
77# include <sys/time.h> 86# include <sys/time.h>
78# include <sys/select.h> 87# include <sys/select.h>
79# include <unistd.h> 88# include <unistd.h>
80# include <utime.h> 89# include <utime.h>
81# include <signal.h> 90# include <signal.h>
82# include <dirent.h> 91# include <dirent.h>
83 92
84#if _POSIX_MEMLOCK || _POSIX_MAPPED_FILES 93#if _POSIX_MEMLOCK || _POSIX_MEMLOCK_RANGE || _POSIX_MAPPED_FILES
85# include <sys/mman.h> 94# include <sys/mman.h>
86#endif 95#endif
87 96
88/* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */ 97/* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */
89# if __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ 98# if __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__
132#endif 141#endif
133#ifndef D_NAMLEN 142#ifndef D_NAMLEN
134# define D_NAMLEN(de) strlen ((de)->d_name) 143# define D_NAMLEN(de) strlen ((de)->d_name)
135#endif 144#endif
136 145
137/* number of seconds after which an idle threads exit */
138#define IDLE_TIMEOUT 10
139
140/* used for struct dirent, AIX doesn't provide it */ 146/* used for struct dirent, AIX doesn't provide it */
141#ifndef NAME_MAX 147#ifndef NAME_MAX
142# define NAME_MAX 4096 148# define NAME_MAX 4096
143#endif 149#endif
144 150
223static unsigned int max_poll_reqs; /* reslock */ 229static unsigned int max_poll_reqs; /* reslock */
224 230
225static volatile unsigned int nreqs; /* reqlock */ 231static volatile unsigned int nreqs; /* reqlock */
226static volatile unsigned int nready; /* reqlock */ 232static volatile unsigned int nready; /* reqlock */
227static volatile unsigned int npending; /* reqlock */ 233static volatile unsigned int npending; /* reqlock */
228static volatile unsigned int max_idle = 4; 234static volatile unsigned int max_idle = 4; /* maximum number of threads that can idle indefinitely */
235static volatile unsigned int idle_timeout = 10; /* number of seconds after which an idle threads exit */
229 236
230static xmutex_t wrklock = X_MUTEX_INIT; 237static xmutex_t wrklock;
231static xmutex_t reslock = X_MUTEX_INIT; 238static xmutex_t reslock;
232static xmutex_t reqlock = X_MUTEX_INIT; 239static xmutex_t reqlock;
233static xcond_t reqwait = X_COND_INIT; 240static xcond_t reqwait;
234 241
235#if !HAVE_PREADWRITE 242#if !HAVE_PREADWRITE
236/* 243/*
237 * make our pread/pwrite emulation safe against themselves, but not against 244 * make our pread/pwrite emulation safe against themselves, but not against
238 * normal read/write by using a mutex. slows down execution a lot, 245 * normal read/write by using a mutex. slows down execution a lot,
368 } 375 }
369 376
370 abort (); 377 abort ();
371} 378}
372 379
380static void etp_thread_init (void)
381{
382 X_MUTEX_CREATE (wrklock);
383 X_MUTEX_CREATE (reslock);
384 X_MUTEX_CREATE (reqlock);
385 X_COND_CREATE (reqwait);
386}
387
373static void etp_atfork_prepare (void) 388static void etp_atfork_prepare (void)
374{ 389{
375 X_LOCK (wrklock); 390 X_LOCK (wrklock);
376 X_LOCK (reqlock); 391 X_LOCK (reqlock);
377 X_LOCK (reslock); 392 X_LOCK (reslock);
415 idle = 0; 430 idle = 0;
416 nreqs = 0; 431 nreqs = 0;
417 nready = 0; 432 nready = 0;
418 npending = 0; 433 npending = 0;
419 434
420 etp_atfork_parent (); 435 etp_thread_init ();
421} 436}
422 437
423static void 438static void
424etp_once_init (void) 439etp_once_init (void)
425{ 440{
441 etp_thread_init ();
426 X_THREAD_ATFORK (etp_atfork_prepare, etp_atfork_parent, etp_atfork_child); 442 X_THREAD_ATFORK (etp_atfork_prepare, etp_atfork_parent, etp_atfork_child);
427} 443}
428 444
429static int 445static int
430etp_init (void (*want_poll)(void), void (*done_poll)(void)) 446etp_init (void (*want_poll)(void), void (*done_poll)(void))
621} 637}
622 638
623static void etp_set_max_idle (unsigned int nthreads) 639static void etp_set_max_idle (unsigned int nthreads)
624{ 640{
625 if (WORDACCESS_UNSAFE) X_LOCK (reqlock); 641 if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
626 max_idle = nthreads <= 0 ? 1 : nthreads; 642 max_idle = nthreads;
643 if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
644}
645
646static void etp_set_idle_timeout (unsigned int seconds)
647{
648 if (WORDACCESS_UNSAFE) X_LOCK (reqlock);
649 idle_timeout = seconds;
627 if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); 650 if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
628} 651}
629 652
630static void etp_set_min_parallel (unsigned int nthreads) 653static void etp_set_min_parallel (unsigned int nthreads)
631{ 654{
757} 780}
758 781
759void eio_set_max_idle (unsigned int nthreads) 782void eio_set_max_idle (unsigned int nthreads)
760{ 783{
761 etp_set_max_idle (nthreads); 784 etp_set_max_idle (nthreads);
785}
786
787void eio_set_idle_timeout (unsigned int seconds)
788{
789 etp_set_idle_timeout (seconds);
762} 790}
763 791
764void eio_set_min_parallel (unsigned int nthreads) 792void eio_set_min_parallel (unsigned int nthreads)
765{ 793{
766 etp_set_min_parallel (nthreads); 794 etp_set_min_parallel (nthreads);
1041} 1069}
1042 1070
1043static signed char 1071static signed char
1044eio_dent_cmp (const eio_dirent *a, const eio_dirent *b) 1072eio_dent_cmp (const eio_dirent *a, const eio_dirent *b)
1045{ 1073{
1046 return a->score - b->score ? a->score - b->score /* works because our signed char is always 0..100 */ 1074 return a->score - b->score ? a->score - b->score /* works because our signed char is always 0..100 */
1047 : a->inode < b->inode ? -1 : a->inode > b->inode ? 1 : 0; 1075 : a->inode < b->inode ? -1
1076 : a->inode > b->inode ? 1
1077 : 0;
1048} 1078}
1049 1079
1050#define EIO_DENT_CMP(i,op,j) eio_dent_cmp (&i, &j) op 0 1080#define EIO_DENT_CMP(i,op,j) eio_dent_cmp (&i, &j) op 0
1051 1081
1052#define EIO_SORT_CUTOFF 30 /* quite high, but performs well on many filesystems */ 1082#define EIO_SORT_CUTOFF 30 /* quite high, but performs well on many filesystems */
1058 unsigned char bits [9 + sizeof (ino_t) * 8]; 1088 unsigned char bits [9 + sizeof (ino_t) * 8];
1059 unsigned char *bit = bits; 1089 unsigned char *bit = bits;
1060 1090
1061 assert (CHAR_BIT == 8); 1091 assert (CHAR_BIT == 8);
1062 assert (sizeof (eio_dirent) * 8 < 256); 1092 assert (sizeof (eio_dirent) * 8 < 256);
1063 assert (offsetof (eio_dirent, inode)); /* we use 0 as sentinel */ 1093 assert (offsetof (eio_dirent, inode)); /* we use bit #0 as sentinel */
1064 assert (offsetof (eio_dirent, score)); /* we use 0 as sentinel */ 1094 assert (offsetof (eio_dirent, score)); /* we use bit #0 as sentinel */
1065 1095
1066 if (size <= EIO_SORT_FAST) 1096 if (size <= EIO_SORT_FAST)
1067 return; 1097 return;
1068 1098
1069 /* first prepare an array of bits to test in our radix sort */ 1099 /* first prepare an array of bits to test in our radix sort */
1224 flags &= ~(EIO_READDIR_DIRS_FIRST | EIO_READDIR_STAT_ORDER); 1254 flags &= ~(EIO_READDIR_DIRS_FIRST | EIO_READDIR_STAT_ORDER);
1225 1255
1226 X_LOCK (wrklock); 1256 X_LOCK (wrklock);
1227 /* the corresponding closedir is in ETP_WORKER_CLEAR */ 1257 /* the corresponding closedir is in ETP_WORKER_CLEAR */
1228 self->dirp = dirp = opendir (req->ptr1); 1258 self->dirp = dirp = opendir (req->ptr1);
1259
1229 req->flags |= EIO_FLAG_PTR1_FREE | EIO_FLAG_PTR2_FREE; 1260 req->flags |= EIO_FLAG_PTR1_FREE | EIO_FLAG_PTR2_FREE;
1230 req->ptr1 = dents = flags ? malloc (dentalloc * sizeof (eio_dirent)) : 0; 1261 req->ptr1 = dents = flags ? malloc (dentalloc * sizeof (eio_dirent)) : 0;
1231 req->ptr2 = names = malloc (namesalloc); 1262 req->ptr2 = names = malloc (namesalloc);
1232 X_UNLOCK (wrklock); 1263 X_UNLOCK (wrklock);
1233 1264
1245 /* sort etc. */ 1276 /* sort etc. */
1246 req->int1 = flags; 1277 req->int1 = flags;
1247 req->result = dentoffs; 1278 req->result = dentoffs;
1248 1279
1249 if (flags & EIO_READDIR_STAT_ORDER) 1280 if (flags & EIO_READDIR_STAT_ORDER)
1250 eio_dent_sort (dents, dentoffs, 0, inode_bits); /* sort by inode exclusively */ 1281 eio_dent_sort (dents, dentoffs, flags & EIO_READDIR_DIRS_FIRST ? 7 : 0, inode_bits);
1251 else if (flags & EIO_READDIR_DIRS_FIRST) 1282 else if (flags & EIO_READDIR_DIRS_FIRST)
1252 if (flags & EIO_READDIR_FOUND_UNKNOWN) 1283 if (flags & EIO_READDIR_FOUND_UNKNOWN)
1253 eio_dent_sort (dents, dentoffs, 7, inode_bits); /* sort by score and inode */ 1284 eio_dent_sort (dents, dentoffs, 7, inode_bits); /* sort by score and inode */
1254 else 1285 else
1255 { 1286 {
1257 eio_dirent *oth = dents + dentoffs; 1288 eio_dirent *oth = dents + dentoffs;
1258 eio_dirent *dir = dents; 1289 eio_dirent *dir = dents;
1259 1290
1260 /* now partition dirs to the front, and non-dirs to the back */ 1291 /* now partition dirs to the front, and non-dirs to the back */
1261 /* by walking from both sides and swapping if necessary */ 1292 /* by walking from both sides and swapping if necessary */
1262 /* also clear score, so it doesn't influence sorting */
1263 while (oth > dir) 1293 while (oth > dir)
1264 { 1294 {
1265 if (dir->type == EIO_DT_DIR) 1295 if (dir->type == EIO_DT_DIR)
1266 ++dir; 1296 ++dir;
1267 else if ((--oth)->type == EIO_DT_DIR) 1297 else if ((--oth)->type == EIO_DT_DIR)
1270 1300
1271 ++dir; 1301 ++dir;
1272 } 1302 }
1273 } 1303 }
1274 1304
1275 /* now sort the dirs only */ 1305 /* now sort the dirs only (dirs all have the same score) */
1276 eio_dent_sort (dents, dir - dents, 0, inode_bits); 1306 eio_dent_sort (dents, dir - dents, 0, inode_bits);
1277 } 1307 }
1278 1308
1279 break; 1309 break;
1280 } 1310 }
1431 /* round up length */ 1461 /* round up length */
1432 *length = (*length + mask) & ~mask; 1462 *length = (*length + mask) & ~mask;
1433} 1463}
1434 1464
1435#if !_POSIX_MEMLOCK 1465#if !_POSIX_MEMLOCK
1436# define eio__mlock(a,b) ((errno = ENOSYS), -1)
1437# define eio__mlockall(a) ((errno = ENOSYS), -1) 1466# define eio__mlockall(a) ((errno = ENOSYS), -1)
1438#else 1467#else
1439
1440static int
1441eio__mlock (void *addr, size_t length)
1442{
1443 eio_page_align (&addr, &length);
1444
1445 return mlock (addr, length);
1446}
1447 1468
1448static int 1469static int
1449eio__mlockall (int flags) 1470eio__mlockall (int flags)
1450{ 1471{
1451 #if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 7 1472 #if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 7
1461 | (flags & EIO_MCL_FUTURE ? MCL_FUTURE : 0); 1482 | (flags & EIO_MCL_FUTURE ? MCL_FUTURE : 0);
1462 } 1483 }
1463 1484
1464 return mlockall (flags); 1485 return mlockall (flags);
1465} 1486}
1487#endif
1488
1489#if !_POSIX_MEMLOCK_RANGE
1490# define eio__mlock(a,b) ((errno = ENOSYS), -1)
1491#else
1492
1493static int
1494eio__mlock (void *addr, size_t length)
1495{
1496 eio_page_align (&addr, &length);
1497
1498 return mlock (addr, length);
1499}
1500
1466#endif 1501#endif
1467 1502
1468#if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO) 1503#if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO)
1469# define eio__msync(a,b,c) ((errno = ENOSYS), -1) 1504# define eio__msync(a,b,c) ((errno = ENOSYS), -1)
1470#else 1505#else
1488} 1523}
1489 1524
1490#endif 1525#endif
1491 1526
1492int 1527int
1493eio__mtouch (void *mem, size_t len, int flags) 1528eio__mtouch (eio_req *req)
1494{ 1529{
1530 void *mem = req->ptr2;
1531 size_t len = req->size;
1532 int flags = req->int1;
1533
1495 eio_page_align (&mem, &len); 1534 eio_page_align (&mem, &len);
1496 1535
1497 { 1536 {
1498 intptr_t addr = (intptr_t)mem; 1537 intptr_t addr = (intptr_t)mem;
1499 intptr_t end = addr + len; 1538 intptr_t end = addr + len;
1500 intptr_t page = eio_pagesize (); 1539 intptr_t page = eio_pagesize ();
1501 1540
1502 if (addr < end) 1541 if (addr < end)
1503 if (flags & EIO_MT_MODIFY) /* modify */ 1542 if (flags & EIO_MT_MODIFY) /* modify */
1504 do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len); 1543 do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len && !EIO_CANCELLED (req));
1505 else 1544 else
1506 do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len); 1545 do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len && !EIO_CANCELLED (req));
1507 } 1546 }
1508 1547
1509 return 0; 1548 return 0;
1510} 1549}
1511 1550
1546 if (req) 1585 if (req)
1547 break; 1586 break;
1548 1587
1549 ++idle; 1588 ++idle;
1550 1589
1551 ts.tv_sec = time (0) + IDLE_TIMEOUT; 1590 ts.tv_sec = time (0) + idle_timeout;
1552 if (X_COND_TIMEDWAIT (reqwait, reqlock, ts) == ETIMEDOUT) 1591 if (X_COND_TIMEDWAIT (reqwait, reqlock, ts) == ETIMEDOUT)
1553 { 1592 {
1554 if (idle > max_idle) 1593 if (idle > max_idle)
1555 { 1594 {
1556 --idle; 1595 --idle;
1676 case EIO_RMDIR: req->result = rmdir (req->ptr1); break; 1715 case EIO_RMDIR: req->result = rmdir (req->ptr1); break;
1677 case EIO_MKDIR: req->result = mkdir (req->ptr1, (mode_t)req->int2); break; 1716 case EIO_MKDIR: req->result = mkdir (req->ptr1, (mode_t)req->int2); break;
1678 case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break; 1717 case EIO_RENAME: req->result = rename (req->ptr1, req->ptr2); break;
1679 case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break; 1718 case EIO_LINK: req->result = link (req->ptr1, req->ptr2); break;
1680 case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break; 1719 case EIO_SYMLINK: req->result = symlink (req->ptr1, req->ptr2); break;
1681 case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->int3); break; 1720 case EIO_MKNOD: req->result = mknod (req->ptr1, (mode_t)req->int2, (dev_t)req->offs); break;
1682 1721
1683 case EIO_READLINK: ALLOC (PATH_MAX); 1722 case EIO_READLINK: ALLOC (PATH_MAX);
1684 req->result = readlink (req->ptr1, req->ptr2, PATH_MAX); break; 1723 req->result = readlink (req->ptr1, req->ptr2, PATH_MAX); break;
1685 1724
1686 case EIO_SYNC: req->result = 0; sync (); break; 1725 case EIO_SYNC: req->result = 0; sync (); break;
1687 case EIO_FSYNC: req->result = fsync (req->int1); break; 1726 case EIO_FSYNC: req->result = fsync (req->int1); break;
1688 case EIO_FDATASYNC: req->result = fdatasync (req->int1); break; 1727 case EIO_FDATASYNC: req->result = fdatasync (req->int1); break;
1689 case EIO_MSYNC: req->result = eio__msync (req->ptr2, req->size, req->int1); break; 1728 case EIO_MSYNC: req->result = eio__msync (req->ptr2, req->size, req->int1); break;
1690 case EIO_MTOUCH: req->result = eio__mtouch (req->ptr2, req->size, req->int1); break; 1729 case EIO_MTOUCH: req->result = eio__mtouch (req); break;
1691 case EIO_MLOCK: req->result = eio__mlock (req->ptr2, req->size); break; 1730 case EIO_MLOCK: req->result = eio__mlock (req->ptr2, req->size); break;
1692 case EIO_MLOCKALL: req->result = eio__mlockall (req->int1); break; 1731 case EIO_MLOCKALL: req->result = eio__mlockall (req->int1); break;
1693 case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break; 1732 case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break;
1694 1733
1695 case EIO_READDIR: eio__scandir (req, self); break; 1734 case EIO_READDIR: eio__scandir (req, self); break;
1936 REQ (EIO_READDIR); PATH; req->int1 = flags; SEND; 1975 REQ (EIO_READDIR); PATH; req->int1 = flags; SEND;
1937} 1976}
1938 1977
1939eio_req *eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data) 1978eio_req *eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data)
1940{ 1979{
1941 REQ (EIO_MKNOD); PATH; req->int2 = (long)mode; req->int3 = (long)dev; SEND; 1980 REQ (EIO_MKNOD); PATH; req->int2 = (long)mode; req->offs = (off_t)dev; SEND;
1942} 1981}
1943 1982
1944static eio_req * 1983static eio_req *
1945eio__2path (int type, const char *path, const char *new_path, int pri, eio_cb cb, void *data) 1984eio__2path (int type, const char *path, const char *new_path, int pri, eio_cb cb, void *data)
1946{ 1985{

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines