--- IO-AIO/AIO.xs 2006/10/26 06:44:48 1.75 +++ IO-AIO/AIO.xs 2006/10/27 20:10:06 1.81 @@ -1,10 +1,11 @@ /* solaris */ #define _POSIX_PTHREAD_SEMANTICS 1 -#if __linux +#if __linux && !defined(_GNU_SOURCE) # define _GNU_SOURCE #endif +/* just in case */ #define _REENTRANT 1 #include @@ -62,6 +63,18 @@ # define STACKSIZE 16384 #endif +/* whether word reads are potentially non-atomic. + * this is conservative; most architectures this runs + * on likely have atomic word reads/writes. + */ +#ifndef WORDREAD_UNSAFE +# if __i386 || __x86_64 +# define WORDREAD_UNSAFE 0 +# else +# define WORDREAD_UNSAFE 1 +# endif +#endif + /* buffer size for various temporary buffers */ #define AIO_BUFSIZE 65536 @@ -81,7 +94,7 @@ REQ_STAT, REQ_LSTAT, REQ_FSTAT, REQ_FSYNC, REQ_FDATASYNC, REQ_UNLINK, REQ_RMDIR, REQ_RENAME, - REQ_READDIR, + REQ_MKNOD, REQ_READDIR, REQ_LINK, REQ_SYMLINK, REQ_GROUP, REQ_NOP, REQ_BUSY, @@ -133,9 +146,7 @@ static int next_pri = DEFAULT_PRI + PRI_BIAS; -static int started, wanted; -static volatile int nreqs; -static int respipe [2]; +static unsigned int started, wanted; #if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP) # define AIO_MUTEX_INIT PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP @@ -146,7 +157,7 @@ #define LOCK(mutex) pthread_mutex_lock (&(mutex)) #define UNLOCK(mutex) pthread_mutex_unlock (&(mutex)) -/* worker threasd management */ +/* worker threads management */ static pthread_mutex_t wrklock = AIO_MUTEX_INIT; typedef struct worker { @@ -186,10 +197,45 @@ free (wrk); } +static volatile unsigned int nreqs, nready, npending; +static volatile unsigned int max_outstanding = 0xffffffff; +static int respipe [2]; + static pthread_mutex_t reslock = AIO_MUTEX_INIT; static pthread_mutex_t reqlock = AIO_MUTEX_INIT; static pthread_cond_t reqwait = PTHREAD_COND_INITIALIZER; +#if WORDREAD_UNSAFE + +static unsigned int get_nready () +{ + unsigned int retval; + + LOCK (reqlock); + retval = nready; + UNLOCK (reqlock); + + return retval; +} + +static unsigned int 
get_npending () +{ + unsigned int retval; + + LOCK (reslock); + retval = npending; + UNLOCK (reslock); + + return retval; +} + +#else + +# define get_nready() nready +# define get_npending() npending + +#endif + /* * a somewhat faster data structure might be nice, but * with 8 priorities this actually needs <20 insns @@ -244,7 +290,7 @@ abort (); } -static int poll_cb (); +static int poll_cb (int max); static void req_invoke (aio_req req); static void req_free (aio_req req); static void req_cancel (aio_req req); @@ -319,39 +365,12 @@ } } -static void poll_wait () -{ - fd_set rfd; - - while (nreqs) - { - int size; -#if !(__i386 || __x86_64) /* safe without sempahore on these archs */ - LOCK (reslock); -#endif - size = res_queue.size; -#if !(__i386 || __x86_64) /* safe without sempahore on these archs */ - UNLOCK (reslock); -#endif - - if (size) - return; - - FD_ZERO(&rfd); - FD_SET(respipe [0], &rfd); - - select (respipe [0] + 1, &rfd, 0, 0, 0); - } -} - static void req_invoke (aio_req req) { dSP; if (!(req->flags & FLAG_CANCELLED) && SvOK (req->callback)) { - errno = req->errorno; - ENTER; SAVETMPS; PUSHMARK (SP); @@ -426,6 +445,7 @@ break; } + errno = req->errorno; PUTBACK; call_sv (req->callback, G_VOID | G_EVAL); @@ -497,69 +517,6 @@ req_cancel_subs (req); } -static int poll_cb () -{ - dSP; - int count = 0; - int do_croak = 0; - aio_req req; - - for (;;) - { - LOCK (reslock); - req = reqq_shift (&res_queue); - - if (req) - { - if (!res_queue.size) - { - /* read any signals sent by the worker threads */ - char buf [32]; - while (read (respipe [0], buf, 32) == 32) - ; - } - } - - UNLOCK (reslock); - - if (!req) - break; - - --nreqs; - - if (req->type == REQ_QUIT) - started--; - else if (req->type == REQ_GROUP && req->length) - { - req->fd = 1; /* mark request as delayed */ - continue; - } - else - { - if (req->type == REQ_READ) - SvCUR_set (req->data, req->dataoffset + (req->result > 0 ? 
req->result : 0)); - - if (req->data2ptr && (req->type == REQ_READ || req->type == REQ_WRITE)) - SvREADONLY_off (req->data); - - if (req->statdata) - { - PL_laststype = req->type == REQ_LSTAT ? OP_LSTAT : OP_STAT; - PL_laststatval = req->result; - PL_statcache = *(req->statdata); - } - - req_invoke (req); - - count++; - } - - req_free (req); - } - - return count; -} - static void *aio_proc(void *arg); static void start_thread (void) @@ -575,6 +532,9 @@ pthread_attr_init (&attr); pthread_attr_setstacksize (&attr, STACKSIZE); pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); +#ifdef PTHREAD_SCOPE_PROCESS + pthread_attr_setscope (&attr, PTHREAD_SCOPE_PROCESS); +#endif sigfillset (&fullsigset); @@ -587,7 +547,7 @@ wrk->next = wrk_first.next; wrk_first.next->prev = wrk; wrk_first.next = wrk; - started++; + ++started; } else free (wrk); @@ -596,17 +556,51 @@ UNLOCK (wrklock); } -static void req_send (aio_req req) +static void maybe_start_thread () { - while (started < wanted && nreqs >= started) - start_thread (); +#if 0 + static struct timeval last; + struct timeval diff, now; +#endif + + if (started >= wanted) + return; + + if (nready <= nreqs - get_nready () - get_npending ()) + return; + +#if 0 + gettimeofday (&now, 0); + diff.tv_sec = now.tv_sec - last.tv_sec; + diff.tv_usec = now.tv_usec - last.tv_usec; + + if (diff.tv_usec < 0) + { + --diff.tv_sec; + diff.tv_usec += 1000000; + } + + if (!diff.tv_sec && diff.tv_usec < 10000) + return; + + last = now; +#endif + + start_thread (); +} + +static void req_send (aio_req req) +{ ++nreqs; LOCK (reqlock); + ++nready; reqq_push (&req_queue, req); pthread_cond_signal (&reqwait); UNLOCK (reqlock); + + maybe_start_thread (); } static void end_thread (void) @@ -619,6 +613,10 @@ req->pri = PRI_MAX + PRI_BIAS; req_send (req); + + LOCK (wrklock); + --started; + UNLOCK (wrklock); } static void min_parallel (int nthreads) @@ -629,22 +627,109 @@ static void max_parallel (int nthreads) { - int cur = started; - if (wanted > 
nthreads) wanted = nthreads; - while (cur > wanted) + while (started > wanted) + end_thread (); +} + +static void poll_wait () +{ + fd_set rfd; + + while (nreqs) { - end_thread (); - cur--; + int size; + if (WORDREAD_UNSAFE) LOCK (reslock); + size = res_queue.size; + if (WORDREAD_UNSAFE) UNLOCK (reslock); + + if (size) + return; + + maybe_start_thread (); + + FD_ZERO(&rfd); + FD_SET(respipe [0], &rfd); + + select (respipe [0] + 1, &rfd, 0, 0, 0); } +} - while (started > wanted) +static int poll_cb (int max) +{ + dSP; + int count = 0; + int do_croak = 0; + aio_req req; + + for (;;) { + while (max <= 0 || count < max) + { + maybe_start_thread (); + + LOCK (reslock); + req = reqq_shift (&res_queue); + + if (req) + { + --npending; + + if (!res_queue.size) + { + /* read any signals sent by the worker threads */ + char buf [32]; + while (read (respipe [0], buf, 32) == 32) + ; + } + } + + UNLOCK (reslock); + + if (!req) + break; + + --nreqs; + + if (req->type == REQ_GROUP && req->length) + { + req->fd = 1; /* mark request as delayed */ + continue; + } + else + { + if (req->type == REQ_READ) + SvCUR_set (req->data, req->dataoffset + (req->result > 0 ? req->result : 0)); + + if (req->data2ptr && (req->type == REQ_READ || req->type == REQ_WRITE)) + SvREADONLY_off (req->data); + + if (req->statdata) + { + PL_laststype = req->type == REQ_LSTAT ? 
OP_LSTAT : OP_STAT; + PL_laststatval = req->result; + PL_statcache = *(req->statdata); + } + + req_invoke (req); + + count++; + } + + req_free (req); + } + + if (nreqs <= max_outstanding) + break; + poll_wait (); - poll_cb (); + + max = 0; } + + return count; } static void create_pipe () @@ -781,7 +866,7 @@ res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0); if (res < 0 && sbytes) - /* maybe only on EAGAIN only: as usual, the manpage leaves you guessing */ + /* maybe only on EAGAIN: as usual, the manpage leaves you guessing */ res = sbytes; } @@ -871,9 +956,8 @@ LOCK (wrklock); self->dirp = dirp = opendir (req->dataptr); self->dbuf = u = malloc (sizeof (*u)); - UNLOCK (wrklock); - req->data2ptr = names = malloc (memlen); + UNLOCK (wrklock); if (dirp && u && names) for (;;) @@ -919,10 +1003,9 @@ static void *aio_proc (void *thr_arg) { aio_req req; - int type; worker *self = (worker *)thr_arg; - do + for (;;) { LOCK (reqlock); @@ -936,13 +1019,14 @@ pthread_cond_wait (&reqwait, &reqlock); } + --nready; + UNLOCK (reqlock); errno = 0; /* strictly unnecessary */ - type = req->type; /* remember type for QUIT check */ if (!(req->flags & FLAG_CANCELLED)) - switch (type) + switch (req->type) { case REQ_READ: req->result = pread (req->fd, req->dataptr, req->length, req->offset); break; case REQ_WRITE: req->result = pwrite (req->fd, req->dataptr, req->length, req->offset); break; @@ -961,6 +1045,7 @@ case REQ_RENAME: req->result = rename (req->data2ptr, req->dataptr); break; case REQ_LINK: req->result = link (req->data2ptr, req->dataptr); break; case REQ_SYMLINK: req->result = symlink (req->data2ptr, req->dataptr); break; + case REQ_MKNOD: req->result = mknod (req->data2ptr, req->mode, (dev_t)req->offset); break; case REQ_FDATASYNC: req->result = fdatasync (req->fd); break; case REQ_FSYNC: req->result = fsync (req->fd); break; @@ -978,9 +1063,15 @@ case REQ_GROUP: case REQ_NOP: - case REQ_QUIT: break; + case REQ_QUIT: + LOCK (wrklock); + worker_free (self); + --started; 
+ UNLOCK (wrklock); + return 0; + default: req->result = ENOSYS; break; @@ -990,6 +1081,8 @@ LOCK (reslock); + ++npending; + if (!reqq_push (&res_queue, req)) /* write a dummy byte to the pipe so fh becomes ready */ write (respipe [1], &respipe, 1); @@ -999,13 +1092,6 @@ UNLOCK (reslock); } - while (type != REQ_QUIT); - - LOCK (wrklock); - worker_free (self); - UNLOCK (wrklock); - - return 0; } /*****************************************************************************/ @@ -1098,21 +1184,32 @@ newCONSTSUB (stash, "EXDEV", newSViv (EXDEV)); newCONSTSUB (stash, "O_RDONLY", newSViv (O_RDONLY)); newCONSTSUB (stash, "O_WRONLY", newSViv (O_WRONLY)); + newCONSTSUB (stash, "O_CREAT", newSViv (O_CREAT)); + newCONSTSUB (stash, "O_TRUNC", newSViv (O_TRUNC)); create_pipe (); pthread_atfork (atfork_prepare, atfork_parent, atfork_child); + + start_thread (); } void -min_parallel (nthreads) - int nthreads +min_parallel (int nthreads) PROTOTYPE: $ void -max_parallel (nthreads) - int nthreads +max_parallel (int nthreads) PROTOTYPE: $ +int +max_outstanding (int maxreqs) + PROTOTYPE: $ + CODE: + RETVAL = max_outstanding; + max_outstanding = maxreqs; + OUTPUT: + RETVAL + void aio_open (pathname,flags,mode,callback=&PL_sv_undef) SV * pathname @@ -1336,6 +1433,25 @@ } void +aio_mknod (pathname,mode,dev,callback=&PL_sv_undef) + SV * pathname + SV * callback + UV mode + UV dev + PPCODE: +{ + dREQ; + + req->type = REQ_MKNOD; + req->data = newSVsv (pathname); + req->dataptr = SvPVbyte_nolen (req->data); + req->mode = (mode_t)mode; + req->offset = dev; + + REQ_SEND; +} + +void aio_busy (delay,callback=&PL_sv_undef) double delay SV * callback @@ -1376,20 +1492,27 @@ REQ_SEND; } -void -aioreq_pri (int pri = DEFAULT_PRI) - CODE: - if (pri < PRI_MIN) pri = PRI_MIN; - if (pri > PRI_MAX) pri = PRI_MAX; - next_pri = pri + PRI_BIAS; +int +aioreq_pri (int pri = 0) + PROTOTYPE: ;$ + CODE: + RETVAL = next_pri - PRI_BIAS; + if (items > 0) + { + if (pri < PRI_MIN) pri = PRI_MIN; + if (pri > PRI_MAX) 
pri = PRI_MAX; + next_pri = pri + PRI_BIAS; + } + OUTPUT: + RETVAL void aioreq_nice (int nice = 0) - CODE: - nice = next_pri - nice; - if (nice < PRI_MIN) nice = PRI_MIN; - if (nice > PRI_MAX) nice = PRI_MAX; - next_pri = nice + PRI_BIAS; + CODE: + nice = next_pri - nice; + if (nice < PRI_MIN) nice = PRI_MIN; + if (nice > PRI_MAX) nice = PRI_MAX; + next_pri = nice + PRI_BIAS; void flush () @@ -1398,7 +1521,7 @@ while (nreqs) { poll_wait (); - poll_cb (); + poll_cb (0); } void @@ -1408,7 +1531,7 @@ if (nreqs) { poll_wait (); - poll_cb (); + poll_cb (0); } int @@ -1423,7 +1546,15 @@ poll_cb(...) PROTOTYPE: CODE: - RETVAL = poll_cb (); + RETVAL = poll_cb (0); + OUTPUT: + RETVAL + +int +poll_some(int max = 0) + PROTOTYPE: $ + CODE: + RETVAL = poll_cb (max); OUTPUT: RETVAL @@ -1442,6 +1573,22 @@ OUTPUT: RETVAL +int +nready() + PROTOTYPE: + CODE: + RETVAL = get_nready (); + OUTPUT: + RETVAL + +int +npending() + PROTOTYPE: + CODE: + RETVAL = get_npending (); + OUTPUT: + RETVAL + PROTOTYPES: DISABLE MODULE = IO::AIO PACKAGE = IO::AIO::REQ @@ -1502,7 +1649,11 @@ CODE: { int i; - AV *av = newAV (); + AV *av; + + grp->errorno = errno; + + av = newAV (); for (i = 1; i < items; ++i ) av_push (av, newSVsv (ST (i))); @@ -1512,6 +1663,11 @@ } void +errno (aio_req grp, int errorno = errno) + CODE: + grp->errorno = errorno; + +void limit (aio_req grp, int limit) CODE: grp->fd2 = limit;