--- IO-AIO/AIO.xs 2006/10/24 00:26:32 1.65 +++ IO-AIO/AIO.xs 2006/10/26 12:38:04 1.76 @@ -1,7 +1,11 @@ -#if __linux +/* solaris */ +#define _POSIX_PTHREAD_SEMANTICS 1 + +#if __linux && !defined(_GNU_SOURCE) # define _GNU_SOURCE #endif +/* just in case */ #define _REENTRANT 1 #include @@ -46,6 +50,11 @@ # define NAME_MAX 4096 #endif +#ifndef PTHREAD_STACK_MIN +/* care for broken platforms, e.g. windows */ +# define PTHREAD_STACK_MIN 16384 +#endif + #if __ia64 # define STACKSIZE 65536 #elif __i386 || __x86_64 /* 16k is unreasonably high :( */ @@ -58,12 +67,13 @@ #define AIO_BUFSIZE 65536 #define dBUF \ - char *aio_buf = malloc (AIO_BUFSIZE); \ + char *aio_buf; \ + LOCK (wrklock); \ + self->dbuf = aio_buf = malloc (AIO_BUFSIZE); \ + UNLOCK (wrklock); \ if (!aio_buf) \ return -1; -#define fBUF free (aio_buf) - enum { REQ_QUIT, REQ_OPEN, REQ_CLOSE, @@ -75,7 +85,7 @@ REQ_READDIR, REQ_LINK, REQ_SYMLINK, REQ_GROUP, REQ_NOP, - REQ_SLEEP, + REQ_BUSY, }; #define AIO_REQ_KLASS "IO::AIO::REQ" @@ -119,13 +129,13 @@ DEFAULT_PRI = 0, PRI_BIAS = -PRI_MIN, + NUM_PRI = PRI_MAX + PRI_BIAS + 1, }; static int next_pri = DEFAULT_PRI + PRI_BIAS; static int started, wanted; static volatile int nreqs; -static int max_outstanding = 1<<30; static int respipe [2]; #if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP) @@ -134,15 +144,111 @@ # define AIO_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER #endif +#define LOCK(mutex) pthread_mutex_lock (&(mutex)) +#define UNLOCK(mutex) pthread_mutex_unlock (&(mutex)) + +/* worker threads management */ +static pthread_mutex_t wrklock = AIO_MUTEX_INIT; + +typedef struct worker { + /* locked by wrklock */ + struct worker *prev, *next; + + pthread_t tid; + + /* locked by reslock, reqlock or wrklock */ + aio_req req; /* currently processed request */ + void *dbuf; + DIR *dirp; +} worker; + +static worker wrk_first = { &wrk_first, &wrk_first, 0 }; + +static void worker_clear (worker *wrk) +{ + if (wrk->dirp) + { + closedir (wrk->dirp); + wrk->dirp = 0; + } + + if (wrk->dbuf) + { + free (wrk->dbuf); + wrk->dbuf = 0; + } +} + +static void worker_free (worker *wrk) +{ + wrk->next->prev = wrk->prev; + wrk->prev->next = wrk->next; + + free (wrk); +} + static pthread_mutex_t reslock = AIO_MUTEX_INIT; static pthread_mutex_t reqlock = AIO_MUTEX_INIT; static pthread_cond_t reqwait = PTHREAD_COND_INITIALIZER; -static volatile aio_req reqs, reqe; /* queue start, queue end */ -static volatile aio_req ress, rese; /* queue start, queue end */ +/* + * a somewhat faster data structure might be nice, but + * with 8 priorities this actually needs <20 insns + * per shift, the most expensive operation. + */ +typedef struct { + aio_req qs[NUM_PRI], qe[NUM_PRI]; /* qstart, qend */ + int size; +} reqq; + +static reqq req_queue; +static reqq res_queue; + +int reqq_push (reqq *q, aio_req req) +{ + int pri = req->pri; + req->next = 0; + + if (q->qe[pri]) + { + q->qe[pri]->next = req; + q->qe[pri] = req; + } + else + q->qe[pri] = q->qs[pri] = req; + + return q->size++; +} + +aio_req reqq_shift (reqq *q) +{ + int pri; + + if (!q->size) + return 0; + + --q->size; + + for (pri = NUM_PRI; pri--; ) + { + aio_req req = q->qs[pri]; + + if (req) + { + if (!(q->qs[pri] = req->next)) + q->qe[pri] = 0; + + return req; + } + } + + abort (); +} +static int poll_cb (int max); static void req_invoke (aio_req req); static void req_free (aio_req req); +static void req_cancel (aio_req req); /* must be called at most once */ static SV *req_sv (aio_req req, const char *klass) @@ -183,7 +289,7 @@ PUSHMARK (SP); XPUSHs (req_sv (grp, AIO_GRP_KLASS)); PUTBACK; - call_sv (grp->fh2, G_VOID | G_EVAL); + call_sv (grp->fh2, G_VOID | G_EVAL | G_KEEPERR); SPAGAIN; FREETMPS; LEAVE; @@ -220,16 +326,16 @@ while (nreqs) { - aio_req req; -#if !(__i386 || __x86_64) /* safe without sempahore on this archs */ - pthread_mutex_lock (&reslock); + int size; +#if !(__i386 || __x86_64) /* safe without sempahore on these archs */ + LOCK (reslock); #endif - req = ress; -#if !(__i386 || __x86_64) /* safe without sempahore on this archs */ - pthread_mutex_unlock (&reslock); + size = res_queue.size; +#if !(__i386 || __x86_64) /* safe without sempahore on these archs */ + UNLOCK (reslock); #endif - if (req) + if (size) return; FD_ZERO(&rfd); @@ -242,104 +348,94 @@ static void req_invoke (aio_req req) { dSP; - int errorno = errno; - if (req->flags & FLAG_CANCELLED || !SvOK (req->callback)) - return; - - errno = req->errorno; + if (!(req->flags & FLAG_CANCELLED) && SvOK (req->callback)) + { + errno = req->errorno; - ENTER; - SAVETMPS; - PUSHMARK (SP); - EXTEND (SP, 1); + ENTER; + SAVETMPS; + PUSHMARK (SP); + EXTEND (SP, 1); - switch (req->type) - { - case REQ_READDIR: + switch (req->type) { - SV *rv = &PL_sv_undef; - - if (req->result >= 0) + case REQ_READDIR: { - char *buf = req->data2ptr; - AV *av = newAV (); + SV *rv = &PL_sv_undef; - while (req->result) + if (req->result >= 0) { - SV *sv = newSVpv (buf, 0); + int i; + char *buf = req->data2ptr; + AV *av = newAV (); - av_push (av, sv); - buf += SvCUR (sv) + 1; - req->result--; - } + av_extend (av, req->result - 1); - rv = sv_2mortal (newRV_noinc ((SV *)av)); - } + for (i = 0; i < req->result; ++i) + { + SV *sv = newSVpv (buf, 0); - PUSHs (rv); - } - break; + av_store (av, i, sv); + buf += SvCUR (sv) + 1; + } - case REQ_OPEN: - { - /* convert fd to fh */ - SV *fh; + rv = sv_2mortal (newRV_noinc ((SV *)av)); + } - PUSHs (sv_2mortal (newSViv (req->result))); - PUTBACK; - call_pv ("IO::AIO::_fd2fh", G_SCALAR | G_EVAL); - SPAGAIN; + PUSHs (rv); + } + break; - fh = SvREFCNT_inc (POPs); + case REQ_OPEN: + { + /* convert fd to fh */ + SV *fh; - PUSHMARK (SP); - XPUSHs (sv_2mortal (fh)); - } - break; + PUSHs (sv_2mortal (newSViv (req->result))); + PUTBACK; + call_pv ("IO::AIO::_fd2fh", G_SCALAR | G_EVAL); + SPAGAIN; - case REQ_GROUP: - req->fd = 2; /* mark group as finished */ + fh = SvREFCNT_inc (POPs); - if (req->data) - { - int i; - AV *av = (AV *)req->data; + PUSHMARK (SP); + XPUSHs (sv_2mortal (fh)); + } + break; - EXTEND (SP, AvFILL (av) + 1); - for (i = 0; i <= AvFILL (av); ++i) - PUSHs (*av_fetch (av, i, 0)); - } - break; + case REQ_GROUP: + req->fd = 2; /* mark group as finished */ - case REQ_NOP: - case REQ_SLEEP: - break; + if (req->data) + { + int i; + AV *av = (AV *)req->data; - default: - PUSHs (sv_2mortal (newSViv (req->result))); - break; - } + EXTEND (SP, AvFILL (av) + 1); + for (i = 0; i <= AvFILL (av); ++i) + PUSHs (*av_fetch (av, i, 0)); + } + break; + case REQ_NOP: + case REQ_BUSY: + break; - PUTBACK; - call_sv (req->callback, G_VOID | G_EVAL); - SPAGAIN; + default: + PUSHs (sv_2mortal (newSViv (req->result))); + break; + } - FREETMPS; - LEAVE; - errno = errorno; + PUTBACK; + call_sv (req->callback, G_VOID | G_EVAL); + SPAGAIN; - if (SvTRUE (ERRSV)) - { - req_free (req); - croak (0); + FREETMPS; + LEAVE; } -} -static void req_free (aio_req req) -{ if (req->grp) { aio_req grp = req->grp; @@ -354,6 +450,15 @@ aio_grp_dec (grp); } + if (SvTRUE (ERRSV)) + { + req_free (req); + croak (0); + } +} + +static void req_free (aio_req req) +{ if (req->self) { sv_unmagic (req->self, PERL_MAGIC_ext); @@ -366,53 +471,57 @@ SvREFCNT_dec (req->callback); Safefree (req->statdata); - if (req->type == REQ_READDIR && req->result >= 0) + if (req->type == REQ_READDIR) free (req->data2ptr); Safefree (req); } +static void req_cancel_subs (aio_req grp) +{ + aio_req sub; + + if (grp->type != REQ_GROUP) + return; + + SvREFCNT_dec (grp->fh2); + grp->fh2 = 0; + + for (sub = grp->grp_first; sub; sub = sub->grp_next) + req_cancel (sub); +} + static void req_cancel (aio_req req) { req->flags |= FLAG_CANCELLED; - if (req->type == REQ_GROUP) - { - aio_req sub; - - for (sub = req->grp_first; sub; sub = sub->grp_next) - req_cancel (sub); - } + req_cancel_subs (req); } -static int poll_cb () +static int poll_cb (int max) { dSP; int count = 0; int do_croak = 0; aio_req req; - for (;;) + while (max <= 0 || count < max) { - pthread_mutex_lock (&reslock); - req = ress; + LOCK (reslock); + req = reqq_shift (&res_queue); if (req) { - ress = req->next; - - if (!ress) + if (!res_queue.size) { /* read any signals sent by the worker threads */ char buf [32]; while (read (respipe [0], buf, 32) == 32) ; - - rese = 0; } } - pthread_mutex_unlock (&reslock); + UNLOCK (reslock); if (!req) break; @@ -457,20 +566,35 @@ static void start_thread (void) { sigset_t fullsigset, oldsigset; - pthread_t tid; pthread_attr_t attr; + worker *wrk = calloc (1, sizeof (worker)); + + if (!wrk) + croak ("unable to allocate worker thread data"); + pthread_attr_init (&attr); pthread_attr_setstacksize (&attr, STACKSIZE); pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); sigfillset (&fullsigset); + + LOCK (wrklock); sigprocmask (SIG_SETMASK, &fullsigset, &oldsigset); - if (pthread_create (&tid, &attr, aio_proc, 0) == 0) - started++; + if (pthread_create (&wrk->tid, &attr, aio_proc, (void *)wrk) == 0) + { + wrk->prev = &wrk_first; + wrk->next = wrk_first.next; + wrk_first.next->prev = wrk; + wrk_first.next = wrk; + started++; + } + else + free (wrk); sigprocmask (SIG_SETMASK, &oldsigset, 0); + UNLOCK (wrklock); } static void req_send (aio_req req) @@ -480,38 +604,20 @@ ++nreqs; - pthread_mutex_lock (&reqlock); - - req->next = 0; - - if (reqe) - { - reqe->next = req; - reqe = req; - } - else - reqe = reqs = req; - + LOCK (reqlock); + reqq_push (&req_queue, req); pthread_cond_signal (&reqwait); - pthread_mutex_unlock (&reqlock); - - if (nreqs > max_outstanding) - for (;;) - { - poll_cb (); - - if (nreqs <= max_outstanding) - break; - - poll_wait (); - } + UNLOCK (reqlock); } static void end_thread (void) { aio_req req; + Newz (0, req, 1, aio_cb); + req->type = REQ_QUIT; + req->pri = PRI_MAX + PRI_BIAS; req_send (req); } @@ -538,7 +644,7 @@ while (started > wanted) { poll_wait (); - poll_cb (); + poll_cb (0); } } @@ -573,12 +679,12 @@ ssize_t res; off_t ooffset; - pthread_mutex_lock (&preadwritelock); + LOCK (preadwritelock); ooffset = lseek (fd, 0, SEEK_CUR); lseek (fd, offset, SEEK_SET); res = read (fd, buf, count); lseek (fd, ooffset, SEEK_SET); - pthread_mutex_unlock (&preadwritelock); + UNLOCK (preadwritelock); return res; } @@ -588,12 +694,12 @@ ssize_t res; off_t ooffset; - pthread_mutex_lock (&preadwritelock); + LOCK (preadwritelock); ooffset = lseek (fd, 0, SEEK_CUR); lseek (fd, offset, SEEK_SET); res = write (fd, buf, count); lseek (fd, offset, SEEK_SET); - pthread_mutex_unlock (&preadwritelock); + UNLOCK (preadwritelock); return res; } @@ -604,9 +710,9 @@ #endif #if !HAVE_READAHEAD -# define readahead aio_readahead +# define readahead(fd,offset,count) aio_readahead (fd, offset, count, self) -static ssize_t readahead (int fd, off_t offset, size_t count) +static ssize_t aio_readahead (int fd, off_t offset, size_t count, worker *self) { dBUF; @@ -619,10 +725,9 @@ count -= len; } - fBUF; - errno = 0; } + #endif #if !HAVE_READDIR_R @@ -635,7 +740,7 @@ struct dirent *e; int errorno; - pthread_mutex_lock (&readdirlock); + LOCK (readdirlock); e = readdir (dirp); errorno = errno; @@ -648,7 +753,7 @@ else *res = 0; - pthread_mutex_unlock (&readdirlock); + UNLOCK (readdirlock); errno = errorno; return e ? 0 : -1; @@ -656,7 +761,7 @@ #endif /* sendfile always needs emulation */ -static ssize_t sendfile_ (int ofd, int ifd, off_t offset, size_t count) +static ssize_t sendfile_ (int ofd, int ifd, off_t offset, size_t count, worker *self) { ssize_t res; @@ -743,15 +848,13 @@ res += cnt; count -= cnt; } - - fBUF; } return res; } /* read a full directory */ -static int scandir_ (const char *path, void **namesp) +static void scandir_ (aio_req req, worker *self) { DIR *dirp; union @@ -766,14 +869,13 @@ int res = 0; int errorno; - dirp = opendir (path); - if (!dirp) - return -1; - - u = malloc (sizeof (*u)); - names = malloc (memlen); + LOCK (wrklock); + self->dirp = dirp = opendir (req->dataptr); + self->dbuf = u = malloc (sizeof (*u)); + req->data2ptr = names = malloc (memlen); + UNLOCK (wrklock); - if (u && names) + if (dirp && u && names) for (;;) { errno = 0; @@ -793,7 +895,10 @@ while (memofs + len > memlen) { memlen *= 2; - names = realloc (names, memlen); + LOCK (wrklock); + req->data2ptr = names = realloc (names, memlen); + UNLOCK (wrklock); + if (!names) break; } @@ -803,19 +908,10 @@ } } - errorno = errno; - free (u); - closedir (dirp); - - if (errorno) - { - free (names); - errno = errorno; - res = -1; - } - - *namesp = (void *)names; - return res; + if (errno) + res = -1; + + req->result = res; } /*****************************************************************************/ @@ -824,20 +920,15 @@ { aio_req req; int type; + worker *self = (worker *)thr_arg; do { - pthread_mutex_lock (&reqlock); + LOCK (reqlock); for (;;) { - req = reqs; - - if (reqs) - { - reqs = reqs->next; - if (!reqs) reqe = 0; - } + self->req = req = reqq_shift (&req_queue); if (req) break; @@ -845,7 +936,7 @@ pthread_cond_wait (&reqwait, &reqlock); } - pthread_mutex_unlock (&reqlock); + UNLOCK (reqlock); errno = 0; /* strictly unnecessary */ type = req->type; /* remember type for QUIT check */ @@ -857,7 +948,7 @@ case REQ_WRITE: req->result = pwrite (req->fd, req->dataptr, req->length, req->offset); break; case REQ_READAHEAD: req->result = readahead (req->fd, req->offset, req->length); break; - case REQ_SENDFILE: req->result = sendfile_ (req->fd, req->fd2, req->offset, req->length); break; + case REQ_SENDFILE: req->result = sendfile_ (req->fd, req->fd2, req->offset, req->length, self); break; case REQ_STAT: req->result = stat (req->dataptr, req->statdata); break; case REQ_LSTAT: req->result = lstat (req->dataptr, req->statdata); break; @@ -873,9 +964,9 @@ case REQ_FDATASYNC: req->result = fdatasync (req->fd); break; case REQ_FSYNC: req->result = fsync (req->fd); break; - case REQ_READDIR: req->result = scandir_ (req->dataptr, &req->data2ptr); break; + case REQ_READDIR: scandir_ (req, self); break; - case REQ_SLEEP: + case REQ_BUSY: { struct timeval tv; @@ -897,27 +988,23 @@ req->errorno = errno; - pthread_mutex_lock (&reslock); + LOCK (reslock); - req->next = 0; + if (!reqq_push (&res_queue, req)) + /* write a dummy byte to the pipe so fh becomes ready */ + write (respipe [1], &respipe, 1); - if (rese) - { - rese->next = req; - rese = req; - } - else - { - rese = ress = req; - - /* write a dummy byte to the pipe so fh becomes ready */ - write (respipe [1], &respipe, 1); - } + self->req = 0; + worker_clear (self); - pthread_mutex_unlock (&reslock); + UNLOCK (reslock); } while (type != REQ_QUIT); + LOCK (wrklock); + worker_free (self); + UNLOCK (wrklock); + return 0; } @@ -925,51 +1012,53 @@ static void atfork_prepare (void) { - pthread_mutex_lock (&reqlock); - pthread_mutex_lock (&reslock); + LOCK (wrklock); + LOCK (reqlock); + LOCK (reslock); #if !HAVE_PREADWRITE - pthread_mutex_lock (&preadwritelock); + LOCK (preadwritelock); #endif #if !HAVE_READDIR_R - pthread_mutex_lock (&readdirlock); + LOCK (readdirlock); #endif } static void atfork_parent (void) { #if !HAVE_READDIR_R - pthread_mutex_unlock (&readdirlock); + UNLOCK (readdirlock); #endif #if !HAVE_PREADWRITE - pthread_mutex_unlock (&preadwritelock); + UNLOCK (preadwritelock); #endif - pthread_mutex_unlock (&reslock); - pthread_mutex_unlock (&reqlock); + UNLOCK (reslock); + UNLOCK (reqlock); + UNLOCK (wrklock); } static void atfork_child (void) { aio_req prv; - started = 0; + while (prv = reqq_shift (&req_queue)) + req_free (prv); - while (reqs) - { - prv = reqs; - reqs = prv->next; - req_free (prv); - } + while (prv = reqq_shift (&res_queue)) + req_free (prv); - reqs = reqe = 0; - - while (ress) + while (wrk_first.next != &wrk_first) { - prv = ress; - ress = prv->next; - req_free (prv); + worker *wrk = wrk_first.next; + + if (wrk->req) + req_free (wrk->req); + + worker_clear (wrk); + worker_free (wrk); } - - ress = rese = 0; + + started = 0; + nreqs = 0; close (respipe [0]); close (respipe [1]); @@ -1024,14 +1113,6 @@ int nthreads PROTOTYPE: $ -int -max_outstanding (nreqs) - int nreqs - PROTOTYPE: $ - CODE: - RETVAL = max_outstanding; - max_outstanding = nreqs; - void aio_open (pathname,flags,mode,callback=&PL_sv_undef) SV * pathname @@ -1255,14 +1336,14 @@ } void -aio_sleep (delay,callback=&PL_sv_undef) +aio_busy (delay,callback=&PL_sv_undef) double delay SV * callback PPCODE: { dREQ; - req->type = REQ_SLEEP; + req->type = REQ_BUSY; req->fd = delay < 0. ? 0 : delay; req->fd2 = delay < 0. ? 0 : 1000. * (delay - req->fd); @@ -1295,16 +1376,20 @@ REQ_SEND; } -#if 0 - void -aio_pri (int pri = DEFAULT_PRI) - CODE: - if (pri < PRI_MIN) pri = PRI_MIN; - if (pri > PRI_MAX) pri = PRI_MAX; - next_pri = pri + PRI_BIAS; +aioreq_pri (int pri = DEFAULT_PRI) + CODE: + if (pri < PRI_MIN) pri = PRI_MIN; + if (pri > PRI_MAX) pri = PRI_MAX; + next_pri = pri + PRI_BIAS; -#endif +void +aioreq_nice (int nice = 0) + CODE: + nice = next_pri - nice; + if (nice < PRI_MIN) nice = PRI_MIN; + if (nice > PRI_MAX) nice = PRI_MAX; + next_pri = nice + PRI_BIAS; void flush () @@ -1313,7 +1398,7 @@ while (nreqs) { poll_wait (); - poll_cb (); + poll_cb (0); } void @@ -1323,7 +1408,7 @@ if (nreqs) { poll_wait (); - poll_cb (); + poll_cb (0); } int @@ -1338,7 +1423,15 @@ poll_cb(...) PROTOTYPE: CODE: - RETVAL = poll_cb (); + RETVAL = poll_cb (0); + OUTPUT: + RETVAL + +int +poll_some(int max = 0) + PROTOTYPE: $ + CODE: + RETVAL = poll_cb (max); OUTPUT: RETVAL @@ -1363,7 +1456,6 @@ void cancel (aio_req_ornot req) - PROTOTYPE: CODE: req_cancel (req); @@ -1409,6 +1501,11 @@ } void +cancel_subs (aio_req_ornot req) + CODE: + req_cancel_subs (req); + +void result (aio_req grp, ...) CODE: { @@ -1423,7 +1520,7 @@ } void -feed_limit (aio_req grp, int limit) +limit (aio_req grp, int limit) CODE: grp->fd2 = limit; aio_grp_feed (grp);