--- IO-AIO/AIO.xs 2006/10/23 18:38:15 1.57 +++ IO-AIO/AIO.xs 2006/10/24 03:17:39 1.67 @@ -1,4 +1,9 @@ +#if __linux +# define _GNU_SOURCE +#endif + #define _REENTRANT 1 + #include #include "EXTERN.h" @@ -43,10 +48,22 @@ #if __ia64 # define STACKSIZE 65536 +#elif __i386 || __x86_64 /* 16k is unreasonably high :( */ +# define STACKSIZE PTHREAD_STACK_MIN #else -# define STACKSIZE 8192 +# define STACKSIZE 16384 #endif +/* buffer size for various temporary buffers */ +#define AIO_BUFSIZE 65536 + +#define dBUF \ + char *aio_buf = malloc (AIO_BUFSIZE); \ + if (!aio_buf) \ + return -1; + +#define fBUF free (aio_buf) + enum { REQ_QUIT, REQ_OPEN, REQ_CLOSE, @@ -68,10 +85,6 @@ { struct aio_cb *volatile next; - struct aio_cb *grp, *grp_prev, *grp_next, *grp_first; - - SV *self; /* the perl counterpart of this request, if any */ - SV *data, *callback; SV *fh, *fh2; void *dataptr, *data2ptr; @@ -80,28 +93,105 @@ size_t length; ssize_t result; + STRLEN dataoffset; int type; int fd, fd2; int errorno; - STRLEN dataoffset; mode_t mode; /* open */ - unsigned char cancelled; + + unsigned char flags; + unsigned char pri; + + SV *self; /* the perl counterpart of this request, if any */ + struct aio_cb *grp, *grp_prev, *grp_next, *grp_first; } aio_cb; +enum { + FLAG_CANCELLED = 0x01, +}; + typedef aio_cb *aio_req; typedef aio_cb *aio_req_ornot; +enum { + PRI_MIN = -4, + PRI_MAX = 4, + + DEFAULT_PRI = 0, + PRI_BIAS = -PRI_MIN, + NUM_PRI = PRI_MAX + PRI_BIAS + 1, +}; + +static int next_pri = DEFAULT_PRI + PRI_BIAS; + static int started, wanted; static volatile int nreqs; static int max_outstanding = 1<<30; static int respipe [2]; -static pthread_mutex_t reslock = PTHREAD_MUTEX_INITIALIZER; -static pthread_mutex_t reqlock = PTHREAD_MUTEX_INITIALIZER; +#if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP) +# define AIO_MUTEX_INIT PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +#else +# define AIO_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER +#endif + +static pthread_mutex_t reslock = AIO_MUTEX_INIT; +static pthread_mutex_t reqlock = AIO_MUTEX_INIT; static pthread_cond_t reqwait = PTHREAD_COND_INITIALIZER; -static volatile aio_req reqs, reqe; /* queue start, queue end */ -static volatile aio_req ress, rese; /* queue start, queue end */ +/* + * a somewhat faster data structure might be nice, but + * with 8 priorities this actually needs <20 insns + * per shift, the most expensive operation. + */ +typedef struct { + aio_req qs[NUM_PRI], qe[NUM_PRI]; /* qstart, qend */ + int size; +} reqq; + +static reqq req_queue; +static reqq res_queue; + +int reqq_push (reqq *q, aio_req req) +{ + int pri = req->pri; + req->next = 0; + + if (q->qe[pri]) + { + q->qe[pri]->next = req; + q->qe[pri] = req; + } + else + q->qe[pri] = q->qs[pri] = req; + + return q->size++; +} + +aio_req reqq_shift (reqq *q) +{ + int pri; + + if (!q->size) + return 0; + + --q->size; + + for (pri = NUM_PRI; pri--; ) + { + aio_req req = q->qs[pri]; + + if (req) + { + if (!(q->qs[pri] = req->next)) + q->qe[pri] = 0; + + return req; + } + } + + abort (); +} static void req_invoke (aio_req req); static void req_free (aio_req req); @@ -132,7 +222,7 @@ static void aio_grp_feed (aio_req grp) { - while (grp->length < grp->fd2 && !grp->cancelled) + while (grp->length < grp->fd2 && !(grp->flags & FLAG_CANCELLED)) { int old_len = grp->length; @@ -145,7 +235,7 @@ PUSHMARK (SP); XPUSHs (req_sv (grp, AIO_GRP_KLASS)); PUTBACK; - call_sv (grp->fh2, G_VOID | G_EVAL); + call_sv (grp->fh2, G_VOID | G_EVAL | G_KEEPERR); SPAGAIN; FREETMPS; LEAVE; @@ -178,9 +268,22 @@ static void poll_wait () { - if (nreqs && !ress) + fd_set rfd; + + while (nreqs) { - fd_set rfd; + int size; +#if !(__i386 || __x86_64) /* safe without sempahore on this archs */ + pthread_mutex_lock (&reslock); +#endif + size = res_queue.size; +#if !(__i386 || __x86_64) /* safe without sempahore on this archs */ + pthread_mutex_unlock (&reslock); +#endif + + if (size) + return; + FD_ZERO(&rfd); FD_SET(respipe [0], &rfd); @@ -191,104 +294,92 @@ static void req_invoke (aio_req req) { dSP; - int errorno = errno; - - if (req->cancelled || !SvOK (req->callback)) - return; - errno = req->errorno; + if (!(req->flags & FLAG_CANCELLED) && SvOK (req->callback)) + { + errno = req->errorno; - ENTER; - SAVETMPS; - PUSHMARK (SP); - EXTEND (SP, 1); + ENTER; + SAVETMPS; + PUSHMARK (SP); + EXTEND (SP, 1); - switch (req->type) - { - case REQ_READDIR: + switch (req->type) { - SV *rv = &PL_sv_undef; - - if (req->result >= 0) + case REQ_READDIR: { - char *buf = req->data2ptr; - AV *av = newAV (); + SV *rv = &PL_sv_undef; - while (req->result) + if (req->result >= 0) { - SV *sv = newSVpv (buf, 0); + char *buf = req->data2ptr; + AV *av = newAV (); - av_push (av, sv); - buf += SvCUR (sv) + 1; - req->result--; + while (req->result) + { + SV *sv = newSVpv (buf, 0); + + av_push (av, sv); + buf += SvCUR (sv) + 1; + req->result--; + } + + rv = sv_2mortal (newRV_noinc ((SV *)av)); } - rv = sv_2mortal (newRV_noinc ((SV *)av)); + PUSHs (rv); } + break; - PUSHs (rv); - } - break; - - case REQ_OPEN: - { - /* convert fd to fh */ - SV *fh; - - PUSHs (sv_2mortal (newSViv (req->result))); - PUTBACK; - call_pv ("IO::AIO::_fd2fh", G_SCALAR | G_EVAL); - SPAGAIN; - - fh = SvREFCNT_inc (POPs); + case REQ_OPEN: + { + /* convert fd to fh */ + SV *fh; - PUSHMARK (SP); - XPUSHs (sv_2mortal (fh)); - } - break; + PUSHs (sv_2mortal (newSViv (req->result))); + PUTBACK; + call_pv ("IO::AIO::_fd2fh", G_SCALAR | G_EVAL); + SPAGAIN; - case REQ_GROUP: - req->fd = 2; /* mark group as finished */ + fh = SvREFCNT_inc (POPs); - if (req->data) - { - int i; - AV *av = (AV *)req->data; + PUSHMARK (SP); + XPUSHs (sv_2mortal (fh)); + } + break; - EXTEND (SP, AvFILL (av) + 1); - for (i = 0; i <= AvFILL (av); ++i) - PUSHs (*av_fetch (av, i, 0)); - } - break; + case REQ_GROUP: + req->fd = 2; /* mark group as finished */ - case REQ_NOP: - case REQ_SLEEP: - break; + if (req->data) + { + int i; + AV *av = (AV *)req->data; - default: - PUSHs (sv_2mortal (newSViv (req->result))); - break; - } + EXTEND (SP, AvFILL (av) + 1); + for (i = 0; i <= AvFILL (av); ++i) + PUSHs (*av_fetch (av, i, 0)); + } + break; + case REQ_NOP: + case REQ_SLEEP: + break; - PUTBACK; - call_sv (req->callback, G_VOID | G_EVAL); - SPAGAIN; + default: + PUSHs (sv_2mortal (newSViv (req->result))); + break; + } - FREETMPS; - LEAVE; - errno = errorno; + PUTBACK; + call_sv (req->callback, G_VOID | G_EVAL); + SPAGAIN; - if (SvTRUE (ERRSV)) - { - req_free (req); - croak (0); + FREETMPS; + LEAVE; } -} -static void req_free (aio_req req) -{ if (req->grp) { aio_req grp = req->grp; @@ -303,6 +394,15 @@ aio_grp_dec (grp); } + if (SvTRUE (ERRSV)) + { + req_free (req); + croak (0); + } +} + +static void req_free (aio_req req) +{ if (req->self) { sv_unmagic (req->self, PERL_MAGIC_ext); @@ -323,7 +423,7 @@ static void req_cancel (aio_req req) { - req->cancelled = 1; + req->flags |= FLAG_CANCELLED; if (req->type == REQ_GROUP) { @@ -344,20 +444,16 @@ for (;;) { pthread_mutex_lock (&reslock); - req = ress; + req = reqq_shift (&res_queue); if (req) { - ress = req->next; - - if (!ress) + if (!res_queue.size) { /* read any signals sent by the worker threads */ char buf [32]; while (read (respipe [0], buf, 32) == 32) ; - - rese = 0; } } @@ -430,17 +526,7 @@ ++nreqs; pthread_mutex_lock (&reqlock); - - req->next = 0; - - if (reqe) - { - reqe->next = req; - reqe = req; - } - else - reqe = reqs = req; - + reqq_push (&req_queue, req); pthread_cond_signal (&reqwait); pthread_mutex_unlock (&reqlock); @@ -459,8 +545,11 @@ static void end_thread (void) { aio_req req; + Newz (0, req, 1, aio_cb); + req->type = REQ_QUIT; + req->pri = PRI_MAX + PRI_BIAS; req_send (req); } @@ -557,17 +646,19 @@ static ssize_t readahead (int fd, off_t offset, size_t count) { - char readahead_buf[4096]; + dBUF; while (count > 0) { - size_t len = count < sizeof (readahead_buf) ? count : sizeof (readahead_buf); + size_t len = count < AIO_BUFSIZE ? count : AIO_BUFSIZE; - pread (fd, readahead_buf, len, offset); + pread (fd, aio_buf, len, offset); offset += len; count -= len; } + fBUF; + errno = 0; } #endif @@ -662,14 +753,15 @@ ) { /* emulate sendfile. this is a major pain in the ass */ - char buf[4096]; + dBUF; + res = 0; while (count) { ssize_t cnt; - cnt = pread (ifd, buf, count > 4096 ? 4096 : count, offset); + cnt = pread (ifd, aio_buf, count > AIO_BUFSIZE ? AIO_BUFSIZE : count, offset); if (cnt <= 0) { @@ -677,7 +769,7 @@ break; } - cnt = write (ofd, buf, cnt); + cnt = write (ofd, aio_buf, cnt); if (cnt <= 0) { @@ -689,6 +781,8 @@ res += cnt; count -= cnt; } + + fBUF; } return res; @@ -697,12 +791,12 @@ /* read a full directory */ static int scandir_ (const char *path, void **namesp) { - DIR *dirp = opendir (path); + DIR *dirp; union { struct dirent d; char b [offsetof (struct dirent, d_name) + NAME_MAX + 1]; - } u; + } *u; struct dirent *entp; char *name, *names; int memlen = 4096; @@ -710,40 +804,45 @@ int res = 0; int errorno; + dirp = opendir (path); if (!dirp) return -1; + u = malloc (sizeof (*u)); names = malloc (memlen); - for (;;) - { - errno = 0, readdir_r (dirp, &u.d, &entp); + if (u && names) + for (;;) + { + errno = 0; + readdir_r (dirp, &u->d, &entp); - if (!entp) - break; + if (!entp) + break; - name = entp->d_name; + name = entp->d_name; - if (name [0] != '.' || (name [1] && (name [1] != '.' || name [2]))) - { - int len = strlen (name) + 1; + if (name [0] != '.' || (name [1] && (name [1] != '.' || name [2]))) + { + int len = strlen (name) + 1; - res++; + res++; - while (memofs + len > memlen) - { - memlen *= 2; - names = realloc (names, memlen); - if (!names) - break; - } + while (memofs + len > memlen) + { + memlen *= 2; + names = realloc (names, memlen); + if (!names) + break; + } - memcpy (names + memofs, name, len); - memofs += len; - } - } + memcpy (names + memofs, name, len); + memofs += len; + } + } errorno = errno; + free (u); closedir (dirp); if (errorno) @@ -770,13 +869,7 @@ for (;;) { - req = reqs; - - if (reqs) - { - reqs = reqs->next; - if (!reqs) reqe = 0; - } + req = reqq_shift (&req_queue); if (req) break; @@ -787,9 +880,10 @@ pthread_mutex_unlock (&reqlock); errno = 0; /* strictly unnecessary */ + type = req->type; /* remember type for QUIT check */ - if (!req->cancelled) - switch (type = req->type) /* remember type for QUIT check */ + if (!(req->flags & FLAG_CANCELLED)) + switch (type) { case REQ_READ: req->result = pread (req->fd, req->dataptr, req->length, req->offset); break; case REQ_WRITE: req->result = pwrite (req->fd, req->dataptr, req->length, req->offset); break; @@ -837,20 +931,9 @@ pthread_mutex_lock (&reslock); - req->next = 0; - - if (rese) - { - rese->next = req; - rese = req; - } - else - { - rese = ress = req; - - /* write a dummy byte to the pipe so fh becomes ready */ - write (respipe [1], &respipe, 1); - } + if (!reqq_push (&res_queue, req)) + /* write a dummy byte to the pipe so fh becomes ready */ + write (respipe [1], &respipe, 1); pthread_mutex_unlock (&reslock); } @@ -891,24 +974,12 @@ started = 0; - while (reqs) - { - prv = reqs; - reqs = prv->next; - req_free (prv); - } - - reqs = reqe = 0; - - while (ress) - { - prv = ress; - ress = prv->next; - req_free (prv); - } - - ress = rese = 0; + while (prv = reqq_shift (&req_queue)) + req_free (prv); + while (prv = reqq_shift (&res_queue)) + req_free (prv); + close (respipe [0]); close (respipe [1]); create_pipe (); @@ -918,6 +989,8 @@ #define dREQ \ aio_req req; \ + int req_pri = next_pri; \ + next_pri = DEFAULT_PRI + PRI_BIAS; \ \ if (SvOK (callback) && !SvROK (callback)) \ croak ("callback must be undef or of reference type"); \ @@ -926,7 +999,8 @@ if (!req) \ croak ("out of memory during aio_req allocation"); \ \ - req->callback = newSVsv (callback) + req->callback = newSVsv (callback); \ + req->pri = req_pri #define REQ_SEND \ req_send (req); \ @@ -1211,8 +1285,10 @@ PPCODE: { dREQ; + req->type = REQ_GROUP; req_send (req); + XPUSHs (req_sv (req, AIO_GRP_KLASS)); } @@ -1229,6 +1305,13 @@ } void +aioreq_pri (int pri = DEFAULT_PRI) + CODE: + if (pri < PRI_MIN) pri = PRI_MIN; + if (pri > PRI_MAX) pri = PRI_MAX; + next_pri = pri + PRI_BIAS; + +void flush () PROTOTYPE: CODE: @@ -1285,12 +1368,11 @@ void cancel (aio_req_ornot req) - PROTOTYPE: CODE: req_cancel (req); void -cb (aio_req req, SV *callback=&PL_sv_undef) +cb (aio_req_ornot req, SV *callback=&PL_sv_undef) CODE: SvREFCNT_dec (req->callback); req->callback = newSVsv (callback); @@ -1345,7 +1427,7 @@ } void -feed_limit (aio_req grp, int limit) +limit (aio_req grp, int limit) CODE: grp->fd2 = limit; aio_grp_feed (grp);