--- IO-AIO/AIO.xs 2005/08/17 06:12:10 1.29 +++ IO-AIO/AIO.xs 2006/06/24 19:14:04 1.41 @@ -7,34 +7,54 @@ #include "autoconf/config.h" +#include + +#include +#include #include #include - +#include #include #include #include #include -#include +#if HAVE_SENDFILE +# if __linux +# include +# elif __freebsd +# include +# include +# elif __hpux +# include +# elif __solaris /* not yet */ +# include +# else +# error sendfile support requested but not available +# endif +#endif -typedef void *InputStream; /* hack, but 5.6.1 is simply toooo old ;) */ -typedef void *OutputStream; /* hack, but 5.6.1 is simply toooo old ;) */ -typedef void *InOutStream; /* hack, but 5.6.1 is simply toooo old ;) */ +/* used for struct dirent, AIX doesn't provide it */ +#ifndef NAME_MAX +# define NAME_MAX 4096 +#endif #if __ia64 # define STACKSIZE 65536 #else -# define STACKSIZE 4096 +# define STACKSIZE 8192 #endif enum { REQ_QUIT, REQ_OPEN, REQ_CLOSE, REQ_READ, REQ_WRITE, REQ_READAHEAD, + REQ_SENDFILE, REQ_STAT, REQ_LSTAT, REQ_FSTAT, REQ_FSYNC, REQ_FDATASYNC, - REQ_UNLINK, REQ_RMDIR, - REQ_SYMLINK, + REQ_UNLINK, REQ_RMDIR, REQ_RENAME, + REQ_READDIR, + REQ_LINK, REQ_SYMLINK, }; typedef struct aio_cb { @@ -42,13 +62,15 @@ int type; - int fd; + /* should receive a cleanup, with unions */ + int fd, fd2; off_t offset; size_t length; ssize_t result; mode_t mode; /* open */ int errorno; - SV *data, *callback, *fh; + SV *data, *callback; + SV *fh, *fh2; void *dataptr, *data2ptr; STRLEN dataoffset; @@ -57,7 +79,7 @@ typedef aio_cb *aio_req; -static int started; +static int started, wanted; static volatile int nreqs; static int max_outstanding = 1<<30; static int respipe [2]; @@ -77,12 +99,18 @@ if (req->fh) SvREFCNT_dec (req->fh); + if (req->fh2) + SvREFCNT_dec (req->fh2); + if (req->statdata) Safefree (req->statdata); if (req->callback) SvREFCNT_dec (req->callback); + if (req->type == REQ_READDIR && req->result >= 0) + free (req->data2ptr); + Safefree (req); } @@ -118,12 +146,12 @@ if (!ress) { - rese = 0; - /* read any signals sent by the worker threads */ char buf [32]; while (read (respipe [0], buf, 32) == 32) ; + + rese = 0; } } @@ -156,21 +184,48 @@ ENTER; PUSHMARK (SP); - XPUSHs (sv_2mortal (newSViv (req->result))); - if (req->type == REQ_OPEN) + if (req->type == REQ_READDIR) { - /* convert fd to fh */ - SV *fh; + SV *rv = &PL_sv_undef; - PUTBACK; - call_pv ("IO::AIO::_fd2fh", G_SCALAR | G_EVAL); - SPAGAIN; + if (req->result >= 0) + { + char *buf = req->data2ptr; + AV *av = newAV (); + + while (req->result) + { + SV *sv = newSVpv (buf, 0); + + av_push (av, sv); + buf += SvCUR (sv) + 1; + req->result--; + } - fh = SvREFCNT_inc (POPs); + rv = sv_2mortal (newRV_noinc ((SV *)av)); + } - PUSHMARK (SP); - XPUSHs (sv_2mortal (fh)); + XPUSHs (rv); + } + else + { + XPUSHs (sv_2mortal (newSViv (req->result))); + + if (req->type == REQ_OPEN) + { + /* convert fd to fh */ + SV *fh; + + PUTBACK; + call_pv ("IO::AIO::_fd2fh", G_SCALAR | G_EVAL); + SPAGAIN; + + fh = SvREFCNT_inc (POPs); + + PUSHMARK (SP); + XPUSHs (sv_2mortal (fh)); + } } if (SvOK (req->callback)) @@ -223,6 +278,9 @@ static void send_req (aio_req req) { + while (started < wanted && nreqs >= started) + start_thread (); + nreqs++; pthread_mutex_lock (&reqlock); @@ -264,21 +322,24 @@ static void min_parallel (int nthreads) { - while (nthreads > started) - start_thread (); + if (wanted < nthreads) + wanted = nthreads; } static void max_parallel (int nthreads) { int cur = started; - while (cur > nthreads) - { + if (wanted > nthreads) + wanted = nthreads; + + while (cur > wanted) + { end_thread (); cur--; } - while (started > nthreads) + while (started > wanted) { poll_wait (); poll_cb (); @@ -297,52 +358,6 @@ croak ("cannot set result pipe to nonblocking mode"); } -static void atfork_prepare (void) -{ - pthread_mutex_lock (&reqlock); - pthread_mutex_lock (&reslock); -} - -static void atfork_parent (void) -{ - pthread_mutex_unlock (&reslock); - pthread_mutex_unlock (&reqlock); -} - -static void atfork_child (void) -{ - aio_req prv; - - int restart = started; - started = 0; - - while (reqs) - { - prv = reqs; - reqs = prv->next; - free_req (prv); - } - - reqs = reqe = 0; - - while (ress) - { - prv = ress; - ress = prv->next; - free_req (prv); - } - - ress = rese = 0; - - close (respipe [0]); - close (respipe [1]); - create_pipe (); - - atfork_parent (); - - min_parallel (restart); -} - /*****************************************************************************/ /* work around various missing functions */ @@ -355,7 +370,7 @@ * normal read/write by using a mutex. slows down execution a lot, * but that's your problem, not mine. */ -static pthread_mutex_t iolock = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t preadwritelock = PTHREAD_MUTEX_INITIALIZER; static ssize_t pread (int fd, void *buf, size_t count, off_t offset) @@ -363,12 +378,12 @@ ssize_t res; off_t ooffset; - pthread_mutex_lock (&iolock); + pthread_mutex_lock (&preadwritelock); ooffset = lseek (fd, 0, SEEK_CUR); lseek (fd, offset, SEEK_SET); res = read (fd, buf, count); lseek (fd, ooffset, SEEK_SET); - pthread_mutex_unlock (&iolock); + pthread_mutex_unlock (&preadwritelock); return res; } @@ -379,12 +394,12 @@ ssize_t res; off_t ooffset; - pthread_mutex_lock (&iolock); + pthread_mutex_lock (&preadwritelock); ooffset = lseek (fd, 0, SEEK_CUR); lseek (fd, offset, SEEK_SET); res = write (fd, buf, count); lseek (fd, offset, SEEK_SET); - pthread_mutex_unlock (&iolock); + pthread_mutex_unlock (&preadwritelock); return res; } @@ -397,11 +412,11 @@ #if !HAVE_READAHEAD # define readahead aio_readahead -static char readahead_buf[4096]; - static ssize_t readahead (int fd, off_t offset, size_t count) { + char readahead_buf[4096]; + while (count > 0) { size_t len = count < sizeof (readahead_buf) ? count : sizeof (readahead_buf); @@ -415,6 +430,194 @@ } #endif +#if !HAVE_READDIR_R +# define readdir_r aio_readdir_r + +static pthread_mutex_t readdirlock = PTHREAD_MUTEX_INITIALIZER; + +static int +readdir_r (DIR *dirp, struct dirent *ent, struct dirent **res) +{ + struct dirent *e; + int errorno; + + pthread_mutex_lock (&readdirlock); + + e = readdir (dirp); + errorno = errno; + + if (e) + { + *res = ent; + strcpy (ent->d_name, e->d_name); + } + else + *res = 0; + + pthread_mutex_unlock (&readdirlock); + + errno = errorno; + return e ? 0 : -1; +} +#endif + +/* sendfile always needs emulation */ +static ssize_t +sendfile_ (int ofd, int ifd, off_t offset, size_t count) +{ + ssize_t res; + + if (!count) + return 0; + +#if HAVE_SENDFILE +# if __linux + res = sendfile (ofd, ifd, &offset, count); + +# elif __freebsd + /* + * Of course, the freebsd sendfile is a dire hack with no thoughts + * wasted on making it similar to other I/O functions. + */ + { + off_t sbytes; + res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0); + + if (res < 0 && sbytes) + /* maybe only on EAGAIN only: as usual, the manpage leaves you guessing */ + res = sbytes; + } + +# elif __hpux + res = sendfile (ofd, ifd, offset, count, 0, 0); + +# elif __solaris + { + struct sendfilevec vec; + size_t sbytes; + + vec.sfv_fd = ifd; + vec.sfv_flag = 0; + vec.sfv_off = offset; + vec.sfv_len = count; + + res = sendfilev (ofd, &vec, 1, &sbytes); + + if (res < 0 && sbytes) + res = sbytes; + } + +# endif +#else + res = -1; + errno = ENOSYS; +#endif + + if (res < 0 + && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK +#if __solaris + || errno == EAFNOSUPPORT || errno == EPROTOTYPE +#endif + ) + ) + { + /* emulate sendfile. this is a major pain in the ass */ + char buf[4096]; + res = 0; + + while (count) + { + ssize_t cnt; + + cnt = pread (ifd, buf, count > 4096 ? 4096 : count, offset); + + if (cnt <= 0) + { + if (cnt && !res) res = -1; + break; + } + + cnt = write (ofd, buf, cnt); + + if (cnt <= 0) + { + if (cnt && !res) res = -1; + break; + } + + offset += cnt; + res += cnt; + count -= cnt; + } + } + + return res; +} + +/* read a full directory */ +static int +scandir_ (const char *path, void **namesp) +{ + DIR *dirp = opendir (path); + union + { + struct dirent d; + char b [offsetof (struct dirent, d_name) + NAME_MAX + 1]; + } u; + struct dirent *entp; + char *name, *names; + int memlen = 4096; + int memofs = 0; + int res = 0; + int errorno; + + if (!dirp) + return -1; + + names = malloc (memlen); + + for (;;) + { + errno = 0, readdir_r (dirp, &u.d, &entp); + + if (!entp) + break; + + name = entp->d_name; + + if (name [0] != '.' || (name [1] && (name [1] != '.' || name [2]))) + { + int len = strlen (name) + 1; + + res++; + + while (memofs + len > memlen) + { + memlen *= 2; + names = realloc (names, memlen); + if (!names) + break; + } + + memcpy (names + memofs, name, len); + memofs += len; + } + } + + errorno = errno; + closedir (dirp); + + if (errorno) + { + free (names); + errno = errorno; + res = -1; + } + + *namesp = (void *)names; + return res; +} + /*****************************************************************************/ static void * @@ -455,6 +658,7 @@ case REQ_WRITE: req->result = pwrite (req->fd, req->dataptr, req->length, req->offset); break; case REQ_READAHEAD: req->result = readahead (req->fd, req->offset, req->length); break; + case REQ_SENDFILE: req->result = sendfile_ (req->fd, req->fd2, req->offset, req->length); break; case REQ_STAT: req->result = stat (req->dataptr, req->statdata); break; case REQ_LSTAT: req->result = lstat (req->dataptr, req->statdata); break; @@ -464,10 +668,13 @@ case REQ_CLOSE: req->result = close (req->fd); break; case REQ_UNLINK: req->result = unlink (req->dataptr); break; case REQ_RMDIR: req->result = rmdir (req->dataptr); break; + case REQ_RENAME: req->result = rename (req->data2ptr, req->dataptr); break; + case REQ_LINK: req->result = link (req->data2ptr, req->dataptr); break; case REQ_SYMLINK: req->result = symlink (req->data2ptr, req->dataptr); break; case REQ_FDATASYNC: req->result = fdatasync (req->fd); break; case REQ_FSYNC: req->result = fsync (req->fd); break; + case REQ_READDIR: req->result = scandir_ (req->dataptr, &req->data2ptr); break; case REQ_QUIT: break; @@ -503,6 +710,63 @@ return 0; } +/*****************************************************************************/ + +static void atfork_prepare (void) +{ + pthread_mutex_lock (&reqlock); + pthread_mutex_lock (&reslock); +#if !HAVE_PREADWRITE + pthread_mutex_lock (&preadwritelock); +#endif +#if !HAVE_READDIR_R + pthread_mutex_lock (&readdirlock); +#endif +} + +static void atfork_parent (void) +{ +#if !HAVE_READDIR_R + pthread_mutex_unlock (&readdirlock); +#endif +#if !HAVE_PREADWRITE + pthread_mutex_unlock (&preadwritelock); +#endif + pthread_mutex_unlock (&reslock); + pthread_mutex_unlock (&reqlock); +} + +static void atfork_child (void) +{ + aio_req prv; + + started = 0; + + while (reqs) + { + prv = reqs; + reqs = prv->next; + free_req (prv); + } + + reqs = reqe = 0; + + while (ress) + { + prv = ress; + ress = prv->next; + free_req (prv); + } + + ress = rese = 0; + + close (respipe [0]); + close (respipe [1]); + create_pipe (); + + atfork_parent (); +} + #define dREQ \ aio_req req; \ \ @@ -521,22 +785,27 @@ BOOT: { + HV *stash = gv_stashpv ("IO::AIO", 1); + newCONSTSUB (stash, "EXDEV", newSViv (EXDEV)); + newCONSTSUB (stash, "O_RDONLY", newSViv (O_RDONLY)); + newCONSTSUB (stash, "O_WRONLY", newSViv (O_WRONLY)); + create_pipe (); pthread_atfork (atfork_prepare, atfork_parent, atfork_child); } void -min_parallel(nthreads) +min_parallel (nthreads) int nthreads PROTOTYPE: $ void -max_parallel(nthreads) +max_parallel (nthreads) int nthreads PROTOTYPE: $ int -max_outstanding(nreqs) +max_outstanding (nreqs) int nreqs PROTOTYPE: $ CODE: @@ -544,7 +813,7 @@ max_outstanding = nreqs; void -aio_open(pathname,flags,mode,callback=&PL_sv_undef) +aio_open (pathname,flags,mode,callback=&PL_sv_undef) SV * pathname int flags int mode @@ -564,7 +833,7 @@ } void -aio_close(fh,callback=&PL_sv_undef) +aio_close (fh,callback=&PL_sv_undef) SV * fh SV * callback PROTOTYPE: $;$ @@ -584,12 +853,12 @@ } void -aio_read(fh,offset,length,data,dataoffset,callback=&PL_sv_undef) +aio_read (fh,offset,length,data,dataoffset,callback=&PL_sv_undef) SV * fh UV offset - IV length + UV length SV * data - IV dataoffset + UV dataoffset SV * callback ALIAS: aio_read = REQ_READ @@ -648,7 +917,30 @@ } void -aio_readahead(fh,offset,length,callback=&PL_sv_undef) +aio_sendfile (out_fh,in_fh,in_offset,length,callback=&PL_sv_undef) + SV * out_fh + SV * in_fh + UV in_offset + UV length + SV * callback + PROTOTYPE: $$$$;$ + CODE: +{ + dREQ; + + req->type = REQ_SENDFILE; + req->fh = newSVsv (out_fh); + req->fd = PerlIO_fileno (IoIFP (sv_2io (out_fh))); + req->fh2 = newSVsv (in_fh); + req->fd2 = PerlIO_fileno (IoIFP (sv_2io (in_fh))); + req->offset = in_offset; + req->length = length; + + send_req (req); +} + +void +aio_readahead (fh,offset,length,callback=&PL_sv_undef) SV * fh UV offset IV length @@ -668,7 +960,7 @@ } void -aio_stat(fh_or_path,callback=&PL_sv_undef) +aio_stat (fh_or_path,callback=&PL_sv_undef) SV * fh_or_path SV * callback ALIAS: @@ -702,12 +994,13 @@ } void -aio_unlink(pathname,callback=&PL_sv_undef) +aio_unlink (pathname,callback=&PL_sv_undef) SV * pathname SV * callback ALIAS: - aio_unlink = REQ_UNLINK - aio_rmdir = REQ_RMDIR + aio_unlink = REQ_UNLINK + aio_rmdir = REQ_RMDIR + aio_readdir = REQ_READDIR CODE: { dREQ; @@ -720,15 +1013,19 @@ } void -aio_symlink(oldpath,newpath,callback=&PL_sv_undef) +aio_link (oldpath,newpath,callback=&PL_sv_undef) SV * oldpath SV * newpath SV * callback + ALIAS: + aio_link = REQ_LINK + aio_symlink = REQ_SYMLINK + aio_rename = REQ_RENAME CODE: { dREQ; - req->type = REQ_SYMLINK; + req->type = ix; req->fh = newSVsv (oldpath); req->data2ptr = SvPVbyte_nolen (req->fh); req->data = newSVsv (newpath); @@ -738,7 +1035,7 @@ } void -flush() +flush () PROTOTYPE: CODE: while (nreqs)