/cvs/IO-AIO/AIO.xs
Revision: 1.84
Committed: Sat Oct 28 01:24:19 2006 UTC (17 years, 6 months ago) by root
Branch: MAIN
Changes since 1.83: +48 -35 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 /* solaris */
2 #define _POSIX_PTHREAD_SEMANTICS 1
3
4 #if __linux && !defined(_GNU_SOURCE)
5 # define _GNU_SOURCE
6 #endif
7
8 /* just in case */
9 #define _REENTRANT 1
10
11 #include <errno.h>
12
13 #include "EXTERN.h"
14 #include "perl.h"
15 #include "XSUB.h"
16
17 #include "autoconf/config.h"
18
19 #include <pthread.h>
20
21 #include <stddef.h>
22 #include <errno.h>
23 #include <sys/time.h>
24 #include <sys/select.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <limits.h>
28 #include <unistd.h>
29 #include <fcntl.h>
30 #include <signal.h>
31 #include <sched.h>
32
33 #if HAVE_SENDFILE
34 # if __linux
35 # include <sys/sendfile.h>
36 # elif __freebsd
37 # include <sys/socket.h>
38 # include <sys/uio.h>
39 # elif __hpux
40 # include <sys/socket.h>
41 # elif __solaris /* not yet */
42 # include <sys/sendfile.h>
43 # else
44 # error sendfile support requested but not available
45 # endif
46 #endif
47
48 /* number of seconds after which idle threads exit */
49 #define IDLE_TIMEOUT 10
50
51 /* used for struct dirent, AIX doesn't provide it */
52 #ifndef NAME_MAX
53 # define NAME_MAX 4096
54 #endif
55
56 #ifndef PTHREAD_STACK_MIN
57 /* care for broken platforms, e.g. windows */
58 # define PTHREAD_STACK_MIN 16384
59 #endif
60
61 #if __ia64
62 # define STACKSIZE 65536
63 #elif __i386 || __x86_64 /* 16k is unreasonably high :( */
64 # define STACKSIZE PTHREAD_STACK_MIN
65 #else
66 # define STACKSIZE 16384
67 #endif
68
69 /* whether word reads are potentially non-atomic.
70 * this is conservative; most architectures this runs
71 * on likely have atomic word reads/writes.
72 */
73 #ifndef WORDREAD_UNSAFE
74 # if __i386 || __x86_64
75 # define WORDREAD_UNSAFE 0
76 # else
77 # define WORDREAD_UNSAFE 1
78 # endif
79 #endif
80
81 /* buffer size for various temporary buffers */
82 #define AIO_BUFSIZE 65536
83
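/* dBUF declares and allocates a temporary buffer for the current request;
 * the pointer is also stored in self->dbuf (under wrklock) so worker_clear ()
 * can release it afterwards, and the operation fails with -1 if malloc fails.
 */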
84 #define dBUF \
85 char *aio_buf; \
86 LOCK (wrklock); \
87 self->dbuf = aio_buf = malloc (AIO_BUFSIZE); \
88 UNLOCK (wrklock); \
89 if (!aio_buf) \
90 return -1;
91
92 enum {
93 REQ_QUIT,
94 REQ_OPEN, REQ_CLOSE,
95 REQ_READ, REQ_WRITE, REQ_READAHEAD,
96 REQ_SENDFILE,
97 REQ_STAT, REQ_LSTAT, REQ_FSTAT,
98 REQ_FSYNC, REQ_FDATASYNC,
99 REQ_UNLINK, REQ_RMDIR, REQ_RENAME,
100 REQ_MKNOD, REQ_READDIR,
101 REQ_LINK, REQ_SYMLINK,
102 REQ_GROUP, REQ_NOP,
103 REQ_BUSY,
104 };
105
106 #define AIO_REQ_KLASS "IO::AIO::REQ"
107 #define AIO_GRP_KLASS "IO::AIO::GRP"
108
109 typedef struct aio_cb
110 {
111 struct aio_cb *volatile next;
112
113 SV *data, *callback;
114 SV *fh, *fh2;
115 void *dataptr, *data2ptr;
116 Stat_t *statdata;
117 off_t offset;
118 size_t length;
119 ssize_t result;
120
121 STRLEN dataoffset;
122 int type;
123 int fd, fd2;
124 int errorno;
125 mode_t mode; /* open */
126
127 unsigned char flags;
128 unsigned char pri;
129
130 SV *self; /* the perl counterpart of this request, if any */
131 struct aio_cb *grp, *grp_prev, *grp_next, *grp_first;
132 } aio_cb;
133
134 enum {
135 FLAG_CANCELLED = 0x01,
136 };
137
138 typedef aio_cb *aio_req;
139 typedef aio_cb *aio_req_ornot;
140
141 enum {
142 PRI_MIN = -4,
143 PRI_MAX = 4,
144
145 DEFAULT_PRI = 0,
146 PRI_BIAS = -PRI_MIN,
147 NUM_PRI = PRI_MAX + PRI_BIAS + 1,
148 };
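/* priorities are stored biased by PRI_BIAS so they can be used directly
 * as an index into the per-priority queues (0 .. NUM_PRI - 1). */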
149
150 static int next_pri = DEFAULT_PRI + PRI_BIAS;
151
152 static unsigned int started, idle, wanted;
153
154 #if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)
155 # define AIO_MUTEX_INIT PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
156 #else
157 # define AIO_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER
158 #endif
159
160 #define LOCK(mutex) pthread_mutex_lock (&(mutex))
161 #define UNLOCK(mutex) pthread_mutex_unlock (&(mutex))
162
163 /* worker threads management */
164 static pthread_mutex_t wrklock = AIO_MUTEX_INIT;
165
166 typedef struct worker {
167 /* locked by wrklock */
168 struct worker *prev, *next;
169
170 pthread_t tid;
171
172 /* locked by reslock, reqlock or wrklock */
173 aio_req req; /* currently processed request */
174 void *dbuf;
175 DIR *dirp;
176 } worker;
177
178 static worker wrk_first = { &wrk_first, &wrk_first, 0 };
179
180 static void worker_clear (worker *wrk)
181 {
182 if (wrk->dirp)
183 {
184 closedir (wrk->dirp);
185 wrk->dirp = 0;
186 }
187
188 if (wrk->dbuf)
189 {
190 free (wrk->dbuf);
191 wrk->dbuf = 0;
192 }
193 }
194
195 static void worker_free (worker *wrk)
196 {
197 wrk->next->prev = wrk->prev;
198 wrk->prev->next = wrk->next;
199
200 free (wrk);
201 }
202
203 static volatile unsigned int nreqs, nready, npending;
204 static volatile unsigned int max_idle = 4;
205 static volatile unsigned int max_outstanding = 0xffffffff;
206 static int respipe [2];
207
208 static pthread_mutex_t reslock = AIO_MUTEX_INIT;
209 static pthread_mutex_t reqlock = AIO_MUTEX_INIT;
210 static pthread_cond_t reqwait = PTHREAD_COND_INITIALIZER;
211
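/* accessors for the counters shared with the event loop: where word reads
 * may be torn, take the protecting lock; otherwise read them directly. */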
212 #if WORDREAD_UNSAFE
213
214 static unsigned int get_nready ()
215 {
216 unsigned int retval;
217
218 LOCK (reqlock);
219 retval = nready;
220 UNLOCK (reqlock);
221
222 return retval;
223 }
224
225 static unsigned int get_npending ()
226 {
227 unsigned int retval;
228
229 LOCK (reslock);
230 retval = npending;
231 UNLOCK (reslock);
232
233 return retval;
234 }
235
236 #else
237
238 # define get_nready() nready
239 # define get_npending() npending
240
241 #endif
242
243 /*
244 * a somewhat faster data structure might be nice, but
245 * with 9 priorities this actually needs <20 insns
246 * per shift, the most expensive operation.
247 */
248 typedef struct {
249 aio_req qs[NUM_PRI], qe[NUM_PRI]; /* qstart, qend */
250 int size;
251 } reqq;
252
253 static reqq req_queue;
254 static reqq res_queue;
255
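/* reqq_push appends a request to the slot for its priority and returns the
 * previous total size, so a return value of 0 tells the caller the queue was
 * empty (used to decide whether the result pipe needs a wakeup byte). */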
256 int reqq_push (reqq *q, aio_req req)
257 {
258 int pri = req->pri;
259 req->next = 0;
260
261 if (q->qe[pri])
262 {
263 q->qe[pri]->next = req;
264 q->qe[pri] = req;
265 }
266 else
267 q->qe[pri] = q->qs[pri] = req;
268
269 return q->size++;
270 }
271
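/* reqq_shift removes and returns the oldest request of the highest
 * non-empty priority, or 0 if the queue is empty. */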
272 aio_req reqq_shift (reqq *q)
273 {
274 int pri;
275
276 if (!q->size)
277 return 0;
278
279 --q->size;
280
281 for (pri = NUM_PRI; pri--; )
282 {
283 aio_req req = q->qs[pri];
284
285 if (req)
286 {
287 if (!(q->qs[pri] = req->next))
288 q->qe[pri] = 0;
289
290 return req;
291 }
292 }
293
294 abort ();
295 }
296
297 static int poll_cb (int max);
298 static void req_invoke (aio_req req);
299 static void req_free (aio_req req);
300 static void req_cancel (aio_req req);
301
302 /* must be called at most once */
303 static SV *req_sv (aio_req req, const char *klass)
304 {
305 if (!req->self)
306 {
307 req->self = (SV *)newHV ();
308 sv_magic (req->self, 0, PERL_MAGIC_ext, (char *)req, 0);
309 }
310
311 return sv_2mortal (sv_bless (newRV_inc (req->self), gv_stashpv (klass, 1)));
312 }
313
314 static aio_req SvAIO_REQ (SV *sv)
315 {
316 MAGIC *mg;
317
318 if (!sv_derived_from (sv, AIO_REQ_KLASS) || !SvROK (sv))
319 croak ("object of class " AIO_REQ_KLASS " expected");
320
321 mg = mg_find (SvRV (sv), PERL_MAGIC_ext);
322
323 return mg ? (aio_req)mg->mg_ptr : 0;
324 }
325
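/* keep a group topped up: while the group holds fewer subrequests than its
 * feed limit (kept in grp->fd2), call the feeder callback (kept in grp->fh2);
 * the feeder is dropped as soon as it stops adding requests. */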
326 static void aio_grp_feed (aio_req grp)
327 {
328 while (grp->length < grp->fd2 && !(grp->flags & FLAG_CANCELLED))
329 {
330 int old_len = grp->length;
331
332 if (grp->fh2 && SvOK (grp->fh2))
333 {
334 dSP;
335
336 ENTER;
337 SAVETMPS;
338 PUSHMARK (SP);
339 XPUSHs (req_sv (grp, AIO_GRP_KLASS));
340 PUTBACK;
341 call_sv (grp->fh2, G_VOID | G_EVAL | G_KEEPERR);
342 SPAGAIN;
343 FREETMPS;
344 LEAVE;
345 }
346
347 /* stop if no progress has been made */
348 if (old_len == grp->length)
349 {
350 SvREFCNT_dec (grp->fh2);
351 grp->fh2 = 0;
352 break;
353 }
354 }
355 }
356
357 static void aio_grp_dec (aio_req grp)
358 {
359 --grp->length;
360
361 /* call feeder, if applicable */
362 aio_grp_feed (grp);
363
364 /* finish, if done */
365 if (!grp->length && grp->fd)
366 {
367 req_invoke (grp);
368 req_free (grp);
369 }
370 }
371
372 static void req_invoke (aio_req req)
373 {
374 dSP;
375
376 if (!(req->flags & FLAG_CANCELLED) && SvOK (req->callback))
377 {
378 ENTER;
379 SAVETMPS;
380 PUSHMARK (SP);
381 EXTEND (SP, 1);
382
383 switch (req->type)
384 {
385 case REQ_READDIR:
386 {
387 SV *rv = &PL_sv_undef;
388
389 if (req->result >= 0)
390 {
391 int i;
392 char *buf = req->data2ptr;
393 AV *av = newAV ();
394
395 av_extend (av, req->result - 1);
396
397 for (i = 0; i < req->result; ++i)
398 {
399 SV *sv = newSVpv (buf, 0);
400
401 av_store (av, i, sv);
402 buf += SvCUR (sv) + 1;
403 }
404
405 rv = sv_2mortal (newRV_noinc ((SV *)av));
406 }
407
408 PUSHs (rv);
409 }
410 break;
411
412 case REQ_OPEN:
413 {
414 /* convert fd to fh */
415 SV *fh;
416
417 PUSHs (sv_2mortal (newSViv (req->result)));
418 PUTBACK;
419 call_pv ("IO::AIO::_fd2fh", G_SCALAR | G_EVAL);
420 SPAGAIN;
421
422 fh = SvREFCNT_inc (POPs);
423
424 PUSHMARK (SP);
425 XPUSHs (sv_2mortal (fh));
426 }
427 break;
428
429 case REQ_GROUP:
430 req->fd = 2; /* mark group as finished */
431
432 if (req->data)
433 {
434 int i;
435 AV *av = (AV *)req->data;
436
437 EXTEND (SP, AvFILL (av) + 1);
438 for (i = 0; i <= AvFILL (av); ++i)
439 PUSHs (*av_fetch (av, i, 0));
440 }
441 break;
442
443 case REQ_NOP:
444 case REQ_BUSY:
445 break;
446
447 default:
448 PUSHs (sv_2mortal (newSViv (req->result)));
449 break;
450 }
451
452 errno = req->errorno;
453
454 PUTBACK;
455 call_sv (req->callback, G_VOID | G_EVAL);
456 SPAGAIN;
457
458 FREETMPS;
459 LEAVE;
460 }
461
462 if (req->grp)
463 {
464 aio_req grp = req->grp;
465
466 /* unlink request */
467 if (req->grp_next) req->grp_next->grp_prev = req->grp_prev;
468 if (req->grp_prev) req->grp_prev->grp_next = req->grp_next;
469
470 if (grp->grp_first == req)
471 grp->grp_first = req->grp_next;
472
473 aio_grp_dec (grp);
474 }
475
476 if (SvTRUE (ERRSV))
477 {
478 req_free (req);
479 croak (0);
480 }
481 }
482
483 static void req_free (aio_req req)
484 {
485 if (req->self)
486 {
487 sv_unmagic (req->self, PERL_MAGIC_ext);
488 SvREFCNT_dec (req->self);
489 }
490
491 SvREFCNT_dec (req->data);
492 SvREFCNT_dec (req->fh);
493 SvREFCNT_dec (req->fh2);
494 SvREFCNT_dec (req->callback);
495 Safefree (req->statdata);
496
497 if (req->type == REQ_READDIR)
498 free (req->data2ptr);
499
500 Safefree (req);
501 }
502
503 static void req_cancel_subs (aio_req grp)
504 {
505 aio_req sub;
506
507 if (grp->type != REQ_GROUP)
508 return;
509
510 SvREFCNT_dec (grp->fh2);
511 grp->fh2 = 0;
512
513 for (sub = grp->grp_first; sub; sub = sub->grp_next)
514 req_cancel (sub);
515 }
516
517 static void req_cancel (aio_req req)
518 {
519 req->flags |= FLAG_CANCELLED;
520
521 req_cancel_subs (req);
522 }
523
524 static void *aio_proc(void *arg);
525
526 static void start_thread (void)
527 {
528 sigset_t fullsigset, oldsigset;
529 pthread_attr_t attr;
530
531 worker *wrk = calloc (1, sizeof (worker));
532
533 if (!wrk)
534 croak ("unable to allocate worker thread data");
535
536 pthread_attr_init (&attr);
537 pthread_attr_setstacksize (&attr, STACKSIZE);
538 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
539 #ifdef PTHREAD_SCOPE_PROCESS
540 pthread_attr_setscope (&attr, PTHREAD_SCOPE_PROCESS);
541 #endif
542
543 sigfillset (&fullsigset);
544
545 LOCK (wrklock);
546 sigprocmask (SIG_SETMASK, &fullsigset, &oldsigset);
547
548 if (pthread_create (&wrk->tid, &attr, aio_proc, (void *)wrk) == 0)
549 {
550 wrk->prev = &wrk_first;
551 wrk->next = wrk_first.next;
552 wrk_first.next->prev = wrk;
553 wrk_first.next = wrk;
554 ++started;
555 }
556 else
557 free (wrk);
558
559 sigprocmask (SIG_SETMASK, &oldsigset, 0);
560 UNLOCK (wrklock);
561 }
562
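/* start another worker only while we are below the `wanted' limit and there
 * are more queued requests than workers that are currently free. */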
563 static void maybe_start_thread ()
564 {
565 if (started >= wanted)
566 return;
567
568 /* todo: maybe use idle here, but might be less exact */
569 if ((int)nready <= (int)started - (int)(nreqs - get_nready () - get_npending ()))
570 return;
571
572 start_thread ();
573 }
574
575 static void req_send (aio_req req)
576 {
577 ++nreqs;
578
579 LOCK (reqlock);
580 ++nready;
581 reqq_push (&req_queue, req);
582 pthread_cond_signal (&reqwait);
583 UNLOCK (reqlock);
584
585 maybe_start_thread ();
586 }
587
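/* ask one worker to terminate by queueing a REQ_QUIT at maximum priority. */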
588 static void end_thread (void)
589 {
590 aio_req req;
591
592 Newz (0, req, 1, aio_cb);
593
594 req->type = REQ_QUIT;
595 req->pri = PRI_MAX + PRI_BIAS;
596
597 LOCK (reqlock);
598 reqq_push (&req_queue, req);
599 pthread_cond_signal (&reqwait);
600 UNLOCK (reqlock);
601
602 LOCK (wrklock);
603 --started;
604 UNLOCK (wrklock);
605 }
606
607 static void min_parallel (int nthreads)
608 {
609 if (wanted < nthreads)
610 wanted = nthreads;
611 }
612
613 static void max_parallel (int nthreads)
614 {
615 if (wanted > nthreads)
616 wanted = nthreads;
617
618 while (started > wanted)
619 end_thread ();
620 }
621
622 static void poll_wait ()
623 {
624 fd_set rfd;
625
626 while (nreqs)
627 {
628 int size;
629 if (WORDREAD_UNSAFE) LOCK (reslock);
630 size = res_queue.size;
631 if (WORDREAD_UNSAFE) UNLOCK (reslock);
632
633 if (size)
634 return;
635
636 maybe_start_thread ();
637
638 FD_ZERO(&rfd);
639 FD_SET(respipe [0], &rfd);
640
641 select (respipe [0] + 1, &rfd, 0, 0, 0);
642 }
643 }
644
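/* process up to `max' completed requests (0 = no limit), invoking their perl
 * callbacks; while more than max_outstanding requests are in flight it keeps
 * waiting and processing.  perl-side sketch of the intended use (illustrative
 * only, not part of this file):
 *   my $fd = IO::AIO::poll_fileno;      # watch $fd in your event loop
 *   IO::AIO::poll_cb () if $fd_is_readable;
 */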
645 static int poll_cb (int max)
646 {
647 dSP;
648 int count = 0;
649 int do_croak = 0;
650 aio_req req;
651
652 for (;;)
653 {
654 while (max <= 0 || count < max)
655 {
656 maybe_start_thread ();
657
658 LOCK (reslock);
659 req = reqq_shift (&res_queue);
660
661 if (req)
662 {
663 --npending;
664
665 if (!res_queue.size)
666 {
667 /* read any signals sent by the worker threads */
668 char buf [32];
669 while (read (respipe [0], buf, 32) == 32)
670 ;
671 }
672 }
673
674 UNLOCK (reslock);
675
676 if (!req)
677 break;
678
679 --nreqs;
680
681 if (req->type == REQ_GROUP && req->length)
682 {
683 req->fd = 1; /* mark request as delayed */
684 continue;
685 }
686 else
687 {
688 if (req->type == REQ_READ)
689 SvCUR_set (req->data, req->dataoffset + (req->result > 0 ? req->result : 0));
690
691 if (req->data2ptr && (req->type == REQ_READ || req->type == REQ_WRITE))
692 SvREADONLY_off (req->data);
693
694 if (req->statdata)
695 {
696 PL_laststype = req->type == REQ_LSTAT ? OP_LSTAT : OP_STAT;
697 PL_laststatval = req->result;
698 PL_statcache = *(req->statdata);
699 }
700
701 req_invoke (req);
702
703 count++;
704 }
705
706 req_free (req);
707 }
708
709 if (nreqs <= max_outstanding)
710 break;
711
712 poll_wait ();
713
714 max = 0;
715 }
716
717 return count;
718 }
719
720 static void create_pipe ()
721 {
722 if (pipe (respipe))
723 croak ("unable to initialize result pipe");
724
725 if (fcntl (respipe [0], F_SETFL, O_NONBLOCK))
726 croak ("cannot set result pipe to nonblocking mode");
727
728 if (fcntl (respipe [1], F_SETFL, O_NONBLOCK))
729 croak ("cannot set result pipe to nonblocking mode");
730 }
731
732 /*****************************************************************************/
733 /* work around various missing functions */
734
735 #if !HAVE_PREADWRITE
736 # define pread aio_pread
737 # define pwrite aio_pwrite
738
739 /*
740 * make our pread/pwrite safe against themselves, but not against
741 * normal read/write by using a mutex. slows down execution a lot,
742 * but that's your problem, not mine.
743 */
744 static pthread_mutex_t preadwritelock = PTHREAD_MUTEX_INITIALIZER;
745
746 static ssize_t pread (int fd, void *buf, size_t count, off_t offset)
747 {
748 ssize_t res;
749 off_t ooffset;
750
751 LOCK (preadwritelock);
752 ooffset = lseek (fd, 0, SEEK_CUR);
753 lseek (fd, offset, SEEK_SET);
754 res = read (fd, buf, count);
755 lseek (fd, ooffset, SEEK_SET);
756 UNLOCK (preadwritelock);
757
758 return res;
759 }
760
761 static ssize_t pwrite (int fd, void *buf, size_t count, off_t offset)
762 {
763 ssize_t res;
764 off_t ooffset;
765
766 LOCK (preadwritelock);
767 ooffset = lseek (fd, 0, SEEK_CUR);
768 lseek (fd, offset, SEEK_SET);
769 res = write (fd, buf, count);
770 lseek (fd, ooffset, SEEK_SET); /* restore the original file offset, matching pread above */
771 UNLOCK (preadwritelock);
772
773 return res;
774 }
775 #endif
776
777 #if !HAVE_FDATASYNC
778 # define fdatasync fsync
779 #endif
780
781 #if !HAVE_READAHEAD
782 # define readahead(fd,offset,count) aio_readahead (fd, offset, count, self)
783
784 static ssize_t aio_readahead (int fd, off_t offset, size_t count, worker *self)
785 {
786 dBUF;
787
788 while (count > 0)
789 {
790 size_t len = count < AIO_BUFSIZE ? count : AIO_BUFSIZE;
791
792 pread (fd, aio_buf, len, offset);
793 offset += len;
794 count -= len;
795 }
796
797 errno = 0; return count; /* count is 0 here; this emulation has no meaningful error to report */
798 }
799
800 #endif
801
802 #if !HAVE_READDIR_R
803 # define readdir_r aio_readdir_r
804
805 static pthread_mutex_t readdirlock = PTHREAD_MUTEX_INITIALIZER;
806
807 static int readdir_r (DIR *dirp, struct dirent *ent, struct dirent **res)
808 {
809 struct dirent *e;
810 int errorno;
811
812 LOCK (readdirlock);
813
814 e = readdir (dirp);
815 errorno = errno;
816
817 if (e)
818 {
819 *res = ent;
820 strcpy (ent->d_name, e->d_name);
821 }
822 else
823 *res = 0;
824
825 UNLOCK (readdirlock);
826
827 errno = errorno;
828 return e ? 0 : -1;
829 }
830 #endif
831
832 /* sendfile always needs emulation */
833 static ssize_t sendfile_ (int ofd, int ifd, off_t offset, size_t count, worker *self)
834 {
835 ssize_t res;
836
837 if (!count)
838 return 0;
839
840 #if HAVE_SENDFILE
841 # if __linux
842 res = sendfile (ofd, ifd, &offset, count);
843
844 # elif __freebsd
845 /*
846 * Of course, the freebsd sendfile is a dire hack with no thoughts
847 * wasted on making it similar to other I/O functions.
848 */
849 {
850 off_t sbytes;
851 res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0);
852
853 if (res < 0 && sbytes)
854 /* maybe only on EAGAIN: as usual, the manpage leaves you guessing */
855 res = sbytes;
856 }
857
858 # elif __hpux
859 res = sendfile (ofd, ifd, offset, count, 0, 0);
860
861 # elif __solaris
862 {
863 struct sendfilevec vec;
864 size_t sbytes;
865
866 vec.sfv_fd = ifd;
867 vec.sfv_flag = 0;
868 vec.sfv_off = offset;
869 vec.sfv_len = count;
870
871 res = sendfilev (ofd, &vec, 1, &sbytes);
872
873 if (res < 0 && sbytes)
874 res = sbytes;
875 }
876
877 # endif
878 #else
879 res = -1;
880 errno = ENOSYS;
881 #endif
882
883 if (res < 0
884 && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK
885 #if __solaris
886 || errno == EAFNOSUPPORT || errno == EPROTOTYPE
887 #endif
888 )
889 )
890 {
891 /* emulate sendfile. this is a major pain in the ass */
892 dBUF;
893
894 res = 0;
895
896 while (count)
897 {
898 ssize_t cnt;
899
900 cnt = pread (ifd, aio_buf, count > AIO_BUFSIZE ? AIO_BUFSIZE : count, offset);
901
902 if (cnt <= 0)
903 {
904 if (cnt && !res) res = -1;
905 break;
906 }
907
908 cnt = write (ofd, aio_buf, cnt);
909
910 if (cnt <= 0)
911 {
912 if (cnt && !res) res = -1;
913 break;
914 }
915
916 offset += cnt;
917 res += cnt;
918 count -= cnt;
919 }
920 }
921
922 return res;
923 }
924
925 /* read a full directory */
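/* the names are returned as a single malloc'ed block of NUL-separated strings
 * in req->data2ptr; req->result is the number of entries ("." and ".."
 * excluded) or -1 on error. */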
926 static void scandir_ (aio_req req, worker *self)
927 {
928 DIR *dirp;
929 union
930 {
931 struct dirent d;
932 char b [offsetof (struct dirent, d_name) + NAME_MAX + 1];
933 } *u;
934 struct dirent *entp;
935 char *name, *names;
936 int memlen = 4096;
937 int memofs = 0;
938 int res = 0;
939 int errorno;
940
941 LOCK (wrklock);
942 self->dirp = dirp = opendir (req->dataptr);
943 self->dbuf = u = malloc (sizeof (*u));
944 req->data2ptr = names = malloc (memlen);
945 UNLOCK (wrklock);
946
947 if (dirp && u && names)
948 for (;;)
949 {
950 errno = 0;
951 readdir_r (dirp, &u->d, &entp);
952
953 if (!entp)
954 break;
955
956 name = entp->d_name;
957
958 if (name [0] != '.' || (name [1] && (name [1] != '.' || name [2])))
959 {
960 int len = strlen (name) + 1;
961
962 res++;
963
964 while (memofs + len > memlen)
965 {
966 memlen *= 2;
967 LOCK (wrklock);
968 req->data2ptr = names = realloc (names, memlen);
969 UNLOCK (wrklock);
970
971 if (!names)
972 break;
973 }
974
975 memcpy (names + memofs, name, len);
976 memofs += len;
977 }
978 }
979
980 if (errno)
981 res = -1;
982
983 req->result = res;
984 }
985
986 /*****************************************************************************/
987
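/* worker main loop: take requests off req_queue, execute them synchronously,
 * push the result onto res_queue and nudge the result pipe; a worker that has
 * waited IDLE_TIMEOUT seconds exits if more than max_idle workers are idle. */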
988 static void *aio_proc (void *thr_arg)
989 {
990 aio_req req;
991 struct timespec ts;
992 worker *self = (worker *)thr_arg;
993
994 /* try to distribute timeouts somewhat evenly */
995 ts.tv_nsec = (((unsigned long)self + (unsigned long)ts.tv_sec) & 1023UL)
996 * (1000000000UL / 1024UL);
997
998 for (;;)
999 {
1000 ts.tv_sec = time (0) + IDLE_TIMEOUT;
1001
1002 LOCK (reqlock);
1003
1004 for (;;)
1005 {
1006 self->req = req = reqq_shift (&req_queue);
1007
1008 if (req)
1009 break;
1010
1011 ++idle;
1012
1013 if (pthread_cond_timedwait (&reqwait, &reqlock, &ts)
1014 == ETIMEDOUT)
1015 {
1016 if (idle > max_idle)
1017 {
1018 --idle;
1019 UNLOCK (reqlock);
1020 LOCK (wrklock);
1021 --started;
1022 UNLOCK (wrklock);
1023 goto quit;
1024 }
1025
1026 /* we are allowed to idle, so do so without any timeout */
1027 pthread_cond_wait (&reqwait, &reqlock);
1028 ts.tv_sec = time (0) + IDLE_TIMEOUT;
1029 }
1030
1031 --idle;
1032 }
1033
1034 --nready;
1035
1036 UNLOCK (reqlock);
1037
1038 errno = 0; /* strictly unnecessary */
1039
1040 if (!(req->flags & FLAG_CANCELLED))
1041 switch (req->type)
1042 {
1043 case REQ_READ: req->result = pread (req->fd, req->dataptr, req->length, req->offset); break;
1044 case REQ_WRITE: req->result = pwrite (req->fd, req->dataptr, req->length, req->offset); break;
1045
1046 case REQ_READAHEAD: req->result = readahead (req->fd, req->offset, req->length); break;
1047 case REQ_SENDFILE: req->result = sendfile_ (req->fd, req->fd2, req->offset, req->length, self); break;
1048
1049 case REQ_STAT: req->result = stat (req->dataptr, req->statdata); break;
1050 case REQ_LSTAT: req->result = lstat (req->dataptr, req->statdata); break;
1051 case REQ_FSTAT: req->result = fstat (req->fd , req->statdata); break;
1052
1053 case REQ_OPEN: req->result = open (req->dataptr, req->fd, req->mode); break;
1054 case REQ_CLOSE: req->result = close (req->fd); break;
1055 case REQ_UNLINK: req->result = unlink (req->dataptr); break;
1056 case REQ_RMDIR: req->result = rmdir (req->dataptr); break;
1057 case REQ_RENAME: req->result = rename (req->data2ptr, req->dataptr); break;
1058 case REQ_LINK: req->result = link (req->data2ptr, req->dataptr); break;
1059 case REQ_SYMLINK: req->result = symlink (req->data2ptr, req->dataptr); break;
1060 case REQ_MKNOD: req->result = mknod (req->data2ptr, req->mode, (dev_t)req->offset); break;
1061
1062 case REQ_FDATASYNC: req->result = fdatasync (req->fd); break;
1063 case REQ_FSYNC: req->result = fsync (req->fd); break;
1064 case REQ_READDIR: scandir_ (req, self); break;
1065
1066 case REQ_BUSY:
1067 {
1068 struct timeval tv;
1069
1070 tv.tv_sec = req->fd;
1071 tv.tv_usec = req->fd2;
1072
1073 req->result = select (0, 0, 0, 0, &tv);
1074 }
1075
1076 case REQ_GROUP:
1077 case REQ_NOP:
1078 break;
1079
1080 case REQ_QUIT:
1081 goto quit;
1082
1083 default:
1084 req->result = ENOSYS;
1085 break;
1086 }
1087
1088 req->errorno = errno;
1089
1090 LOCK (reslock);
1091
1092 ++npending;
1093
1094 if (!reqq_push (&res_queue, req))
1095 /* write a dummy byte to the pipe so fh becomes ready */
1096 write (respipe [1], &respipe, 1);
1097
1098 self->req = 0;
1099 worker_clear (self);
1100
1101 UNLOCK (reslock);
1102 }
1103
1104 quit:
1105 LOCK (wrklock);
1106 worker_free (self);
1107 UNLOCK (wrklock);
1108
1109 return 0;
1110 }
1111
1112 /*****************************************************************************/
1113
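/* fork support: take every lock before fork and release it afterwards; the
 * child additionally discards all queued requests and worker records, resets
 * the counters and recreates the result pipe. */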
1114 static void atfork_prepare (void)
1115 {
1116 LOCK (wrklock);
1117 LOCK (reqlock);
1118 LOCK (reslock);
1119 #if !HAVE_PREADWRITE
1120 LOCK (preadwritelock);
1121 #endif
1122 #if !HAVE_READDIR_R
1123 LOCK (readdirlock);
1124 #endif
1125 }
1126
1127 static void atfork_parent (void)
1128 {
1129 #if !HAVE_READDIR_R
1130 UNLOCK (readdirlock);
1131 #endif
1132 #if !HAVE_PREADWRITE
1133 UNLOCK (preadwritelock);
1134 #endif
1135 UNLOCK (reslock);
1136 UNLOCK (reqlock);
1137 UNLOCK (wrklock);
1138 }
1139
1140 static void atfork_child (void)
1141 {
1142 aio_req prv;
1143
1144 while (prv = reqq_shift (&req_queue))
1145 req_free (prv);
1146
1147 while (prv = reqq_shift (&res_queue))
1148 req_free (prv);
1149
1150 while (wrk_first.next != &wrk_first)
1151 {
1152 worker *wrk = wrk_first.next;
1153
1154 if (wrk->req)
1155 req_free (wrk->req);
1156
1157 worker_clear (wrk);
1158 worker_free (wrk);
1159 }
1160
1161 started = 0;
1162 idle = 0;
1163 nreqs = 0;
1164 nready = 0;
1165 npending = 0;
1166
1167 close (respipe [0]);
1168 close (respipe [1]);
1169 create_pipe ();
1170
1171 atfork_parent ();
1172 }
1173
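/* boilerplate shared by the XSUBs below: dREQ validates the callback,
 * allocates the request and consumes the one-shot priority set by aioreq_pri;
 * REQ_SEND queues the request and, unless called in void context, pushes the
 * perl request object onto the stack. */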
1174 #define dREQ \
1175 aio_req req; \
1176 int req_pri = next_pri; \
1177 next_pri = DEFAULT_PRI + PRI_BIAS; \
1178 \
1179 if (SvOK (callback) && !SvROK (callback)) \
1180 croak ("callback must be undef or of reference type"); \
1181 \
1182 Newz (0, req, 1, aio_cb); \
1183 if (!req) \
1184 croak ("out of memory during aio_req allocation"); \
1185 \
1186 req->callback = newSVsv (callback); \
1187 req->pri = req_pri
1188
1189 #define REQ_SEND \
1190 req_send (req); \
1191 \
1192 if (GIMME_V != G_VOID) \
1193 XPUSHs (req_sv (req, AIO_REQ_KLASS));
1194
1195 MODULE = IO::AIO PACKAGE = IO::AIO
1196
1197 PROTOTYPES: ENABLE
1198
1199 BOOT:
1200 {
1201 HV *stash = gv_stashpv ("IO::AIO", 1);
1202
1203 newCONSTSUB (stash, "EXDEV", newSViv (EXDEV));
1204 newCONSTSUB (stash, "O_RDONLY", newSViv (O_RDONLY));
1205 newCONSTSUB (stash, "O_WRONLY", newSViv (O_WRONLY));
1206 newCONSTSUB (stash, "O_CREAT", newSViv (O_CREAT));
1207 newCONSTSUB (stash, "O_TRUNC", newSViv (O_TRUNC));
1208 newCONSTSUB (stash, "S_IFIFO", newSViv (S_IFIFO));
1209
1210 create_pipe ();
1211 pthread_atfork (atfork_prepare, atfork_parent, atfork_child);
1212 }
1213
1214 void
1215 min_parallel (int nthreads)
1216 PROTOTYPE: $
1217
1218 void
1219 max_parallel (int nthreads)
1220 PROTOTYPE: $
1221
1222 int
1223 max_outstanding (int maxreqs)
1224 PROTOTYPE: $
1225 CODE:
1226 RETVAL = max_outstanding;
1227 max_outstanding = maxreqs;
1228 OUTPUT:
1229 RETVAL
1230
1231 void
1232 aio_open (pathname,flags,mode,callback=&PL_sv_undef)
1233 SV * pathname
1234 int flags
1235 int mode
1236 SV * callback
1237 PROTOTYPE: $$$;$
1238 PPCODE:
1239 {
1240 dREQ;
1241
1242 req->type = REQ_OPEN;
1243 req->data = newSVsv (pathname);
1244 req->dataptr = SvPVbyte_nolen (req->data);
1245 req->fd = flags;
1246 req->mode = mode;
1247
1248 REQ_SEND;
1249 }
1250
1251 void
1252 aio_close (fh,callback=&PL_sv_undef)
1253 SV * fh
1254 SV * callback
1255 PROTOTYPE: $;$
1256 ALIAS:
1257 aio_close = REQ_CLOSE
1258 aio_fsync = REQ_FSYNC
1259 aio_fdatasync = REQ_FDATASYNC
1260 PPCODE:
1261 {
1262 dREQ;
1263
1264 req->type = ix;
1265 req->fh = newSVsv (fh);
1266 req->fd = PerlIO_fileno (IoIFP (sv_2io (fh)));
1267
1268 REQ_SEND;
1269 }
1270
1271 void
1272 aio_read (fh,offset,length,data,dataoffset,callback=&PL_sv_undef)
1273 SV * fh
1274 UV offset
1275 IV length
1276 SV * data
1277 IV dataoffset
1278 SV * callback
1279 ALIAS:
1280 aio_read = REQ_READ
1281 aio_write = REQ_WRITE
1282 PROTOTYPE: $$$$$;$
1283 PPCODE:
1284 {
1285 aio_req req;
1286 STRLEN svlen;
1287 char *svptr = SvPVbyte (data, svlen);
1288
1289 SvUPGRADE (data, SVt_PV);
1290 SvPOK_on (data);
1291
1292 if (dataoffset < 0)
1293 dataoffset += svlen;
1294
1295 if (dataoffset < 0 || dataoffset > svlen)
1296 croak ("data offset outside of string");
1297
1298 if (ix == REQ_WRITE)
1299 {
1300 /* write: check length and adjust. */
1301 if (length < 0 || length + dataoffset > svlen)
1302 length = svlen - dataoffset;
1303 }
1304 else
1305 {
1306 /* read: grow scalar as necessary */
1307 svptr = SvGROW (data, length + dataoffset);
1308 }
1309
1310 if (length < 0)
1311 croak ("length must not be negative");
1312
1313 {
1314 dREQ;
1315
1316 req->type = ix;
1317 req->fh = newSVsv (fh);
1318 req->fd = PerlIO_fileno (ix == REQ_READ ? IoIFP (sv_2io (fh))
1319 : IoOFP (sv_2io (fh)));
1320 req->offset = offset;
1321 req->length = length;
1322 req->data = SvREFCNT_inc (data);
1323 req->dataptr = (char *)svptr + dataoffset;
1324
1325 if (!SvREADONLY (data))
1326 {
1327 SvREADONLY_on (data);
1328 req->data2ptr = (void *)data;
1329 }
1330
1331 REQ_SEND;
1332 }
1333 }
1334
1335 void
1336 aio_sendfile (out_fh,in_fh,in_offset,length,callback=&PL_sv_undef)
1337 SV * out_fh
1338 SV * in_fh
1339 UV in_offset
1340 UV length
1341 SV * callback
1342 PROTOTYPE: $$$$;$
1343 PPCODE:
1344 {
1345 dREQ;
1346
1347 req->type = REQ_SENDFILE;
1348 req->fh = newSVsv (out_fh);
1349 req->fd = PerlIO_fileno (IoIFP (sv_2io (out_fh)));
1350 req->fh2 = newSVsv (in_fh);
1351 req->fd2 = PerlIO_fileno (IoIFP (sv_2io (in_fh)));
1352 req->offset = in_offset;
1353 req->length = length;
1354
1355 REQ_SEND;
1356 }
1357
1358 void
1359 aio_readahead (fh,offset,length,callback=&PL_sv_undef)
1360 SV * fh
1361 UV offset
1362 IV length
1363 SV * callback
1364 PROTOTYPE: $$$;$
1365 PPCODE:
1366 {
1367 dREQ;
1368
1369 req->type = REQ_READAHEAD;
1370 req->fh = newSVsv (fh);
1371 req->fd = PerlIO_fileno (IoIFP (sv_2io (fh)));
1372 req->offset = offset;
1373 req->length = length;
1374
1375 REQ_SEND;
1376 }
1377
1378 void
1379 aio_stat (fh_or_path,callback=&PL_sv_undef)
1380 SV * fh_or_path
1381 SV * callback
1382 ALIAS:
1383 aio_stat = REQ_STAT
1384 aio_lstat = REQ_LSTAT
1385 PPCODE:
1386 {
1387 dREQ;
1388
1389 New (0, req->statdata, 1, Stat_t);
1390 if (!req->statdata)
1391 {
1392 req_free (req);
1393 croak ("out of memory during aio_req->statdata allocation");
1394 }
1395
1396 if (SvPOK (fh_or_path))
1397 {
1398 req->type = ix;
1399 req->data = newSVsv (fh_or_path);
1400 req->dataptr = SvPVbyte_nolen (req->data);
1401 }
1402 else
1403 {
1404 req->type = REQ_FSTAT;
1405 req->fh = newSVsv (fh_or_path);
1406 req->fd = PerlIO_fileno (IoIFP (sv_2io (fh_or_path)));
1407 }
1408
1409 REQ_SEND;
1410 }
1411
1412 void
1413 aio_unlink (pathname,callback=&PL_sv_undef)
1414 SV * pathname
1415 SV * callback
1416 ALIAS:
1417 aio_unlink = REQ_UNLINK
1418 aio_rmdir = REQ_RMDIR
1419 aio_readdir = REQ_READDIR
1420 PPCODE:
1421 {
1422 dREQ;
1423
1424 req->type = ix;
1425 req->data = newSVsv (pathname);
1426 req->dataptr = SvPVbyte_nolen (req->data);
1427
1428 REQ_SEND;
1429 }
1430
1431 void
1432 aio_link (oldpath,newpath,callback=&PL_sv_undef)
1433 SV * oldpath
1434 SV * newpath
1435 SV * callback
1436 ALIAS:
1437 aio_link = REQ_LINK
1438 aio_symlink = REQ_SYMLINK
1439 aio_rename = REQ_RENAME
1440 PPCODE:
1441 {
1442 dREQ;
1443
1444 req->type = ix;
1445 req->fh = newSVsv (oldpath);
1446 req->data2ptr = SvPVbyte_nolen (req->fh);
1447 req->data = newSVsv (newpath);
1448 req->dataptr = SvPVbyte_nolen (req->data);
1449
1450 REQ_SEND;
1451 }
1452
1453 void
1454 aio_mknod (pathname,mode,dev,callback=&PL_sv_undef)
1455 SV * pathname
1456 SV * callback
1457 UV mode
1458 UV dev
1459 PPCODE:
1460 {
1461 dREQ;
1462
1463 req->type = REQ_MKNOD;
1464 req->data = newSVsv (pathname);
1465 req->dataptr = SvPVbyte_nolen (req->data);
1466 req->mode = (mode_t)mode;
1467 req->offset = dev;
1468
1469 REQ_SEND;
1470 }
1471
1472 void
1473 aio_busy (delay,callback=&PL_sv_undef)
1474 double delay
1475 SV * callback
1476 PPCODE:
1477 {
1478 dREQ;
1479
1480 req->type = REQ_BUSY;
1481 req->fd = delay < 0. ? 0 : delay;
1482 req->fd2 = delay < 0. ? 0 : 1000. * (delay - req->fd);
1483
1484 REQ_SEND;
1485 }
1486
1487 void
1488 aio_group (callback=&PL_sv_undef)
1489 SV * callback
1490 PROTOTYPE: ;$
1491 PPCODE:
1492 {
1493 dREQ;
1494
1495 req->type = REQ_GROUP;
1496 req_send (req);
1497
1498 XPUSHs (req_sv (req, AIO_GRP_KLASS));
1499 }
1500
1501 void
1502 aio_nop (callback=&PL_sv_undef)
1503 SV * callback
1504 PPCODE:
1505 {
1506 dREQ;
1507
1508 req->type = REQ_NOP;
1509
1510 REQ_SEND;
1511 }
1512
1513 int
1514 aioreq_pri (int pri = 0)
1515 PROTOTYPE: ;$
1516 CODE:
1517 RETVAL = next_pri - PRI_BIAS;
1518 if (items > 0)
1519 {
1520 if (pri < PRI_MIN) pri = PRI_MIN;
1521 if (pri > PRI_MAX) pri = PRI_MAX;
1522 next_pri = pri + PRI_BIAS;
1523 }
1524 OUTPUT:
1525 RETVAL
1526
1527 void
1528 aioreq_nice (int nice = 0)
1529 CODE:
1530 nice = next_pri - nice;
1531 if (nice < PRI_MIN) nice = PRI_MIN;
1532 if (nice > PRI_MAX) nice = PRI_MAX;
1533 next_pri = nice + PRI_BIAS;
1534
1535 void
1536 flush ()
1537 PROTOTYPE:
1538 CODE:
1539 while (nreqs)
1540 {
1541 poll_wait ();
1542 poll_cb (0);
1543 }
1544
1545 void
1546 poll()
1547 PROTOTYPE:
1548 CODE:
1549 if (nreqs)
1550 {
1551 poll_wait ();
1552 poll_cb (0);
1553 }
1554
1555 int
1556 poll_fileno()
1557 PROTOTYPE:
1558 CODE:
1559 RETVAL = respipe [0];
1560 OUTPUT:
1561 RETVAL
1562
1563 int
1564 poll_cb(...)
1565 PROTOTYPE:
1566 CODE:
1567 RETVAL = poll_cb (0);
1568 OUTPUT:
1569 RETVAL
1570
1571 int
1572 poll_some(int max = 0)
1573 PROTOTYPE: $
1574 CODE:
1575 RETVAL = poll_cb (max);
1576 OUTPUT:
1577 RETVAL
1578
1579 void
1580 poll_wait()
1581 PROTOTYPE:
1582 CODE:
1583 if (nreqs)
1584 poll_wait ();
1585
1586 int
1587 nreqs()
1588 PROTOTYPE:
1589 CODE:
1590 RETVAL = nreqs;
1591 OUTPUT:
1592 RETVAL
1593
1594 int
1595 nready()
1596 PROTOTYPE:
1597 CODE:
1598 RETVAL = get_nready ();
1599 OUTPUT:
1600 RETVAL
1601
1602 int
1603 npending()
1604 PROTOTYPE:
1605 CODE:
1606 RETVAL = get_npending ();
1607 OUTPUT:
1608 RETVAL
1609
1610 PROTOTYPES: DISABLE
1611
1612 MODULE = IO::AIO PACKAGE = IO::AIO::REQ
1613
1614 void
1615 cancel (aio_req_ornot req)
1616 CODE:
1617 req_cancel (req);
1618
1619 void
1620 cb (aio_req_ornot req, SV *callback=&PL_sv_undef)
1621 CODE:
1622 SvREFCNT_dec (req->callback);
1623 req->callback = newSVsv (callback);
1624
1625 MODULE = IO::AIO PACKAGE = IO::AIO::GRP
1626
1627 void
1628 add (aio_req grp, ...)
1629 PPCODE:
1630 {
1631 int i;
1632 aio_req req;
1633
1634 if (grp->fd == 2)
1635 croak ("cannot add requests to IO::AIO::GRP after the group finished");
1636
1637 for (i = 1; i < items; ++i )
1638 {
1639 if (GIMME_V != G_VOID)
1640 XPUSHs (sv_2mortal (newSVsv (ST (i))));
1641
1642 req = SvAIO_REQ (ST (i));
1643
1644 if (req)
1645 {
1646 ++grp->length;
1647 req->grp = grp;
1648
1649 req->grp_prev = 0;
1650 req->grp_next = grp->grp_first;
1651
1652 if (grp->grp_first)
1653 grp->grp_first->grp_prev = req;
1654
1655 grp->grp_first = req;
1656 }
1657 }
1658 }
1659
1660 void
1661 cancel_subs (aio_req_ornot req)
1662 CODE:
1663 req_cancel_subs (req);
1664
1665 void
1666 result (aio_req grp, ...)
1667 CODE:
1668 {
1669 int i;
1670 AV *av;
1671
1672 grp->errorno = errno;
1673
1674 av = newAV ();
1675
1676 for (i = 1; i < items; ++i )
1677 av_push (av, newSVsv (ST (i)));
1678
1679 SvREFCNT_dec (grp->data);
1680 grp->data = (SV *)av;
1681 }
1682
1683 void
1684 errno (aio_req grp, int errorno = errno)
1685 CODE:
1686 grp->errorno = errorno;
1687
1688 void
1689 limit (aio_req grp, int limit)
1690 CODE:
1691 grp->fd2 = limit;
1692 aio_grp_feed (grp);
1693
1694 void
1695 feed (aio_req grp, SV *callback=&PL_sv_undef)
1696 CODE:
1697 {
1698 SvREFCNT_dec (grp->fh2);
1699 grp->fh2 = newSVsv (callback);
1700
1701 if (grp->fd2 <= 0)
1702 grp->fd2 = 2;
1703
1704 aio_grp_feed (grp);
1705 }
1706