/* solaris */
#define _POSIX_PTHREAD_SEMANTICS 1

#if __linux && !defined(_GNU_SOURCE)
# define _GNU_SOURCE
#endif

/* just in case */
#define _REENTRANT 1

#include <errno.h>

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include <pthread.h>

#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <dirent.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/select.h>

#include <db.h>

/* number of seconds after which idle threads exit */
#define IDLE_TIMEOUT 10

/* whether word reads are potentially non-atomic.
 * this is conservative, likely most arches this runs
 * on have atomic word read/writes.
 */
#ifndef WORDACCESS_UNSAFE
# if __i386 || __x86_64
#  define WORDACCESS_UNSAFE 0
# else
#  define WORDACCESS_UNSAFE 1
# endif
#endif

typedef SV SV8; /* byte-sv, used for argument-checking */

enum {
  REQ_QUIT,
  REQ_ENV_OPEN, REQ_ENV_CLOSE,
  REQ_DB_OPEN, REQ_DB_CLOSE,
};

typedef struct aio_cb
{
  struct aio_cb *volatile next;

  SV *callback;
  int type, pri, errorno;

  DB_ENV *env;
  DB *db;
  DB_TXN *txn;
  DBC *cursor;

  int int1, int2;
  U32 uint1, uint2;
  char *buf1, *buf2;
} aio_cb;

typedef aio_cb *aio_req;

enum {
  PRI_MIN     = -4,
  PRI_MAX     =  4,

  DEFAULT_PRI = 0,
  PRI_BIAS    = -PRI_MIN,
  NUM_PRI     = PRI_MAX + PRI_BIAS + 1,
};

#define AIO_TICKS ((1000000 + 1023) >> 10)

static unsigned int max_poll_time = 0;
static unsigned int max_poll_reqs = 0;

/* calculate time difference in ~1/AIO_TICKS of a second */
static int tvdiff (struct timeval *tv1, struct timeval *tv2)
{
  return  (tv2->tv_sec  - tv1->tv_sec ) * AIO_TICKS
       + ((tv2->tv_usec - tv1->tv_usec) >> 10);
}

static int next_pri = DEFAULT_PRI + PRI_BIAS;

static unsigned int started, idle, wanted;

#if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)
# define AIO_MUTEX_INIT PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
#else
# define AIO_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER
#endif

#define LOCK(mutex)   pthread_mutex_lock   (&(mutex))
#define UNLOCK(mutex) pthread_mutex_unlock (&(mutex))

/* worker threads management */
static pthread_mutex_t wrklock = AIO_MUTEX_INIT;

typedef struct worker {
  /* locked by wrklock */
  struct worker *prev, *next;

  pthread_t tid;

  /* locked by reslock, reqlock or wrklock */
  aio_req req; /* currently processed request */
  void *dbuf;
  DIR *dirp;
} worker;

static worker wrk_first = { &wrk_first, &wrk_first, 0 };

static void worker_clear (worker *wrk)
{
}

static void worker_free (worker *wrk)
{
  wrk->next->prev = wrk->prev;
  wrk->prev->next = wrk->next;

  free (wrk);
}

static volatile unsigned int nreqs, nready, npending;
static volatile unsigned int max_idle = 4;
static volatile unsigned int max_outstanding = 0xffffffff;
static int respipe [2];

static pthread_mutex_t reslock = AIO_MUTEX_INIT;
static pthread_mutex_t reqlock = AIO_MUTEX_INIT;
static pthread_cond_t  reqwait = PTHREAD_COND_INITIALIZER;

#if WORDACCESS_UNSAFE

static unsigned int get_nready ()
{
  unsigned int retval;

  LOCK (reqlock);
  retval = nready;
  UNLOCK (reqlock);

  return retval;
}

static unsigned int get_npending ()
{
  unsigned int retval;

  LOCK (reslock);
  retval = npending;
  UNLOCK (reslock);

  return retval;
}

static unsigned int get_nthreads ()
{
  unsigned int retval;

  LOCK (wrklock);
  retval = started;
  UNLOCK (wrklock);

  return retval;
}

#else

# define get_nready()   nready
# define get_npending() npending
# define get_nthreads() started

#endif
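
/*
 * counter bookkeeping, as implemented below: req_send () increments nreqs and
 * nready, a worker decrements nready when it picks a request up and increments
 * npending when it finishes it, and poll_cb () decrements npending and nreqs
 * when it reaps the result. at any instant, therefore, roughly:
 *
 *   nreqs == nready + (requests currently being executed) + npending
 *
 * e.g. with 10 outstanding requests, 3 inside worker threads and 2 finished
 * but not yet reaped: nready == 5, npending == 2, nreqs == 10.
 */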

/*
 * a somewhat faster data structure might be nice, but
 * with 8 priorities this actually needs <20 insns
 * per shift, the most expensive operation.
 */
typedef struct {
  aio_req qs[NUM_PRI], qe[NUM_PRI]; /* qstart, qend */
  int size;
} reqq;

static reqq req_queue;
static reqq res_queue;

int reqq_push (reqq *q, aio_req req)
{
  int pri = req->pri;
  req->next = 0;

  if (q->qe[pri])
    {
      q->qe[pri]->next = req;
      q->qe[pri] = req;
    }
  else
    q->qe[pri] = q->qs[pri] = req;

  return q->size++;
}

aio_req reqq_shift (reqq *q)
{
  int pri;

  if (!q->size)
    return 0;

  --q->size;

  for (pri = NUM_PRI; pri--; )
    {
      aio_req req = q->qs[pri];

      if (req)
        {
          if (!(q->qs[pri] = req->next))
            q->qe[pri] = 0;

          return req;
        }
    }

  abort ();
}

static int poll_cb ();
static void req_free (aio_req req);
static void req_cancel (aio_req req);

static int req_invoke (aio_req req)
{
  dSP;

  if (SvOK (req->callback))
    {
      ENTER;
      SAVETMPS;
      PUSHMARK (SP);
      EXTEND (SP, 1);

      switch (req->type)
        {
        }

      PUTBACK;
      call_sv (req->callback, G_VOID | G_EVAL);
      SPAGAIN;

      FREETMPS;
      LEAVE;
    }

  return !SvTRUE (ERRSV);
}

static void req_free (aio_req req)
{
  free (req->buf1);
  free (req->buf2);
  Safefree (req);
}

static void *aio_proc (void *arg);

static void start_thread (void)
{
  sigset_t fullsigset, oldsigset;
  pthread_attr_t attr;

  worker *wrk = calloc (1, sizeof (worker));

  if (!wrk)
    croak ("unable to allocate worker thread data");

  pthread_attr_init (&attr);
  pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
#ifdef PTHREAD_SCOPE_PROCESS
  pthread_attr_setscope (&attr, PTHREAD_SCOPE_PROCESS);
#endif

  sigfillset (&fullsigset);

  LOCK (wrklock);
  pthread_sigmask (SIG_SETMASK, &fullsigset, &oldsigset);

  if (pthread_create (&wrk->tid, &attr, aio_proc, (void *)wrk) == 0)
    {
      wrk->prev = &wrk_first;
      wrk->next = wrk_first.next;
      wrk_first.next->prev = wrk;
      wrk_first.next = wrk;
      ++started;
    }
  else
    free (wrk);

  pthread_sigmask (SIG_SETMASK, &oldsigset, 0);
  UNLOCK (wrklock);
}

static void maybe_start_thread ()
{
  if (get_nthreads () >= wanted)
    return;

  /* todo: maybe use idle here, but might be less exact */
  if (0 <= (int)get_nthreads () + (int)get_npending () - (int)nreqs)
    return;

  start_thread ();
}

static void req_send (aio_req req)
{
  ++nreqs;

  LOCK (reqlock);
  ++nready;
  reqq_push (&req_queue, req);
  pthread_cond_signal (&reqwait);
  UNLOCK (reqlock);

  maybe_start_thread ();
}

static void end_thread (void)
{
  aio_req req;

  Newz (0, req, 1, aio_cb);

  req->type = REQ_QUIT;
  req->pri  = PRI_MAX + PRI_BIAS;

  LOCK (reqlock);
  reqq_push (&req_queue, req);
  pthread_cond_signal (&reqwait);
  UNLOCK (reqlock);

  LOCK (wrklock);
  --started;
  UNLOCK (wrklock);
}
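
/*
 * request lifecycle, as implemented in this file:
 *
 *   XSUB (via the dREQ/REQ_SEND macros below)
 *     -> req_send ():  push onto req_queue, ++nreqs, ++nready, signal reqwait
 *     -> aio_proc ():  shift from req_queue (--nready), execute the request
 *     -> push onto res_queue (++npending), write one byte to respipe [1]
 *     -> poll_cb ():   shift from res_queue (--npending, --nreqs),
 *                      req_invoke () runs the perl callback, req_free ()
 */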

static void set_max_idle (int nthreads)
{
  if (WORDACCESS_UNSAFE) LOCK   (reqlock);
  max_idle = nthreads <= 0 ? 1 : nthreads;
  if (WORDACCESS_UNSAFE) UNLOCK (reqlock);
}

static void min_parallel (int nthreads)
{
  if (wanted < nthreads)
    wanted = nthreads;
}

static void max_parallel (int nthreads)
{
  if (wanted > nthreads)
    wanted = nthreads;

  while (started > wanted)
    end_thread ();
}

static void poll_wait ()
{
  fd_set rfd;

  while (nreqs)
    {
      int size;

      if (WORDACCESS_UNSAFE) LOCK   (reslock);
      size = res_queue.size;
      if (WORDACCESS_UNSAFE) UNLOCK (reslock);

      if (size)
        return;

      maybe_start_thread ();

      FD_ZERO (&rfd);
      FD_SET (respipe [0], &rfd);

      select (respipe [0] + 1, &rfd, 0, 0, 0);
    }
}

static int poll_cb ()
{
  dSP;
  int count = 0;
  int maxreqs = max_poll_reqs;
  int do_croak = 0;
  struct timeval tv_start, tv_now;
  aio_req req;

  if (max_poll_time)
    gettimeofday (&tv_start, 0);

  for (;;)
    {
      for (;;)
        {
          maybe_start_thread ();

          LOCK (reslock);
          req = reqq_shift (&res_queue);

          if (req)
            {
              --npending;

              if (!res_queue.size)
                {
                  /* read any signals sent by the worker threads */
                  char buf [4];
                  while (read (respipe [0], buf, 4) == 4)
                    ;
                }
            }

          UNLOCK (reslock);

          if (!req)
            break;

          --nreqs;

          if (!req_invoke (req))
            {
              req_free (req);
              croak (0);
            }

          count++;

          req_free (req);

          if (maxreqs && !--maxreqs)
            break;

          if (max_poll_time)
            {
              gettimeofday (&tv_now, 0);

              if (tvdiff (&tv_start, &tv_now) >= max_poll_time)
                break;
            }
        }

      if (nreqs <= max_outstanding)
        break;

      poll_wait ();

      ++maxreqs;
    }

  return count;
}

static void create_pipe ()
{
  if (pipe (respipe))
    croak ("unable to initialize result pipe");

  if (fcntl (respipe [0], F_SETFL, O_NONBLOCK))
    croak ("cannot set result pipe to nonblocking mode");

  if (fcntl (respipe [1], F_SETFL, O_NONBLOCK))
    croak ("cannot set result pipe to nonblocking mode");
}

/*****************************************************************************/

static void *aio_proc (void *thr_arg)
{
  aio_req req;
  struct timespec ts;
  worker *self = (worker *)thr_arg;

  /* try to distribute timeouts somewhat evenly */
  ts.tv_nsec = (((unsigned long)self + (unsigned long)ts.tv_sec) & 1023UL)
               * (1000000000UL / 1024UL);

  for (;;)
    {
      ts.tv_sec = time (0) + IDLE_TIMEOUT;

      LOCK (reqlock);

      for (;;)
        {
          self->req = req = reqq_shift (&req_queue);

          if (req)
            break;

          ++idle;

          if (pthread_cond_timedwait (&reqwait, &reqlock, &ts) == ETIMEDOUT)
            {
              if (idle > max_idle)
                {
                  --idle;
                  UNLOCK (reqlock);
                  LOCK (wrklock);
                  --started;
                  UNLOCK (wrklock);
                  goto quit;
                }

              /* we are allowed to idle, so do so without any timeout */
              pthread_cond_wait (&reqwait, &reqlock);
              ts.tv_sec = time (0) + IDLE_TIMEOUT;
            }

          --idle;
        }

      --nready;

      UNLOCK (reqlock);

      errno = 0; /* strictly unnecessary */

      switch (req->type)
        {
          case REQ_QUIT:
            goto quit;

          default:
            //req->result = ENOSYS;
            break;
        }

      //req->errorno = errno;

      LOCK (reslock);

      ++npending;

      if (!reqq_push (&res_queue, req))
        /* write a dummy byte to the pipe so fh becomes ready */
        write (respipe [1], &respipe, 1);

      self->req = 0;
      worker_clear (self);

      UNLOCK (reslock);
    }

quit:
  LOCK (wrklock);
  worker_free (self);
  UNLOCK (wrklock);

  return 0;
}
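
/*
 * the type switch in aio_proc () above only handles REQ_QUIT so far; the
 * other request types are queued but not yet executed. a hypothetical handler
 * for REQ_ENV_OPEN (illustrative sketch only, not implemented here) could use
 * the fields stashed by bdb_env_open () further below:
 *
 *   case REQ_ENV_OPEN:
 *     req->errorno = req->env->open (req->env, req->buf1, req->uint1, req->int1);
 *     break;
 */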

/*****************************************************************************/

static void atfork_prepare (void)
{
  LOCK (wrklock);
  LOCK (reqlock);
  LOCK (reslock);
}

static void atfork_parent (void)
{
  UNLOCK (reslock);
  UNLOCK (reqlock);
  UNLOCK (wrklock);
}

static void atfork_child (void)
{
  aio_req prv;

  while ((prv = reqq_shift (&req_queue)))
    req_free (prv);

  while ((prv = reqq_shift (&res_queue)))
    req_free (prv);

  while (wrk_first.next != &wrk_first)
    {
      worker *wrk = wrk_first.next;

      if (wrk->req)
        req_free (wrk->req);

      worker_clear (wrk);
      worker_free (wrk);
    }

  started  = 0;
  idle     = 0;
  nreqs    = 0;
  nready   = 0;
  npending = 0;

  close (respipe [0]);
  close (respipe [1]);

  create_pipe ();

  atfork_parent ();
}

#define dREQ(reqtype)                                           \
  aio_req req;                                                  \
  int req_pri = next_pri;                                       \
  next_pri = DEFAULT_PRI + PRI_BIAS;                            \
                                                                \
  if (SvOK (callback) && !SvROK (callback))                     \
    croak ("callback must be undef or of reference type");      \
                                                                \
  Newz (0, req, 1, aio_cb);                                     \
  if (!req)                                                     \
    croak ("out of memory during aio_req allocation");          \
                                                                \
  req->callback = newSVsv (callback);                           \
  req->type = (reqtype);                                        \
  req->pri  = req_pri

#define REQ_SEND                                                \
  req_send (req)

#define SvPTR(var, arg, type, class)                            \
  if (!SvOK (arg))                                              \
    (var) = 0;                                                  \
  else if (sv_derived_from ((arg), # class))                    \
    {                                                           \
      IV tmp = SvIV ((SV*) SvRV (arg));                         \
      (var) = INT2PTR (type, tmp);                              \
    }                                                           \
  else                                                          \
    Perl_croak (# var " is not of type " # class)

MODULE = BDB                PACKAGE = BDB

PROTOTYPES: ENABLE

BOOT:
{
        HV *stash = gv_stashpv ("BDB", 1);

        static const struct {
          const char *name;
          IV iv;
        } *civ, const_iv[] = {
#define const_iv(name) { # name, (IV)DB_ ## name },
          const_iv (RPCCLIENT)
          const_iv (INIT_CDB)
          const_iv (INIT_LOCK)
          const_iv (INIT_LOG)
          const_iv (INIT_MPOOL)
          const_iv (INIT_REP)
          const_iv (INIT_TXN)
          const_iv (RECOVER)
          const_iv (RECOVER_FATAL)
          const_iv (CREATE)
          const_iv (USE_ENVIRON)
          const_iv (USE_ENVIRON_ROOT)
          const_iv (LOCKDOWN)
          const_iv (PRIVATE)
          const_iv (REGISTER)
          const_iv (SYSTEM_MEM)
          const_iv (AUTO_COMMIT)
          const_iv (CDB_ALLDB)
          const_iv (DIRECT_DB)
          const_iv (DIRECT_LOG)
          const_iv (DSYNC_DB)
          const_iv (DSYNC_LOG)
          const_iv (LOG_AUTOREMOVE)
          const_iv (LOG_INMEMORY)
          const_iv (NOLOCKING)
          const_iv (MULTIVERSION)
          const_iv (NOMMAP)
          const_iv (NOPANIC)
          const_iv (OVERWRITE)
          const_iv (PANIC_ENVIRONMENT)
          const_iv (REGION_INIT)
          const_iv (TIME_NOTGRANTED)
          const_iv (TXN_NOSYNC)
          const_iv (TXN_SNAPSHOT)
          const_iv (TXN_WRITE_NOSYNC)
          const_iv (YIELDCPU)
          const_iv (ENCRYPT_AES)
          const_iv (XA_CREATE)
          const_iv (BTREE)
          const_iv (HASH)
          const_iv (QUEUE)
          const_iv (RECNO)
          const_iv (UNKNOWN)
          const_iv (EXCL)
          const_iv (READ_UNCOMMITTED)
          const_iv (TRUNCATE)
          const_iv (NOSYNC)
          const_iv (CHKSUM)
          const_iv (ENCRYPT)
          const_iv (TXN_NOT_DURABLE)
          const_iv (DUP)
          const_iv (DUPSORT)
          const_iv (RECNUM)
          const_iv (RENUMBER)
          const_iv (REVSPLITOFF)
          const_iv (INORDER)
          const_iv (CONSUME)
          const_iv (CONSUME_WAIT)
          const_iv (SNAPSHOT)
          const_iv (JOIN_ITEM)
          const_iv (RMW)

          const_iv (NOTFOUND)
          const_iv (KEYEMPTY)
          const_iv (LOCK_DEADLOCK)
          const_iv (LOCK_NOTGRANTED)
          const_iv (RUNRECOVERY)
        };

        for (civ = const_iv + sizeof (const_iv) / sizeof (const_iv [0]); civ-- > const_iv; )
          newCONSTSUB (stash, (char *)civ->name, newSViv (civ->iv));

        create_pipe ();
        pthread_atfork (atfork_prepare, atfork_parent, atfork_child);
}

void
max_poll_reqs (int nreqs)
        PROTOTYPE: $
        CODE:
        max_poll_reqs = nreqs;

void
max_poll_time (double nseconds)
        PROTOTYPE: $
        CODE:
        max_poll_time = nseconds * AIO_TICKS;

void
min_parallel (int nthreads)
        PROTOTYPE: $

void
max_parallel (int nthreads)
        PROTOTYPE: $

void
max_idle (int nthreads)
        PROTOTYPE: $
        CODE:
        set_max_idle (nthreads);

int
max_outstanding (int maxreqs)
        PROTOTYPE: $
        CODE:
        RETVAL = max_outstanding;
        max_outstanding = maxreqs;
        OUTPUT:
        RETVAL

int
bdbreq_pri (int pri = 0)
        PROTOTYPE: ;$
        CODE:
        RETVAL = next_pri - PRI_BIAS;
        if (items > 0)
          {
            if (pri < PRI_MIN) pri = PRI_MIN;
            if (pri > PRI_MAX) pri = PRI_MAX;
            next_pri = pri + PRI_BIAS;
          }
        OUTPUT:
        RETVAL

void
bdbreq_nice (int nice = 0)
        CODE:
        nice = next_pri - nice;
        if (nice < PRI_MIN) nice = PRI_MIN;
        if (nice > PRI_MAX) nice = PRI_MAX;
        next_pri = nice + PRI_BIAS;

void
flush ()
        PROTOTYPE:
        CODE:
        while (nreqs)
          {
            poll_wait ();
            poll_cb ();
          }

int
poll ()
        PROTOTYPE:
        CODE:
        poll_wait ();
        RETVAL = poll_cb ();
        OUTPUT:
        RETVAL
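
# poll_fileno () below returns the read end of the internal result pipe. an
# event loop can watch that descriptor for readability and call poll_cb ()
# when it becomes ready, instead of blocking in poll_wait ().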

int
poll_fileno ()
        PROTOTYPE:
        CODE:
        RETVAL = respipe [0];
        OUTPUT:
        RETVAL

int
poll_cb (...)
        PROTOTYPE:
        CODE:
        RETVAL = poll_cb ();
        OUTPUT:
        RETVAL

void
poll_wait ()
        PROTOTYPE:
        CODE:
        poll_wait ();

int
nreqs ()
        PROTOTYPE:
        CODE:
        RETVAL = nreqs;
        OUTPUT:
        RETVAL

int
nready ()
        PROTOTYPE:
        CODE:
        RETVAL = get_nready ();
        OUTPUT:
        RETVAL

int
npending ()
        PROTOTYPE:
        CODE:
        RETVAL = get_npending ();
        OUTPUT:
        RETVAL

int
nthreads ()
        PROTOTYPE:
        CODE:
        if (WORDACCESS_UNSAFE) LOCK   (wrklock);
        RETVAL = started;
        if (WORDACCESS_UNSAFE) UNLOCK (wrklock);
        OUTPUT:
        RETVAL

DB_ENV *
bdb_env_create (U32 env_flags = 0)
        CODE:
{
        int err = db_env_create (&RETVAL, env_flags);
        if (err)
          croak ("db_env_create: %s", db_strerror (err));
}

void
bdb_env_open (DB_ENV *env, char *db_home, U32 open_flags, int mode, SV *callback = 0)
        CODE:
{
        dREQ (REQ_ENV_OPEN);

        req->env   = env;
        req->uint1 = open_flags;
        req->int1  = mode;
        req->buf1  = strdup (db_home);

        REQ_SEND;
}

void
bdb_env_close (DB_ENV *env, U32 flags = 0, SV *callback = 0)
        CODE:
{
        dREQ (REQ_ENV_CLOSE);

        req->env   = env;
        req->uint1 = flags;

        REQ_SEND;
}

DB *
bdb_db_create (DB_ENV *env = 0, U32 flags = 0)
        CODE:
{
        int err = db_create (&RETVAL, env, flags);
        if (err)
          croak ("db_create: %s", db_strerror (err));
}

void
bdb_db_open (DB *db, DB_TXN *txnid, const char *file, const char *database, int type, U32 flags, int mode, SV *callback = 0)
        CODE:
{
        dREQ (REQ_DB_OPEN);

        req->db    = db;
        req->txn   = txnid;
        req->buf1  = strdup (file);
        req->buf2  = strdup (database);
        req->int1  = type;
        req->uint1 = flags;
        req->int2  = mode;

        REQ_SEND;
}

void
bdb_db_close (DB *db, U32 flags = 0, SV *callback = 0)
        CODE:
{
        dREQ (REQ_DB_CLOSE);

        req->db    = db;
        req->uint1 = flags;

        REQ_SEND;
}

MODULE = BDB                PACKAGE = BDB::Env

int
set_cachesize (DB_ENV *env, U32 gbytes, U32 bytes, int ncache = 0)

int
set_flags (DB_ENV *env, U32 flags, int onoff)

int
set_encrypt (DB_ENV *env, const char *password, U32 flags)

MODULE = BDB                PACKAGE = BDB::Db

int
set_cachesize (DB *db, U32 gbytes, U32 bytes, int ncache = 0)

int
set_flags (DB *db, U32 flags, int onoff)

int
set_encrypt (DB *db, const char *password, U32 flags)

int
set_lorder (DB *db, int lorder)

int
set_bt_minkey (DB *db, U32 minkey)

int
set_re_delim (DB *db, int delim)

int
set_re_pad (DB *db, int re_pad)

int
set_re_source (DB *db, char *source)

int
set_re_len (DB *db, U32 re_len)

int
set_h_ffactor (DB *db, U32 h_ffactor)

int
set_h_nelem (DB *db, U32 h_nelem)

int
set_q_extentsize (DB *db, U32 extentsize)
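
# the BDB::Env and BDB::Db declarations above are intended as thin wrappers
# around the Berkeley DB configuration methods of the same names
# (DB_ENV->set_cachesize, DB->set_flags and so on); unlike the queued bdb_*
# requests above, they execute synchronously in the calling thread.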