… | |
… | |
3 | BDB - Asynchronous Berkeley DB access |
3 | BDB - Asynchronous Berkeley DB access |
4 | |
4 | |
5 | =head1 SYNOPSIS |
5 | =head1 SYNOPSIS |
6 | |
6 | |
7 | use BDB; |
7 | use BDB; |
|
|
8 | |
|
|
9 | my $env = db_env_create; |
|
|
10 | |
|
|
11 | mkdir "bdtest", 0700; |
|
|
12 | db_env_open |
|
|
13 | $env, |
|
|
14 | "bdtest", |
|
|
15 | BDB::INIT_LOCK | BDB::INIT_LOG | BDB::INIT_MPOOL |
|
|
16 | | BDB::INIT_TXN | BDB::RECOVER | BDB::USE_ENVIRON | BDB::CREATE, |
|
|
17 | 0600; |
|
|
18 | |
|
|
19 | $env->set_flags (BDB::AUTO_COMMIT | BDB::TXN_NOSYNC, 1); |
|
|
20 | |
|
|
21 | my $db = db_create $env; |
|
|
22 | db_open $db, undef, "table", undef, BDB::BTREE, BDB::AUTO_COMMIT | BDB::CREATE |
|
|
23 | | BDB::READ_UNCOMMITTED, 0600; |
|
|
24 | db_put $db, undef, "key", "data", 0, sub { |
|
|
25 | db_del $db, undef, "key"; |
|
|
26 | }; |
|
|
27 | db_sync $db; |
|
|
28 | |
|
|
29 | # when you also use Coro, management is easy: |
|
|
30 | use Coro::BDB; |
|
|
31 | |
|
|
32 | # automatic result processing with AnyEvent: |
|
|
33 | our $FH; open $FH, "<&=" . BDB::poll_fileno; |
|
|
34 | our $WATCHER = AnyEvent->io (fh => $FH, poll => 'r', cb => \&BDB::poll_cb); |
|
|
35 | |
|
|
36 | # automatic result processing with EV: |
|
|
37 | my $WATCHER = EV::io BDB::poll_fileno, EV::READ, \&BDB::poll_cb; |
|
|
38 | |
|
|
39 | # with Glib: |
|
|
40 | add_watch Glib::IO BDB::poll_fileno, |
|
|
41 | in => sub { BDB::poll_cb; 1 }; |
|
|
42 | |
|
|
43 | # or simply flush manually |
|
|
44 | BDB::flush; |
|
|
45 | |
8 | |
46 | |
9 | =head1 DESCRIPTION |
47 | =head1 DESCRIPTION |
10 | |
48 | |
11 | See the BerkeleyDB documentation (L<http://www.oracle.com/technology/documentation/berkeley-db/db/index.html>). |
49 | See the BerkeleyDB documentation (L<http://www.oracle.com/technology/documentation/berkeley-db/db/index.html>). |
12 | The BDB API is very similar to the C API (the translation has been very faithful). |
50 | The BDB API is very similar to the C API (the translation has been very faithful). |
… | |
… | |
72 | use strict 'vars'; |
110 | use strict 'vars'; |
73 | |
111 | |
74 | use base 'Exporter'; |
112 | use base 'Exporter'; |
75 | |
113 | |
76 | BEGIN { |
114 | BEGIN { |
77 | our $VERSION = '1.1'; |
115 | our $VERSION = '1.43'; |
78 | |
116 | |
79 | our @BDB_REQ = qw( |
117 | our @BDB_REQ = qw( |
80 | db_env_open db_env_close db_env_txn_checkpoint db_env_lock_detect |
118 | db_env_open db_env_close db_env_txn_checkpoint db_env_lock_detect |
81 | db_env_memp_sync db_env_memp_trickle |
119 | db_env_memp_sync db_env_memp_trickle |
82 | db_open db_close db_compact db_sync db_put db_get db_pget db_del db_key_range |
120 | db_open db_close db_compact db_sync db_upgrade |
|
|
121 | db_put db_get db_pget db_del db_key_range |
83 | db_txn_commit db_txn_abort db_txn_finish |
122 | db_txn_commit db_txn_abort db_txn_finish |
84 | db_c_close db_c_count db_c_put db_c_get db_c_pget db_c_del |
123 | db_c_close db_c_count db_c_put db_c_get db_c_pget db_c_del |
85 | db_sequence_open db_sequence_close |
124 | db_sequence_open db_sequence_close |
86 | db_sequence_get db_sequence_remove |
125 | db_sequence_get db_sequence_remove |
87 | ); |
126 | ); |
… | |
… | |
99 | |
138 | |
100 | =head2 BERKELEYDB FUNCTIONS |
139 | =head2 BERKELEYDB FUNCTIONS |
101 | |
140 | |
102 | All of these are functions. The create functions simply return a new |
141 | All of these are functions. The create functions simply return a new |
103 | object and never block. All the remaining functions take an optional |
142 | object and never block. All the remaining functions take an optional |
104 | callback as last argument. If it is missing, then the fucntion will be |
143 | callback as last argument. If it is missing, then the function will be |
105 | executed synchronously. |
144 | executed synchronously. In both cases, C<$!> will reflect the return value |
|
|
145 | of the function. |
106 | |
146 | |
107 | BDB functions that cannot block (mostly functions that manipulate |
147 | BDB functions that cannot block (mostly functions that manipulate |
108 | settings) are method calls on the relevant objects, so the rule of thumb |
148 | settings) are method calls on the relevant objects, so the rule of thumb |
109 | is: if it's a method, it's not blocking; if it's a function, it takes a |
149 | is: if it's a method, it's not blocking; if it's a function, it takes a |
110 | callback as last argument. |
150 | callback as last argument. |
… | |
… | |
145 | |
185 | |
146 | db_open (DB *db, DB_TXN_ornull *txnid, octetstring file, octetstring database, int type, U32 flags, int mode, SV *callback = &PL_sv_undef) |
186 | db_open (DB *db, DB_TXN_ornull *txnid, octetstring file, octetstring database, int type, U32 flags, int mode, SV *callback = &PL_sv_undef) |
147 | flags: AUTO_COMMIT CREATE EXCL MULTIVERSION NOMMAP RDONLY READ_UNCOMMITTED THREAD TRUNCATE |
187 | flags: AUTO_COMMIT CREATE EXCL MULTIVERSION NOMMAP RDONLY READ_UNCOMMITTED THREAD TRUNCATE |
148 | db_close (DB *db, U32 flags = 0, SV *callback = &PL_sv_undef) |
188 | db_close (DB *db, U32 flags = 0, SV *callback = &PL_sv_undef) |
149 | flags: DB_NOSYNC |
189 | flags: DB_NOSYNC |
|
|
190 | db_upgrade (DB *db, octetstring file, U32 flags = 0, SV *callback = &PL_sv_undef) |
150 | db_compact (DB *db, DB_TXN_ornull *txn = 0, SV *start = 0, SV *stop = 0, SV *unused1 = 0, U32 flags = DB_FREE_SPACE, SV *unused2 = 0, SV *callback = &PL_sv_undef) |
191 | db_compact (DB *db, DB_TXN_ornull *txn = 0, SV *start = 0, SV *stop = 0, SV *unused1 = 0, U32 flags = DB_FREE_SPACE, SV *unused2 = 0, SV *callback = &PL_sv_undef) |
151 | flags: FREELIST_ONLY FREE_SPACE |
192 | flags: FREELIST_ONLY FREE_SPACE |
152 | db_sync (DB *db, U32 flags = 0, SV *callback = &PL_sv_undef) |
193 | db_sync (DB *db, U32 flags = 0, SV *callback = &PL_sv_undef) |
153 | db_key_range (DB *db, DB_TXN_ornull *txn, SV *key, SV *key_range, U32 flags = 0, SV *callback = &PL_sv_undef) |
194 | db_key_range (DB *db, DB_TXN_ornull *txn, SV *key, SV *key_range, U32 flags = 0, SV *callback = &PL_sv_undef) |
154 | db_put (DB *db, DB_TXN_ornull *txn, SV *key, SV *data, U32 flags = 0, SV *callback = &PL_sv_undef) |
195 | db_put (DB *db, DB_TXN_ornull *txn, SV *key, SV *data, U32 flags = 0, SV *callback = &PL_sv_undef) |
… | |
… | |
179 | db_sequence_remove (DB_SEQUENCE *seq, DB_TXN_ornull *txnid = 0, U32 flags = 0, SV *callback = &PL_sv_undef) |
220 | db_sequence_remove (DB_SEQUENCE *seq, DB_TXN_ornull *txnid = 0, U32 flags = 0, SV *callback = &PL_sv_undef) |
180 | flags: TXN_NOSYNC |
221 | flags: TXN_NOSYNC |
181 | |
222 | |
182 | =head4 db_txn_finish (DB_TXN *txn, U32 flags = 0, SV *callback = &PL_sv_undef) |
223 | =head4 db_txn_finish (DB_TXN *txn, U32 flags = 0, SV *callback = &PL_sv_undef) |
183 | |
224 | |
184 | This is not a Berkeley DB function but a BDB module extension. It is very |
225 | This is not actually a Berkeley DB function but a BDB module |
|
|
226 | extension. The background for this extension is: It is very annoying to |
185 | annoying to have to check every single BDB function for error returns and |
227 | have to check every single BDB function for error returns and provide a |
186 | provide a codepath out of your transaction. While the BDB module still |
228 | codepath out of your transaction. While the BDB module still makes this |
187 | makes this possible, it contains the following extensions: |
229 | possible, it contains the following extensions: |
188 | |
230 | |
189 | When a transaction-protected function returns any operating system |
231 | When a transaction-protected function returns any operating system |
190 | error (errno > 0), BDB will set the C<TXN_DEADLOCK> flag on the |
232 | error (errno > 0), BDB will set the C<TXN_DEADLOCK> flag on the |
191 | transaction. This flag is also set by Berkeley DB functions externally |
233 | transaction. This flag is also set by Berkeley DB functions themselves |
192 | when an operation fails with LOCK_DEADLOCK, and it causes all further |
234 | when an operation fails with LOCK_DEADLOCK, and it causes all further |
193 | operations on that transaction (including C<db_txn_commit>) to fail. |
235 | operations on that transaction (including C<db_txn_commit>) to fail. |
194 | |
236 | |
195 | The C<db_txn_finish> request will look at this flag, and, if it is set, |
237 | The C<db_txn_finish> request will look at this flag, and, if it is set, |
196 | will automatically call C<db_txn_abort> (setting errno to C<LOCK_DEADLOCK> |
238 | will automatically call C<db_txn_abort> (setting errno to C<LOCK_DEADLOCK> |
197 | if it isn't set). If it isn't set, it will call C<db_txn_commit> and |
239 | if it isn't set to something else yet). If it isn't set, it will call |
198 | return the error normally. |
240 | C<db_txn_commit> and return the error normally. |
199 | |
241 | |
200 | How to use this? Easy: just write your transaction normally: |
242 | How to use this? Easy: just write your transaction normally: |
201 | |
243 | |
202 | my $txn = $db_env->txn_begin; |
244 | my $txn = $db_env->txn_begin; |
203 | db_get $db, $txn, "key", my $data; |
245 | db_get $db, $txn, "key", my $data; |
… | |
… | |
208 | That is, handle only the expected errors. If something unexpected happens |
250 | That is, handle only the expected errors. If something unexpected happens |
209 | (EIO, LOCK_NOTGRANTED or a deadlock in either db_get or db_put), then the remaining |
251 | (EIO, LOCK_NOTGRANTED or a deadlock in either db_get or db_put), then the remaining |
210 | requests (db_put in this case) will simply be skipped (they will fail with |
252 | requests (db_put in this case) will simply be skipped (they will fail with |
211 | LOCK_DEADLOCK) and the transaction will be aborted. |
253 | LOCK_DEADLOCK) and the transaction will be aborted. |
212 | |
254 | |
213 | You cna use the C<< $txn->failed >> method to check wether a transaction |
255 | You can use the C<< $txn->failed >> method to check whether a transaction |
214 | has failed in this way and abort further processing (excluding |
256 | has failed in this way and abort further processing (excluding |
215 | C<db_txn_finish>). |
257 | C<db_txn_finish>). |
216 | |
258 | |
217 | =head3 DB_ENV/database environment methods |
259 | =head3 DB_ENV/database environment methods |
218 | |
260 | |
… | |
… | |
241 | $int = $env->set_lk_max_lockers (U32 max) |
283 | $int = $env->set_lk_max_lockers (U32 max) |
242 | $int = $env->set_lk_max_locks (U32 max) |
284 | $int = $env->set_lk_max_locks (U32 max) |
243 | $int = $env->set_lk_max_objects (U32 max) |
285 | $int = $env->set_lk_max_objects (U32 max) |
244 | $int = $env->set_lg_bsize (U32 max) |
286 | $int = $env->set_lg_bsize (U32 max) |
245 | $int = $env->set_lg_max (U32 max) |
287 | $int = $env->set_lg_max (U32 max) |
|
|
288 | $int = $env->mutex_set_increment (U32 increment) |
|
|
289 | $int = $env->mutex_set_tas_spins (U32 tas_spins) |
|
|
290 | $int = $env->mutex_set_max (U32 max) |
|
|
291 | $int = $env->mutex_set_align (U32 align) |
246 | |
292 | |
247 | $txn = $env->txn_begin (DB_TXN_ornull *parent = 0, U32 flags = 0) |
293 | $txn = $env->txn_begin (DB_TXN_ornull *parent = 0, U32 flags = 0) |
248 | flags: READ_COMMITTED READ_UNCOMMITTED TXN_NOSYNC TXN_NOWAIT TXN_SNAPSHOT TXN_SYNC TXN_WAIT TXN_WRITE_NOSYNC |
294 | flags: READ_COMMITTED READ_UNCOMMITTED TXN_NOSYNC TXN_NOWAIT TXN_SNAPSHOT TXN_SYNC TXN_WAIT TXN_WRITE_NOSYNC |
249 | |
295 | |
250 | =head4 Example: |
296 | =head4 Example: |
… | |
… | |
345 | DESTROY (DBC_ornull *dbc) |
391 | DESTROY (DBC_ornull *dbc) |
346 | CODE: |
392 | CODE: |
347 | if (dbc) |
393 | if (dbc) |
348 | dbc->c_close (dbc); |
394 | dbc->c_close (dbc); |
349 | |
395 | |
|
|
396 | $int = $cursor->set_priority ($priority = PRIORITY_*) |
|
|
397 | |
350 | =head4 Example: |
398 | =head4 Example: |
351 | |
399 | |
352 | my $c = $db->cursor; |
400 | my $c = $db->cursor; |
353 | |
401 | |
354 | for (;;) { |
402 | for (;;) { |
… | |
… | |
387 | |
435 | |
388 | =head3 EVENT PROCESSING AND EVENT LOOP INTEGRATION |
436 | =head3 EVENT PROCESSING AND EVENT LOOP INTEGRATION |
389 | |
437 | |
390 | =over 4 |
438 | =over 4 |
391 | |
439 | |
|
|
440 | =item $msg = BDB::strerror [$errno] |
|
|
441 | |
|
|
442 | Returns the string corresponding to the given errno value. If no argument |
|
|
443 | is given, use C<$!>. |
|
|
444 | |
392 | =item $fileno = BDB::poll_fileno |
445 | =item $fileno = BDB::poll_fileno |
393 | |
446 | |
394 | Return the I<request result pipe file descriptor>. This filehandle must be |
447 | Return the I<request result pipe file descriptor>. This filehandle must be |
395 | polled for reading by some mechanism outside this module (e.g. Event or |
448 | polled for reading by some mechanism outside this module (e.g. Event or |
396 | select, see below or the SYNOPSIS). If the pipe becomes readable you have |
449 | select, see below or the SYNOPSIS). If the pipe becomes readable you have |
… | |
… | |
434 | interactiveness when perl is not fast enough to process all requests in |
487 | interactiveness when perl is not fast enough to process all requests in |
435 | time. |
488 | time. |
436 | |
489 | |
437 | For interactive programs, values such as C<0.01> to C<0.1> should be fine. |
490 | For interactive programs, values such as C<0.01> to C<0.1> should be fine. |
438 | |
491 | |
439 | Example: Install an Event watcher that automatically calls |
492 | Example: Install an EV watcher that automatically calls |
440 | BDB::poll_cb with low priority, to ensure that other parts of the |
493 | BDB::poll_cb with low priority, to ensure that other parts of the |
441 | program get the CPU sometimes even under high AIO load. |
494 | program get the CPU sometimes even under high load. |
442 | |
495 | |
443 | # try not to spend much more than 0.1s in poll_cb |
496 | # try not to spend much more than 0.1s in poll_cb |
444 | BDB::max_poll_time 0.1; |
497 | BDB::max_poll_time 0.1; |
445 | |
498 | |
446 | # use a low priority so other tasks have priority |
499 | my $bdb_poll = EV::io BDB::poll_fileno, EV::READ, \&BDB::poll_cb; |
447 | Event->io (fd => BDB::poll_fileno, |
|
|
448 | poll => 'r', nice => 1, |
|
|
449 | cb => &BDB::poll_cb); |
|
|
450 | |
500 | |
451 | =item BDB::poll_wait |
501 | =item BDB::poll_wait |
452 | |
502 | |
453 | If there are any outstanding requests and none of them in the result |
503 | If there are any outstanding requests and none of them in the result |
454 | phase, wait till the result filehandle becomes ready for reading (simply |
504 | phase, wait till the result filehandle becomes ready for reading (simply |
… | |
… | |
466 | |
516 | |
467 | BDB::poll_wait, BDB::poll_cb |
517 | BDB::poll_wait, BDB::poll_cb |
468 | |
518 | |
469 | =item BDB::flush |
519 | =item BDB::flush |
470 | |
520 | |
471 | Wait till all outstanding AIO requests have been handled. |
521 | Wait till all outstanding BDB requests have been handled. |
472 | |
522 | |
473 | Strictly equivalent to: |
523 | Strictly equivalent to: |
474 | |
524 | |
475 | BDB::poll_wait, BDB::poll_cb |
525 | BDB::poll_wait, BDB::poll_cb |
476 | while BDB::nreqs; |
526 | while BDB::nreqs; |
… | |
… | |
481 | |
531 | |
482 | =over 4 |
532 | =over 4 |
483 | |
533 | |
484 | =item BDB::min_parallel $nthreads |
534 | =item BDB::min_parallel $nthreads |
485 | |
535 | |
486 | Set the minimum number of AIO threads to C<$nthreads>. The current |
536 | Set the minimum number of BDB threads to C<$nthreads>. The current |
487 | default is C<8>, which means eight asynchronous operations can execute |
537 | default is C<8>, which means eight asynchronous operations can execute |
488 | concurrently at any one time (the number of outstanding requests, |
538 | concurrently at any one time (the number of outstanding requests, |
489 | however, is unlimited). |
539 | however, is unlimited). |
490 | |
540 | |
491 | BDB starts threads only on demand, when an AIO request is queued and |
541 | BDB starts threads only on demand, when a BDB request is queued and |
492 | no free thread exists. Please note that queueing up a hundred requests can |
542 | no free thread exists. Please note that queueing up a hundred requests can |
493 | create demand for a hundred threads, even if it turns out that everything |
543 | create demand for a hundred threads, even if it turns out that everything |
494 | is in the cache and could have been processed faster by a single thread. |
544 | is in the cache and could have been processed faster by a single thread. |
495 | |
545 | |
496 | It is recommended to keep the number of threads relatively low, as some |
546 | It is recommended to keep the number of threads relatively low, as some |
… | |
… | |
501 | Under most circumstances you don't need to call this function, as the |
551 | Under most circumstances you don't need to call this function, as the |
502 | module selects a default that is suitable for low to moderate load. |
552 | module selects a default that is suitable for low to moderate load. |
503 | |
553 | |
504 | =item BDB::max_parallel $nthreads |
554 | =item BDB::max_parallel $nthreads |
505 | |
555 | |
506 | Sets the maximum number of AIO threads to C<$nthreads>. If more than the |
556 | Sets the maximum number of BDB threads to C<$nthreads>. If more than the |
507 | specified number of threads are currently running, this function kills |
557 | specified number of threads are currently running, this function kills |
508 | them. This function blocks until the limit is reached. |
558 | them. This function blocks until the limit is reached. |
509 | |
559 | |
510 | While C<$nthreads> is zero, BDB requests get queued but not executed |
560 | While C<$nthreads> is zero, BDB requests get queued but not executed |
511 | until the number of threads has been increased again. |
561 | until the number of threads has been increased again. |
… | |
… | |
614 | |
664 | |
615 | =head2 FORK BEHAVIOUR |
665 | =head2 FORK BEHAVIOUR |
616 | |
666 | |
617 | This module should do "the right thing" when the process using it forks: |
667 | This module should do "the right thing" when the process using it forks: |
618 | |
668 | |
619 | Before the fork, IO::AIO enters a quiescent state where no requests |
669 | Before the fork, BDB enters a quiescent state where no requests |
620 | can be added in other threads and no results will be processed. After |
670 | can be added in other threads and no results will be processed. After |
621 | the fork the parent simply leaves the quiescent state and continues |
671 | the fork the parent simply leaves the quiescent state and continues |
622 | request/result processing, while the child frees the request/result queue |
672 | request/result processing, while the child frees the request/result queue |
623 | (so that the requests started before the fork will only be handled in the |
673 | (so that the requests started before the fork will only be handled in the |
624 | parent). Threads will be started on demand until the limit set in the |
674 | parent). Threads will be started on demand until the limit set in the |
625 | parent process has been reached again. |
675 | parent process has been reached again. |
626 | |
676 | |
627 | In short: the parent will, after a short pause, continue as if fork had |
677 | In short: the parent will, after a short pause, continue as if fork had |
628 | not been called, while the child will act as if IO::AIO has not been used |
678 | not been called, while the child will act as if BDB has not been used |
629 | yet. |
679 | yet. |
|
|
680 | |
|
|
681 | Win32 note: there is no fork on win32, and perl's emulation of it is too |
|
|
682 | broken to be supported, so do not use BDB in a windows pseudo-fork, better |
|
|
683 | yet, switch to a more capable platform. |
630 | |
684 | |
631 | =head2 MEMORY USAGE |
685 | =head2 MEMORY USAGE |
632 | |
686 | |
633 | Per-request usage: |
687 | Per-request usage: |
634 | |
688 | |
… | |
… | |
656 | TXN_DEADLOCK flag will be set on the transaction. See C<db_txn_finish>, |
710 | TXN_DEADLOCK flag will be set on the transaction. See C<db_txn_finish>, |
657 | above. |
711 | above. |
658 | |
712 | |
659 | =head1 SEE ALSO |
713 | =head1 SEE ALSO |
660 | |
714 | |
661 | L<Coro::AIO>. |
715 | L<Coro::BDB>, L<IO::AIO>. |
662 | |
716 | |
663 | =head1 AUTHOR |
717 | =head1 AUTHOR |
664 | |
718 | |
665 | Marc Lehmann <schmorp@schmorp.de> |
719 | Marc Lehmann <schmorp@schmorp.de> |
666 | http://home.schmorp.de/ |
720 | http://home.schmorp.de/ |