--- Coro/Coro.pm 2007/10/02 23:16:24 1.142 +++ Coro/Coro.pm 2008/11/18 08:59:46 1.222 @@ -4,44 +4,61 @@ =head1 SYNOPSIS - use Coro; - - async { - # some asynchronous thread of execution - }; - - # alternatively create an async coroutine like this: - - sub some_func : Coro { - # some more async code - } - - cede; + use Coro; + + async { + # some asynchronous thread of execution + print "2\n"; + cede; # yield back to main + print "4\n"; + }; + print "1\n"; + cede; # yield to coroutine + print "3\n"; + cede; # and again + + # use locking + use Coro::Semaphore; + my $lock = new Coro::Semaphore; + my $locked; + + $lock->down; + $locked = 1; + $lock->up; =head1 DESCRIPTION -This module collection manages coroutines. Coroutines are similar -to threads but don't run in parallel at the same time even on SMP -machines. The specific flavor of coroutine used in this module also -guarantees you that it will not switch between coroutines unless +This module collection manages coroutines. Coroutines are similar to +threads but don't (in general) run in parallel at the same time even +on SMP machines. The specific flavor of coroutine used in this module +also guarantees you that it will not switch between coroutines unless necessary, at easily-identified points in your program, so locking and parallel access are rarely an issue, making coroutine programming much -safer than threads programming. +safer and easier than threads programming. -(Perl, however, does not natively support real threads but instead does a -very slow and memory-intensive emulation of processes using threads. This -is a performance win on Windows machines, and a loss everywhere else). +Unlike a normal perl program, however, coroutines allow you to have +multiple running interpreters that share data, which is especially useful +to code pseudo-parallel processes and for event-based programming, such as +multiple HTTP-GET requests running concurrently. See L to +learn more. 
+ +Coroutines are also useful because Perl has no support for threads (the so +called "threads" that perl offers are nothing more than the (bad) process +emulation coming from the Windows platform: On standard operating systems +they serve no purpose whatsoever, except by making your programs slow and +making them use a lot of memory. Best disable them when building perl, or +ask your software vendor/distributor to do it for you). In this module, coroutines are defined as "callchain + lexical variables + @_ + $_ + $@ + $/ + C stack), that is, a coroutine has its own callchain, its own set of lexicals and its own set of perls most important global -variables. +variables (see L for more configuration). =cut package Coro; -use strict; +use strict qw(vars subs); no warnings "uninitialized"; use Coro::State; @@ -52,7 +69,7 @@ our $main; # main coroutine our $current; # current coroutine -our $VERSION = '3.8'; +our $VERSION = 5.0; our @EXPORT = qw(async async_pool cede schedule terminate current unblock_sub); our %EXPORT_TAGS = ( @@ -60,83 +77,58 @@ ); our @EXPORT_OK = (@{$EXPORT_TAGS{prio}}, qw(nready)); -{ - my @async; - my $init; - - # this way of handling attributes simply is NOT scalable ;() - sub import { - no strict 'refs'; - - Coro->export_to_level (1, @_); - - my $old = *{(caller)[0]."::MODIFY_CODE_ATTRIBUTES"}{CODE}; - *{(caller)[0]."::MODIFY_CODE_ATTRIBUTES"} = sub { - my ($package, $ref) = (shift, shift); - my @attrs; - for (@_) { - if ($_ eq "Coro") { - push @async, $ref; - unless ($init++) { - eval q{ - sub INIT { - &async(pop @async) while @async; - } - }; - } - } else { - push @attrs, $_; - } - } - return $old ? $old->($package, $ref, @attrs) : @attrs; - }; - } - -} - =over 4 -=item $main +=item $Coro::main -This coroutine represents the main program. +This variable stores the coroutine object that represents the main +program. 
While you can C it and do most other things you can do to +coroutines, it is mainly useful to compare against C<$Coro::current>, to see +whether you are running in the main program or not. =cut -$main = new Coro; +# $main is now being initialised by Coro::State -=item $current (or as function: current) +=item $Coro::current -The current coroutine (the last coroutine switched to). The initial value -is C<$main> (of course). +The coroutine object representing the current coroutine (the last +coroutine that the Coro scheduler switched to). The initial value is +C<$Coro::main> (of course). -This variable is B I. It is provided for performance -reasons. If performance is not essential you are encouraged to use the -C function instead. +This variable is B I. You can take copies of the +value stored in it and use it as any other coroutine object, but you must +not otherwise modify the variable itself. =cut -$main->{desc} = "[main::]"; - -# maybe some other module used Coro::Specific before... -$main->{_specific} = $current->{_specific} - if $current; - -_set_current $main; +sub current() { $current } # [DEPRECATED] -sub current() { $current } +=item $Coro::idle -=item $idle +This variable is mainly useful to integrate Coro into event loops. It is +usually better to rely on L or LC, as this is +pretty low-level functionality. -A callback that is called whenever the scheduler finds no ready coroutines -to run. The default implementation prints "FATAL: deadlock detected" and -exits, because the program has no other way to continue. +This variable stores a callback that is called whenever the scheduler +finds no ready coroutines to run. The default implementation prints +"FATAL: deadlock detected" and exits, because the program has no other way +to continue. This hook is overwritten by modules such as C and -C to wait on an external event that hopefully wake up a +C to wait on an external event that hopefully wakes up a coroutine so the scheduler can run it. 
+Note that the callback I<must not>, under any circumstances, block +the current coroutine. Normally, this is achieved by having an "idle +coroutine" that calls the event loop and then blocks again, and then +readying that coroutine in the idle handler. + +See L or L for examples of using this +technique. + Please note that if your callback recursively invokes perl (e.g. for event -handlers), then it must be prepared to be called recursively. +handlers), then it must be prepared to be called recursively itself. =cut @@ -154,7 +146,7 @@ # call all destruction callbacks $_->(@{$self->{_status}}) - for @{(delete $self->{_on_destroy}) || []}; + for @{ delete $self->{_on_destroy} || [] }; } # this coroutine is necessary because a coroutine @@ -170,30 +162,39 @@ &schedule; } }; -$manager->desc ("[coro manager]"); +$manager->{desc} = "[coro manager]"; $manager->prio (PRIO_MAX); -# static methods. not really. - =back -=head2 STATIC METHODS - -Static methods are actually functions that operate on the current coroutine only. +=head2 SIMPLE COROUTINE CREATION =over 4 =item async { ... } [@args...] -Create a new asynchronous coroutine and return it's coroutine object -(usually unused). When the sub returns the new coroutine is automatically +Create a new coroutine and return its coroutine object (usually +unused). The coroutine will be put into the ready queue, so +it will start running automatically on the next scheduler run. + +The first argument is a codeblock/closure that should be executed in the +coroutine. When it returns, the coroutine is automatically terminated. +The remaining arguments are passed as arguments to the closure. + +See the C constructor for info about the coroutine +environment in which coroutines are executed. + Calling C in a coroutine will do the same as calling exit outside the coroutine. Likewise, when the coroutine dies, the program will exit, just as it would in the main program. 
- # create a new coroutine that just prints its arguments +If you do not want that, you can provide a default C handler, or +simply avoid dying (by use of C). + +Example: Create a new coroutine that just prints its arguments. + async { print "@_\n"; } 1,2,3,4; @@ -209,27 +210,37 @@ =item async_pool { ... } [@args...] Similar to C, but uses a coroutine pool, so you should not call -terminate or join (although you are allowed to), and you get a coroutine -that might have executed other code already (which can be good or bad :). +terminate or join on it (although you are allowed to), and you get a +coroutine that might have executed other code already (which can be good +or bad :). + +On the plus side, this function is faster than creating (and destroying) +a completely new coroutine, so if you need a lot of generic coroutines in +quick succession, use C, not C. -Also, the block is executed in an C context and a warning will be +The code block is executed in an C context and a warning will be issued in case of an exception instead of terminating the program, as C does. As the coroutine is being reused, stuff like C will not work in the expected way, unless you call terminate or cancel, -which somehow defeats the purpose of pooling. - -The priority will be reset to C<0> after each job, otherwise the coroutine -will be re-used "as-is". +which somehow defeats the purpose of pooling (but is fine in the +exceptional case). -The pool size is limited to 8 idle coroutines (this can be adjusted by -changing $Coro::POOL_SIZE), and there can be as many non-idle coros as -required. +The priority will be reset to C<0> after each run, tracing will be +disabled, the description will be reset and the default output filehandle +gets restored, so you can change all these. Otherwise the coroutine will +be re-used "as-is": most notably if you change other per-coroutine global +stuff such as C<$/> you I<must> revert that change, which is most +simply done by using local as in: C<< local $/ >>. 
+ +The idle pool size is limited to C<8> idle coroutines (this can be +adjusted by changing $Coro::POOL_SIZE), but there can be as many non-idle +coros as required. If you are concerned about pooled coroutines growing a lot because a single C used a lot of stackspace you can e.g. C once per second or so to slowly replenish the pool. In addition to that, when the stacks used by a handler grows larger than 16kb -(adjustable with $Coro::POOL_RSS) it will also exit. +(adjustable via $Coro::POOL_RSS) it will also be destroyed. =cut @@ -250,13 +261,15 @@ } }; - last if $@ eq "\3terminate\2\n"; - warn $@ if $@; + if ($@) { + last if $@ eq "\3async_pool terminate\2\n"; + warn $@; + } } } sub async_pool(&@) { - # this is also inlined into the unlock_scheduler + # this is also inlined into the unblock_scheduler my $coro = (pop @async_pool) || new Coro \&pool_handler; $coro->{_invoke} = [@_]; @@ -265,12 +278,34 @@ $coro } +=back + +=head2 STATIC METHODS + +Static methods are actually functions that operate on the current coroutine. + +=over 4 + =item schedule -Calls the scheduler. Please note that the current coroutine will not be put -into the ready queue, so calling this function usually means you will -never be called again unless something else (e.g. an event handler) calls -ready. +Calls the scheduler. The scheduler will find the next coroutine that is +to be run from the ready queue and switches to it. The next coroutine +to be run is simply the one with the highest priority that is longest +in its ready queue. If there is no coroutine ready, it will call the +C<$Coro::idle> hook. + +Please note that the current coroutine will I<not> be put into the ready +queue, so calling this function usually means you will never be called +again unless something else (e.g. an event handler) calls C<< ->ready >>, +thus waking you up. 
+ +This makes C I generic method to use to block the current +coroutine and wait for events: first you remember the current coroutine in +a variable, then arrange for some callback of yours to call C<< ->ready +>> on that once some event happens, and last you call C to put +yourself to sleep. Note that a lot of things can wake your coroutine up, +so you need to check whether the event indeed happened, e.g. by storing the +status in a variable. The canonical way to wait on external events is this: @@ -293,18 +328,19 @@ =item cede -"Cede" to other coroutines. This function puts the current coroutine into the -ready queue and calls C, which has the effect of giving up the -current "timeslice" to other coroutines of the same or higher priority. +"Cede" to other coroutines. This function puts the current coroutine into +the ready queue and calls C, which has the effect of giving +up the current "timeslice" to other coroutines of the same or higher +priority. Once your coroutine gets its turn again it will automatically be +resumed. -Returns true if at least one coroutine switch has happened. +This function is often called C in other languages. =item Coro::cede_notself -Works like cede, but is not exported by default and will cede to any -coroutine, regardless of priority, once. - -Returns true if at least one coroutine switch has happened. +Works like cede, but is not exported by default and will cede to I +coroutine, regardless of priority. This is useful sometimes to ensure +progress is made. =item terminate [arg...] @@ -316,6 +352,10 @@ one. This is useful after a fork, either in the child or the parent, as usually only one of them should inherit the running coroutines. +Note that while this will try to free some of the main programs resources, +you cannot free all of them, so if a coroutine that is not the main +program calls this function, there will be some one-time resource leak. 
+ =cut sub terminate { @@ -331,22 +371,22 @@ =back -# dynamic methods - =head2 COROUTINE METHODS -These are the methods you can call on coroutine objects. +These are the methods you can call on coroutine objects (or to create +them). =over 4 =item new Coro \&sub [, @args...] -Create a new coroutine and return it. When the sub returns the coroutine +Create a new coroutine and return it. When the sub returns, the coroutine automatically terminates as if C with the returned values were -called. To make the coroutine run you must first put it into the ready queue -by calling the ready method. +called. To make the coroutine run you must first put it into the ready +queue by calling the ready method. -See C for additional discussion. +See C and C for additional info about the +coroutine environment. =cut @@ -362,13 +402,17 @@ =item $success = $coroutine->ready -Put the given coroutine into the ready queue (according to it's priority) -and return true. If the coroutine is already in the ready queue, do nothing -and return false. +Put the given coroutine into the end of its ready queue (there is one +queue for each priority) and return true. If the coroutine is already in +the ready queue, do nothing and return false. + +This ensures that the scheduler will resume this coroutine automatically +once all the coroutines of higher priority and all coroutines of the same +priority that were put into the ready queue earlier have been resumed. =item $is_ready = $coroutine->is_ready -Return wether the coroutine is currently the ready queue or not, +Return whether the coroutine is currently the ready queue or not, =item $coroutine->cancel (arg...) @@ -391,11 +435,36 @@ } } +=item $coroutine->throw ([$scalar]) + +If C<$throw> is specified and defined, it will be thrown as an exception +inside the coroutine at the next convenient point in time. Otherwise +clears the exception object. + +Coro will check for the exception each time a schedule-like-function +returns, i.e. 
after each C, C, C<< Coro::Semaphore->down +>>, C<< Coro::Handle->readable >> and so on. Note that this means that +when a coroutine is acquiring a lock, it might only throw after it has +successfully acquired it. + +The exception object will be thrown "as is" with the specified scalar in +C<$@>, i.e. if it is a string, no line number or newline will be appended +(unlike with C). + +This can be used as a softer means than C to ask a coroutine to +end itself, although there is no guarantee that the exception will lead to +termination, and if the exception isn't caught it might well end the whole +program. + +You might also think of C as being the moral equivalent of +Cing a coroutine with a signal (in this case, a scalar). + =item $coroutine->join Wait until the coroutine terminates and return any values given to the -C or C functions. C can be called multiple times -from multiple coroutine. +C or C functions. C can be called concurrently +from multiple coroutines, and all will be resumed and given the status +return once the C<$coroutine> terminates. =cut @@ -420,7 +489,7 @@ Registers a callback that is called when this coroutine gets destroyed, but before it is joined. The callback gets passed the terminate arguments, -if any. +if any, and I<must not> die, under any circumstances. =cut @@ -460,10 +529,11 @@ =item $olddesc = $coroutine->desc ($newdesc) Sets (or gets in case the argument is missing) the description for this -coroutine. This is just a free-form string you can associate with a coroutine. +coroutine. This is just a free-form string you can associate with a +coroutine. -This method simply sets the C<< $coroutine->{desc} >> member to the given string. You -can modify this member directly if you wish. +This method simply sets the C<< $coroutine->{desc} >> member to the given +string. You can modify this member directly if you wish. =cut @@ -482,10 +552,11 @@ =item Coro::nready Returns the number of coroutines that are currently in the ready state, -i.e. 
that can be switched to. The value C<0> means that the only runnable -coroutine is the currently running one, so C would have no effect, -and C would cause a deadlock unless there is an idle handler -that wakes up some coroutines. +i.e. that can be switched to by calling C directly or +indirectly. The value C<0> means that the only runnable coroutine is the +currently running one, so C would have no effect, and C +would cause a deadlock unless there is an idle handler that wakes up some +coroutines. =item my $guard = Coro::guard { ... } @@ -524,23 +595,35 @@ =item unblock_sub { ... } This utility function takes a BLOCK or code reference and "unblocks" it, -returning the new coderef. This means that the new coderef will return -immediately without blocking, returning nothing, while the original code -ref will be called (with parameters) from within its own coroutine. +returning a new coderef. Unblocking means that calling the new coderef +will return immediately without blocking, returning nothing, while the +original code ref will be called (with parameters) from within another +coroutine. The reason this function exists is that many event libraries (such as the venerable L module) are not coroutine-safe (a weaker form of thread-safety). This means you must not block within event callbacks, -otherwise you might suffer from crashes or worse. +otherwise you might suffer from crashes or worse. The only event library +currently known that is safe to use without C is L. This function allows your callbacks to block by executing them in another coroutine where it is safe to block. One example where blocking is handy is when you use the L functions to save results to -disk. +disk, for example. In short: simply use C instead of C when creating event callbacks that want to block. +If your handler does not plan to block (e.g. simply sends a message to +another coroutine, or puts some other coroutine into the ready queue), +there is no reason to use C. 
+ +Note that you also need to use C for any other callbacks that +are indirectly executed by any C-based event loop. For example, when you +use a module that uses L (and you use L) and it +provides callbacks that are the result of some event callback, then you +must not block either, or use C. + =cut our @unblock_queue; @@ -562,7 +645,7 @@ schedule; # sleep well } }; -$unblock_scheduler->desc ("[unblock_sub scheduler]"); +$unblock_scheduler->{desc} = "[unblock_sub scheduler]"; sub unblock_sub(&) { my $cb = shift; @@ -581,23 +664,54 @@ =head1 BUGS/LIMITATIONS - - you must make very sure that no coro is still active on global - destruction. very bad things might happen otherwise (usually segfaults). +=over 4 + +=item fork with pthread backend + +When Coro is compiled using the pthread backend (which isn't recommended +but required on many BSDs as their libcs are completely broken), then +coroutines will not survive a fork. There is no known workaround except to +fix your libc and use a saner backend. + +=item perl process emulation ("threads") + +This module is not perl-pseudo-thread-safe. You should only ever use this +module from the same thread (this requirement might be removed in the +future to allow per-thread schedulers, but Coro::State does not yet allow +this). I recommend disabling thread support and using processes, as having +the windows process emulation enabled under unix roughly halves perl +performance, even when not used. + +=item coroutine switching not signal safe + +You must not switch to another coroutine from within a signal handler +(only relevant with %SIG - most event libraries provide safe signals). + +That means you I<must not> call any function that might "block" the +current coroutine - C, C, C<< Coro::Semaphore->down >> or +anything that calls those. Everything else, including calling C, +works. + +=back - - this module is not thread-safe. 
You should only ever use this module - from the same thread (this requirement might be loosened in the future - to allow per-thread schedulers, but Coro::State does not yet allow - this). =head1 SEE ALSO -Support/Utility: L, L, L, L. +Event-Loop integration: L, L, L. + +Debugging: L. + +Support/Utility: L, L. Locking/IPC: L, L, L, L, L. -Event/IO: L, L, L, L, L. +IO/Timers: L, L, L, L. + +Compatibility: L, L, L, L. + +XS API: L. -Embedding: L +Low level Configuration, Coroutine Environment: L. =head1 AUTHOR