--- AnyEvent-Fork-RPC/RPC.pm	2013/04/18 07:59:46	1.11
+++ AnyEvent-Fork-RPC/RPC.pm	2013/04/18 20:27:02	1.19
@@ -2,6 +2,8 @@
 
 AnyEvent::Fork::RPC - simple RPC extension for AnyEvent::Fork
 
+THE API IS NOT FINISHED, CONSIDER THIS A TECHNOLOGY DEMO
+
 =head1 SYNOPSIS
 
    use AnyEvent::Fork::RPC;
@@ -14,6 +16,8 @@
          "MyModule::server",
       );
 
+   use AnyEvent;
+
    my $cv = AE::cv;
 
    $rpc->(1, 2, 3, sub {
@@ -374,7 +378,9 @@
 allows a single RPC call to execute concurrently.
 
 Setting C<async> to a true value switches to another implementation that
-uses L<AnyEvent> in the child and allows multiple concurrent RPC calls.
+uses L<AnyEvent> in the child and allows multiple concurrent RPC calls (it
+does not support recursion in the event loop however, blocking condvar
+calls will fail).
 
 The actual API in the child is documented in the section that describes
 the calling semantics of the returned C<$rpc> function.
@@ -386,14 +392,14 @@
 If you use a template process and want to fork both sync and async
 children, then it is permissible to load both modules.
 
-=item serialiser => $string (default: '(sub { pack "(w/a*)*", @_ }, sub { unpack "(w/a*)*", shift })')
+=item serialiser => $string (default: $AnyEvent::Fork::RPC::STRING_SERIALISER)
 
 All arguments, result data and event data have to be serialised to be
 transferred between the processes. For this, they have to be frozen and
 thawed in both parent and child processes.
 
 By default, only octet strings can be passed between the processes, which
-is reasonably fast and efficient.
+is reasonably fast and efficient and requires no extra modules.
 
 For more complicated use cases, you can provide your own freeze and thaw
 functions, by specifying a string with perl source code. It's supposed to
@@ -405,6 +411,57 @@
 pre-load it into your L<AnyEvent::Fork> process, or you can add a C<use>
 or C<require> statement into the serialiser string. Or both.
 
+Here are some examples - some of them are also available as global
+variables that make them easier to use.
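+
+For instance, to select one of them, pass the corresponding variable as
+the C<serialiser> argument (a sketch - C<MyModule::server> is an
+illustrative name, and the JSON serialiser assumes L<JSON> is installed):
+
+   my $rpc = AnyEvent::Fork
+      ->new
+      ->require ("MyModule")
+      ->AnyEvent::Fork::RPC::run (
+           "MyModule::server",
+           serialiser => $AnyEvent::Fork::RPC::JSON_SERIALISER,
+        );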
+
+=over 4
+
+=item octet strings - C<$AnyEvent::Fork::RPC::STRING_SERIALISER>
+
+This serialiser concatenates length-prefixed octet strings, and is the
+default.
+
+Implementation:
+
+   (
+      sub { pack   "(w/a*)*", @_ },
+      sub { unpack "(w/a*)*", shift }
+   )
+
+=item json - C<$AnyEvent::Fork::RPC::JSON_SERIALISER>
+
+This serialiser creates JSON arrays - you have to make sure the L<JSON>
+module is installed for this serialiser to work. It can be beneficial for
+sharing when you preload the L<JSON> module in a template process.
+
+L<JSON> (with L<JSON::XS> installed) is slower than the octet string
+serialiser, but usually much faster than L<Storable>, unless big chunks of
+binary data need to be transferred.
+
+Implementation:
+
+   use JSON ();
+   (
+      sub {    JSON::encode_json \@_ },
+      sub { @{ JSON::decode_json shift } }
+   )
+
+=item storable - C<$AnyEvent::Fork::RPC::STORABLE_SERIALISER>
+
+This serialiser uses L<Storable>, which means it has a high chance of
+serialising just about anything you throw at it, at the cost of having
+very high overhead per operation. It also comes with perl.
+
+Implementation:
+
+   use Storable ();
+   (
+      sub {    Storable::freeze \@_ },
+      sub { @{ Storable::thaw shift } }
+   )
+
+=back
+
 =back
 
 See the examples section earlier in this document for some actual
@@ -412,7 +469,9 @@
 
 =cut
 
-our $STRING_SERIALISER = '(sub { pack "(w/a*)*", @_ }, sub { unpack "(w/a*)*", shift })';
+our $STRING_SERIALISER = '(sub { pack "(w/a*)*", @_ }, sub { unpack "(w/a*)*", shift })';
+our $JSON_SERIALISER = 'use JSON (); (sub { JSON::encode_json \@_ }, sub { @{ JSON::decode_json shift } })';
+our $STORABLE_SERIALISER = 'use Storable (); (sub { Storable::freeze \@_ }, sub { @{ Storable::thaw shift } })';
 
 sub run {
    my ($self, $function, %arg) = @_;
@@ -491,7 +550,6 @@
             undef $rw; undef $ww; # it ends here
 
             if (@rcb || %rcb) {
-               use Data::Dump;ddx[\@rcb,\%rcb];#d#
                $on_error->("unexpected eof");
             } else {
                $on_destroy->();
@@ -507,7 +565,8 @@
 
    my $guard = Guard::guard {
       $shutdown = 1;
-      $ww ||= $fh && AE::io $fh, 1, $wcb;
+
+      shutdown $fh, 1 if $fh && !$ww;
    };
 
    my $id;
@@ -581,10 +640,145 @@
 
 =back
 
+=head1 ADVANCED TOPICS
+
+=head2 Choosing a backend
+
+So how do you decide which backend to use? Well, that's your problem to
+solve, but here are some thoughts on the matter:
+
+=over 4
+
+=item Synchronous
+
+The synchronous backend does not rely on any external modules (well,
+except L<common::sense>, which works around a bug in how perl's warning
+system works). This keeps the process very small, for example, on my
+system, an empty perl interpreter uses 1492kB RSS, which becomes 2020kB
+after C<use warnings; use strict> (for people who grew up with C64s around
+them this is probably shocking every single time they see it). The worker
+process in the first example in this document uses 1792kB.
+
+Since the calls are done synchronously, slow jobs will keep newer jobs
+from executing.
+
+The synchronous backend also has no overhead due to running an event loop
+- reading requests is therefore very efficient, while writing responses is
+less so, as every response results in a write syscall.
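+
+To illustrate how small the child side of a synchronous worker is, here
+is a complete worker module (a sketch - the module name, function name
+and body are illustrative): each request calls a plain function with the
+deserialised arguments, and whatever it returns is sent back:
+
+   package MyWorker;
+
+   sub run {
+      my ($a, $b) = @_;
+
+      # the return values are serialised and become
+      # the arguments of the callback in the parent
+      $a + $b
+   }
+
+   1;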
+
+If the parent process is busy and a bit slow reading responses, the child
+waits instead of processing further requests. This also limits the amount
+of memory needed for buffering, as never more than one response has to be
+buffered.
+
+The API in the child is simple - you just have to define a function that
+does something and returns something.
+
+It's hard to use modules or code that relies on an event loop, as the
+child cannot execute anything while it waits for more input.
+
+=item Asynchronous
+
+The asynchronous backend relies on L<AnyEvent>, which tries to be small,
+but still comes at a price: On my system, the worker from example 1a uses
+3420kB RSS (for L<AnyEvent> and the event loop, utility and other modules
+it loads in turn).
+
+It batches requests and responses reasonably efficiently, doing only as
+few reads and writes as needed, but needs to poll for events via the event
+loop.
+
+Responses are queued when the parent process is busy. This means the child
+can continue to execute any queued requests. It also means that a child
+might queue a lot of responses in memory when it generates them and the
+parent process is slow accepting them.
+
+The API is not a straightforward RPC pattern - you have to call a
+"done" callback to pass return values and signal completion. Also, more
+importantly, the API starts jobs as fast as possible - when 1000 jobs
+are queued and the jobs are slow, they will all run concurrently. The
+child must implement some queueing/limiting mechanism if this causes
+problems. Alternatively, the parent could limit the number of RPC calls
+that are outstanding.
+
+Blocking use of condvars is not supported.
+
+Using event-based modules and other code that relies on an event loop is
+easy.
+
+=back
+
+=head2 Passing file descriptors
+
+Unlike L<AnyEvent::Fork>, this module has no in-built file handle or file
+descriptor passing abilities.
+
+The reason is that passing file descriptors is extraordinarily tricky
+business, and conflicts with efficient batching of messages.
+
+There still is a method you can use: Create a
+C<AnyEvent::Util::portable_socketpair> and C<send_fh> one half of it to
+the process before you pass control to C<AnyEvent::Fork::RPC::run>.
+
+Whenever you want to pass a file descriptor, send an rpc request to the
+child process (so it expects the descriptor), then send it over the other
+half of the socketpair. The child should fetch the descriptor from the
+half it was passed earlier.
+
+Here is some (untested) pseudocode to that effect:
+
+   use AnyEvent::Util;
+   use AnyEvent::Fork::RPC;
+   use IO::FDPass;
+
+   my ($s1, $s2) = AnyEvent::Util::portable_socketpair;
+
+   my $rpc = AnyEvent::Fork
+      ->new
+      ->send_fh ($s2)
+      ->require ("MyWorker")
+      ->AnyEvent::Fork::RPC::run ("MyWorker::run",
+           init => "MyWorker::init",
+        );
+
+   undef $s2; # no need to keep it around
+
+   # pass an fd
+   $rpc->("i'll send some fd now, please expect it!", my $cv = AE::cv);
+
+   IO::FDPass::send fileno $s1, fileno $handle_to_pass;
+
+   $cv->recv;
+
+The MyWorker module could look like this:
+
+   package MyWorker;
+
+   use IO::FDPass;
+
+   my $s2;
+
+   sub init {
+      $s2 = $_[0];
+   }
+
+   sub run {
+      if ($_[0] eq "i'll send some fd now, please expect it!") {
+         my $fd = IO::FDPass::recv fileno $s2;
+         ...
+      }
+   }
+
+Of course, this might be blocking if you pass a lot of file descriptors,
+so you might want to look into L<AnyEvent::FDpasser>, which can handle the
+gory details.
+
 =head1 SEE ALSO
 
-L<AnyEvent::Fork> (to create the processes in the first place),
-L<AnyEvent::Fork::Pool> (to manage whole pools of processes).
+L<AnyEvent::Fork>, to create the processes in the first place.
+
+L<AnyEvent::Fork::Pool>, to manage whole pools of processes.
 
 =head1 AUTHOR AND CONTACT INFORMATION