|
|
1 | NAME |
|
|
2 | AnyEvent::Fork::RPC - simple RPC extension for AnyEvent::Fork |
|
|
3 | |
|
|
4 | SYNOPSIS |
|
|
5 | use AnyEvent::Fork; |
|
|
6 | use AnyEvent::Fork::RPC; |
|
|
7 | |
|
|
8 | my $rpc = AnyEvent::Fork |
|
|
9 | ->new |
|
|
10 | ->require ("MyModule") |
|
|
11 | ->AnyEvent::Fork::RPC::run ( |
|
|
12 | "MyModule::server", |
|
|
13 | ); |
|
|
14 | |
|
|
15 | use AnyEvent; |
|
|
16 | |
|
|
17 | my $cv = AE::cv; |
|
|
18 | |
|
|
19 | $rpc->(1, 2, 3, sub { |
|
|
20 | print "MyModule::server returned @_\n"; |
|
|
21 | $cv->send; |
|
|
22 | }); |
|
|
23 | |
|
|
24 | $cv->recv; |
|
|
25 | |
|
|
26 | DESCRIPTION |
|
|
27 | This module implements a simple RPC protocol and backend for processes |
|
|
28 | created via AnyEvent::Fork or AnyEvent::Fork::Remote, allowing you to |
|
|
29 | call a function in the child process and receive its return values (up |
|
|
30 | to 4GB serialised). |
|
|
31 | |
|
|
32 | It implements two different backends: a synchronous one that works like |
|
|
33 | a normal function call, and an asynchronous one that can run multiple |
|
|
34 | jobs concurrently in the child, using AnyEvent. |
|
|
35 | |
|
|
36 | It also implements an asynchronous event mechanism from the child to the |
|
|
37 | parent, that could be used for progress indications or other |
|
|
38 | information. |
|
|
39 | |
|
|
40 | EXAMPLES |
|
|
41 | Example 1: Synchronous Backend |
|
|
42 | Here is a simple example that implements a backend that executes |
|
|
43 | "unlink" and "rmdir" calls, and reports their status back. It also |
|
|
44 | reports the number of requests it has processed every three requests, |
|
|
45 | which is clearly silly, but illustrates the use of events. |
|
|
46 | |
|
|
47 | First the parent process: |
|
|
48 | |
|
|
49 | use AnyEvent; |
|
|
50 | use AnyEvent::Fork; |
|
|
51 | use AnyEvent::Fork::RPC; |
|
|
52 | |
|
|
53 | my $done = AE::cv; |
|
|
54 | |
|
|
55 | my $rpc = AnyEvent::Fork |
|
|
56 | ->new |
|
|
57 | ->require ("MyWorker") |
|
|
58 | ->AnyEvent::Fork::RPC::run ("MyWorker::run", |
|
|
59 | on_error => sub { warn "ERROR: $_[0]"; exit 1 }, |
|
|
60 | on_event => sub { warn "$_[0] requests handled\n" }, |
|
|
61 | on_destroy => $done, |
|
|
62 | ); |
|
|
63 | |
|
|
64 | for my $id (1..6) { |
|
|
65 | $rpc->(rmdir => "/tmp/somepath/$id", sub { |
|
|
66 | $_[0] |
|
|
67 | or warn "/tmp/somepath/$id: $_[1]\n"; |
|
|
68 | }); |
|
|
69 | } |
|
|
70 | |
|
|
71 | undef $rpc; |
|
|
72 | |
|
|
73 | $done->recv; |
|
|
74 | |
|
|
75 | The parent creates the process, queues a few rmdir's. It then forgets |
|
|
76 | about the $rpc object, so that the child exits after it has handled the |
|
|
77 | requests, and then it waits till the requests have been handled. |
|
|
78 | |
|
|
79 | The child is implemented using a separate module, "MyWorker", shown |
|
|
80 | here: |
|
|
81 | |
|
|
82 | package MyWorker; |
|
|
83 | |
|
|
84 | my $count; |
|
|
85 | |
|
|
86 | sub run { |
|
|
87 | my ($cmd, $path) = @_; |
|
|
88 | |
|
|
89 | AnyEvent::Fork::RPC::event ($count) |
|
|
90 | unless ++$count % 3; |
|
|
91 | |
|
|
92 | my $status = $cmd eq "rmdir" ? rmdir $path |
|
|
93 | : $cmd eq "unlink" ? unlink $path |
|
|
94 | : die "fatal error, illegal command '$cmd'"; |
|
|
95 | |
|
|
96 | $status or (0, "$!") |
|
|
97 | } |
|
|
98 | |
|
|
99 | 1 |
|
|
100 | |
|
|
101 | The "run" function first sends a "progress" event every three calls, and |
|
|
102 | then executes "rmdir" or "unlink", depending on the first parameter (or |
|
|
103 | dies with a fatal error - obviously, you must never let this happen :). |
|
|
104 | |
|
|
105 | Eventually it returns the status value true if the command was |
|
|
106 | successful, or the status value 0 and the stringified error message. |
|
|
107 | |
|
|
108 | On my system, running the first code fragment with the given MyWorker.pm |
|
|
109 | in the current directory yields: |
|
|
110 | |
|
|
111 | /tmp/somepath/1: No such file or directory |
|
|
112 | /tmp/somepath/2: No such file or directory |
|
|
113 | 3 requests handled |
|
|
114 | /tmp/somepath/3: No such file or directory |
|
|
115 | /tmp/somepath/4: No such file or directory |
|
|
116 | /tmp/somepath/5: No such file or directory |
|
|
117 | 6 requests handled |
|
|
118 | /tmp/somepath/6: No such file or directory |
|
|
119 | |
|
|
120 | Obviously, none of the directories I am trying to delete even exist. |
|
|
121 | Also, the events and responses are processed in exactly the same order |
|
|
122 | as they were created in the child, which is true for both synchronous |
|
|
123 | and asynchronous backends. |
|
|
124 | |
|
|
125 | Note that the parentheses in the call to "AnyEvent::Fork::RPC::event" |
|
|
126 | are not optional. That is because the function isn't defined when the |
|
|
127 | code is compiled. You can make sure it is visible by pre-loading the |
|
|
128 | correct backend module in the call to "require": |
|
|
129 | |
|
|
130 | ->require ("AnyEvent::Fork::RPC::Sync", "MyWorker") |
|
|
131 | |
|
|
132 | Since the backend module declares the "event" function, loading it first |
|
|
133 | ensures that perl will correctly interpret calls to it. |
|
|
134 | |
|
|
135 | And as a final remark, there is a fine module on CPAN that can |
|
|
136 | asynchronously "rmdir" and "unlink" and a lot more, and more efficiently |
|
|
137 | than this example, namely IO::AIO. |
|
|
138 | |
|
|
139 | Example 1a: the same with the asynchronous backend |
|
|
140 | This example only shows what needs to be changed to use the async |
|
|
141 | backend instead. Doing this is not very useful, the purpose of this |
|
|
142 | example is to show the minimum amount of change that is required to go |
|
|
143 | from the synchronous to the asynchronous backend. |
|
|
144 | |
|
|
145 | To use the async backend in the previous example, you need to add the |
|
|
146 | "async" parameter to the "AnyEvent::Fork::RPC::run" call: |
|
|
147 | |
|
|
148 | ->AnyEvent::Fork::RPC::run ("MyWorker::run", |
|
|
149 | async => 1, |
|
|
150 | ... |
|
|
151 | |
|
|
152 | And since the function call protocol is now changed, you need to adapt |
|
|
153 | "MyWorker::run" to the async API. |
|
|
154 | |
|
|
155 | First, you need to accept the extra initial $done callback: |
|
|
156 | |
|
|
157 | sub run { |
|
|
158 | my ($done, $cmd, $path) = @_; |
|
|
159 | |
|
|
160 | And since a response is now generated when $done is called, as opposed |
|
|
161 | to when the function returns, we need to call the $done function with |
|
|
162 | the status: |
|
|
163 | |
|
|
164 | $done->($status or (0, "$!")); |
|
|
165 | |
|
|
166 | A few remarks are in order. First, it's quite pointless to use the async |
|
|
167 | backend for this example - but it *is* possible. Second, you can call |
|
|
168 | $done before or after returning from the function. Third, having both |
|
|
169 | returned from the function and having called the $done callback, the |
|
|
170 | child process may exit at any time, so you should call $done only when |
|
|
171 | you really *are* done. |
|
|
172 | |
|
|
173 | Example 2: Asynchronous Backend |
|
|
174 | This example implements multiple count-downs in the child, using |
|
|
175 | AnyEvent timers. While this is a bit silly (one could use timers in the |
|
|
176 | parent just as well), it illustrates the ability to use AnyEvent in the |
|
|
177 | child and the fact that responses can arrive in a different order than |
|
|
178 | the requests. |
|
|
179 | |
|
|
180 | It also shows how to embed the actual child code into a "__DATA__" |
|
|
181 | section, so it doesn't need any external files at all. |
|
|
182 | |
|
|
183 | And when your parent process is often busy, and you have stricter timing |
|
|
184 | requirements, then running timers in a child process suddenly doesn't |
|
|
185 | look so silly anymore. |
|
|
186 | |
|
|
187 | Without further ado, here is the code: |
|
|
188 | |
|
|
189 | use AnyEvent; |
|
|
190 | use AnyEvent::Fork; |
|
|
191 | use AnyEvent::Fork::RPC; |
|
|
192 | |
|
|
193 | my $done = AE::cv; |
|
|
194 | |
|
|
195 | my $rpc = AnyEvent::Fork |
|
|
196 | ->new |
|
|
197 | ->require ("AnyEvent::Fork::RPC::Async") |
|
|
198 | ->eval (do { local $/; <DATA> }) |
|
|
199 | ->AnyEvent::Fork::RPC::run ("run", |
|
|
200 | async => 1, |
|
|
201 | on_error => sub { warn "ERROR: $_[0]"; exit 1 }, |
|
|
202 | on_event => sub { print $_[0] }, |
|
|
203 | on_destroy => $done, |
|
|
204 | ); |
|
|
205 | |
|
|
206 | for my $count (3, 2, 1) { |
|
|
207 | $rpc->($count, sub { |
|
|
208 | warn "job $count finished\n"; |
|
|
209 | }); |
|
|
210 | } |
|
|
211 | |
|
|
212 | undef $rpc; |
|
|
213 | |
|
|
214 | $done->recv; |
|
|
215 | |
|
|
216 | __DATA__ |
|
|
217 | |
|
|
218 | # this ends up in main, as we don't use a package declaration |
|
|
219 | |
|
|
220 | use AnyEvent; |
|
|
221 | |
|
|
222 | sub run { |
|
|
223 | my ($done, $count) = @_; |
|
|
224 | |
|
|
225 | my $n; |
|
|
226 | |
|
|
227 | AnyEvent::Fork::RPC::event "starting to count up to $count\n"; |
|
|
228 | |
|
|
229 | my $w; $w = AE::timer 1, 1, sub { |
|
|
230 | ++$n; |
|
|
231 | |
|
|
232 | AnyEvent::Fork::RPC::event "count $n of $count\n"; |
|
|
233 | |
|
|
234 | if ($n == $count) { |
|
|
235 | undef $w; |
|
|
236 | $done->(); |
|
|
237 | } |
|
|
238 | }; |
|
|
239 | } |
|
|
240 | |
|
|
241 | The parent part (the one before the "__DATA__" section) isn't very |
|
|
242 | different from the earlier examples. It sets async mode, preloads the |
|
|
243 | backend module (so the "AnyEvent::Fork::RPC::event" function is |
|
|
244 | declared), uses a slightly different "on_event" handler (which we use |
|
|
245 | simply for logging purposes) and then, instead of loading a module with |
|
|
246 | the actual worker code, it "eval"'s the code from the data section in |
|
|
247 | the child process. |
|
|
248 | |
|
|
249 | It then starts three countdowns, from 3 to 1 seconds downwards, destroys |
|
|
250 | the rpc object so the example finishes eventually, and then just waits |
|
|
251 | for the stuff to trickle in. |
|
|
252 | |
|
|
253 | The worker code uses the event function to log some progress messages, |
|
|
254 | but mostly just creates a recurring one-second timer. |
|
|
255 | |
|
|
256 | The timer callback increments a counter, logs a message, and eventually, |
|
|
257 | when the count has been reached, calls the finish callback. |
|
|
258 | |
|
|
259 | On my system, this results in the following output. Since all timers |
|
|
260 | fire at roughly the same time, the actual order isn't guaranteed, but |
|
|
261 | the order shown is very likely what you would get, too. |
|
|
262 | |
|
|
263 | starting to count up to 3 |
|
|
264 | starting to count up to 2 |
|
|
265 | starting to count up to 1 |
|
|
266 | count 1 of 3 |
|
|
267 | count 1 of 2 |
|
|
268 | count 1 of 1 |
|
|
269 | job 1 finished |
|
|
270 | count 2 of 2 |
|
|
271 | job 2 finished |
|
|
272 | count 2 of 3 |
|
|
273 | count 3 of 3 |
|
|
274 | job 3 finished |
|
|
275 | |
|
|
276 | While the overall ordering isn't guaranteed, the async backend still |
|
|
277 | guarantees that events and responses are delivered to the parent process |
|
|
278 | in the exact same ordering as they were generated in the child process. |
|
|
279 | |
|
|
280 | And unless your system is *very* busy, it should clearly show that the |
|
|
281 | job started last will finish first, as it has the lowest count. |
|
|
282 | |
|
|
283 | This concludes the async example. Since AnyEvent::Fork does not actually |
|
|
284 | fork, you are free to use about any module in the child, not just |
|
|
285 | AnyEvent, but also IO::AIO, or Tk for example. |
|
|
286 | |
|
|
287 | Example 3: Asynchronous backend with Coro |
|
|
288 | With Coro you can create a nice asynchronous backend implementation by |
|
|
289 | defining an rpc server function that creates a new Coro thread for every |
|
|
290 | request that calls a function "normally", i.e. the parameters from the |
|
|
291 | parent process are passed to it, and any return values are returned to |
|
|
292 | the parent process, e.g.: |
|
|
293 | |
|
|
294 | package My::Arith; |
|
|
295 | |
|
|
296 | sub add { |
|
|
297 | return $_[0] + $_[1]; |
|
|
298 | } |
|
|
299 | |
|
|
300 | sub mul { |
|
|
301 | return $_[0] * $_[1]; |
|
|
302 | } |
|
|
303 | |
|
|
304 | sub run { |
|
|
305 | my ($done, $func, @arg) = @_; |
|
|
306 | |
|
|
307 | Coro::async_pool { |
|
|
308 | $done->($func->(@arg)); |
|
|
309 | }; |
|
|
310 | } |
|
|
311 | |
|
|
312 | The "run" function creates a new thread for every invocation, using the |
|
|
313 | first argument as function name, and calls the $done callback on its |
|
|
314 | return values. This makes it quite natural to define the "add" and "mul" |
|
|
315 | functions to add or multiply two numbers and return the result. |
|
|
316 | |
|
|
317 | Since this is the asynchronous backend, it's quite possible to define |
|
|
318 | RPC functions that do I/O or wait for external events - their execution |
|
|
319 | will overlap as needed. |
|
|
320 | |
|
|
321 | The above could be used like this: |
|
|
322 | |
|
|
323 | my $rpc = AnyEvent::Fork |
|
|
324 | ->new |
|
|
325 | ->require ("MyWorker") |
|
|
326 | ->AnyEvent::Fork::RPC::run ("My::Arith::run", |
|
|
327 | on_error => ..., on_event => ..., on_destroy => ..., |
|
|
328 | ); |
|
|
329 | |
|
|
330 | $rpc->(add => 1, 3, Coro::rouse_cb); say Coro::rouse_wait; |
|
|
331 | $rpc->(mul => 3, 2, Coro::rouse_cb); say Coro::rouse_wait; |
|
|
332 | |
|
|
333 | The "say"'s will print 4 and 6. |
|
|
334 | |
|
|
335 | Example 4: Forward AnyEvent::Log messages using "on_event" |
|
|
336 | This partial example shows how to use the "event" function to forward |
|
|
337 | AnyEvent::Log messages to the parent. |
|
|
338 | |
|
|
339 | For this, the parent needs to provide a suitable "on_event": |
|
|
340 | |
|
|
341 | ->AnyEvent::Fork::RPC::run ( |
|
|
342 | on_event => sub { |
|
|
343 | if ($_[0] eq "ae_log") { |
|
|
344 | my (undef, $level, $message) = @_; |
|
|
345 | AE::log $level, $message; |
|
|
346 | } else { |
|
|
347 | # other event types |
|
|
348 | } |
|
|
349 | }, |
|
|
350 | ) |
|
|
351 | |
|
|
352 | In the child, as early as possible, the following code should |
|
|
353 | reconfigure AnyEvent::Log to log via "AnyEvent::Fork::RPC::event": |
|
|
354 | |
|
|
355 | $AnyEvent::Log::LOG->log_cb (sub { |
|
|
356 | my ($timestamp, $orig_ctx, $level, $message) = @{+shift}; |
|
|
357 | |
|
|
358 | if (defined &AnyEvent::Fork::RPC::event) { |
|
|
359 | AnyEvent::Fork::RPC::event (ae_log => $level, $message); |
|
|
360 | } else { |
|
|
361 | warn "[$$ before init] $message\n"; |
|
|
362 | } |
|
|
363 | }); |
|
|
364 | |
|
|
365 | There is an important twist - the "AnyEvent::Fork::RPC::event" function |
|
|
366 | is only defined when the child is fully initialised. If you redirect the |
|
|
367 | log messages in your "init" function for example, then the "event" |
|
|
368 | function might not yet be available. This is why the log callback checks |
|
|
369 | whether the function is there using "defined", and only then uses it to |
|
|
370 | log the message. |
|
|
371 | |
|
|
372 | PARENT PROCESS USAGE |
|
|
373 | This module exports nothing, and only implements a single function: |
|
|
374 | |
|
|
375 | my $rpc = AnyEvent::Fork::RPC::run $fork, $function, [key => value...] |
|
|
376 | The traditional way to call it. But it is way cooler to call it in |
|
|
377 | the following way: |
|
|
378 | |
|
|
379 | my $rpc = $fork->AnyEvent::Fork::RPC::run ($function, [key => value...]) |
|
|
380 | This "run" function/method can be used in place of the |
|
|
381 | AnyEvent::Fork::run method. Just like that method, it takes over the |
|
|
382 | AnyEvent::Fork process, but instead of calling the specified |
|
|
383 | $function directly, it runs a server that accepts RPC calls and |
|
|
384 | handles responses. |
|
|
385 | |
|
|
386 | It returns a function reference that can be used to call the |
|
|
387 | function in the child process, handling serialisation and data |
|
|
388 | transfers. |
|
|
389 | |
|
|
390 | The following key/value pairs are allowed. It is recommended to have |
|
|
391 | at least an "on_error" or "on_event" handler set. |
|
|
392 | |
|
|
393 | on_error => $cb->($msg) |
|
|
394 | Called on (fatal) errors, with a descriptive (hopefully) |
|
|
395 | message. If this callback is not provided, but "on_event" is, |
|
|
396 | then the "on_event" callback is called with the first argument |
|
|
397 | being the string "error", followed by the error message. |
|
|
398 | |
|
|
399 | If neither handler is provided, then the error is reported with |
|
|
400 | loglevel "error" via "AE::log". |
|
|
401 | |
|
|
402 | on_event => $cb->(...) |
|
|
403 | Called for every call to the "AnyEvent::Fork::RPC::event" |
|
|
404 | function in the child, with the arguments of that function |
|
|
405 | passed to the callback. |
|
|
406 | |
|
|
407 | Also called on errors when no "on_error" handler is provided. |
|
|
408 | |
|
|
409 | on_destroy => $cb->() |
|
|
410 | Called when the $rpc object has been destroyed and all requests |
|
|
411 | have been successfully handled. This is useful when you queue |
|
|
412 | some requests and want the child to go away after it has handled |
|
|
413 | them. The problem is that the parent must not exit either until |
|
|
414 | all requests have been handled, and this can be accomplished by |
|
|
415 | waiting for this callback. |
|
|
416 | |
|
|
417 | init => $function (default none) |
|
|
418 | When specified (by name), this function is called in the child |
|
|
419 | as the very first thing when taking over the process, with all |
|
|
420 | the arguments normally passed to the "AnyEvent::Fork::run" |
|
|
421 | function, except the communications socket. |
|
|
422 | |
|
|
423 | It can be used to do one-time things in the child such as |
|
|
424 | storing passed parameters or opening database connections. |
|
|
425 | |
|
|
426 | It is called very early - before the serialisers are created or |
|
|
427 | the $function name is resolved into a function reference, so it |
|
|
428 | could be used to load any modules that provide the serialiser or |
|
|
429 | function. It can not, however, create events. |
|
|
430 | |
|
|
431 | done => $function (default "CORE::exit") |
|
|
432 | The function to call when the asynchronous backend detects an |
|
|
433 | end of file condition when reading from the communications |
|
|
434 | socket *and* there are no outstanding requests. It's ignored by |
|
|
435 | the synchronous backend. |
|
|
436 | |
|
|
437 | By overriding this you can prolong the life of a RPC process |
|
|
438 | after e.g. the parent has exited by running the event loop in |
|
|
439 | the provided function (or simply calling it, for example, when |
|
|
440 | your child process uses EV you could provide EV::run as "done" |
|
|
441 | function). |
|
|
442 | |
|
|
443 | Of course, in that case you are responsible for exiting at the |
|
|
444 | appropriate time and not returning from the provided function. |
|
|
445 | |
|
|
446 | async => $boolean (default: 0) |
|
|
447 | The default server used in the child does all I/O blockingly, |
|
|
448 | and only allows a single RPC call to execute concurrently. |
|
|
449 | |
|
|
450 | Setting "async" to a true value switches to another |
|
|
451 | implementation that uses AnyEvent in the child and allows |
|
|
452 | multiple concurrent RPC calls (it does not support recursion in |
|
|
453 | the event loop however, blocking condvar calls will fail). |
|
|
454 | |
|
|
455 | The actual API in the child is documented in the section that |
|
|
456 | describes the calling semantics of the returned $rpc function. |
|
|
457 | |
|
|
458 | If you want to pre-load the actual back-end modules to enable |
|
|
459 | memory sharing, then you should load "AnyEvent::Fork::RPC::Sync" |
|
|
460 | for synchronous, and "AnyEvent::Fork::RPC::Async" for |
|
|
461 | asynchronous mode. |
|
|
462 | |
|
|
463 | If you use a template process and want to fork both sync and |
|
|
464 | async children, then it is permissible to load both modules. |
|
|
465 | |
|
|
466 | serialiser => $string (default: |
|
|
467 | $AnyEvent::Fork::RPC::STRING_SERIALISER) |
|
|
468 | All arguments, result data and event data have to be serialised |
|
|
469 | to be transferred between the processes. For this, they have to |
|
|
470 | be frozen and thawed in both parent and child processes. |
|
|
471 | |
|
|
472 | By default, only octet strings can be passed between the |
|
|
473 | processes, which is reasonably fast and efficient and requires |
|
|
474 | no extra modules (the "AnyEvent::Fork::RPC" distribution does |
|
|
475 | not provide these extra serialiser modules). |
|
|
476 | |
|
|
477 | For more complicated use cases, you can provide your own freeze |
|
|
478 | and thaw functions, by specifying a string with perl source |
|
|
479 | code. It's supposed to return two code references when |
|
|
480 | evaluated: the first receives a list of perl values and must |
|
|
481 | return an octet string. The second receives the octet string and |
|
|
482 | must return the original list of values. |
|
|
483 | |
|
|
484 | If you need an external module for serialisation, then you can |
|
|
485 | either pre-load it into your AnyEvent::Fork process, or you can |
|
|
486 | add a "use" or "require" statement into the serialiser string. |
|
|
487 | Or both. |
|
|
488 | |
|
|
489 | Here are some examples - all of them are also available as |
|
|
490 | global variables that make them easier to use. |
|
|
491 | |
|
|
492 | $AnyEvent::Fork::RPC::STRING_SERIALISER - octet strings only |
|
|
493 | This serialiser (currently the default) concatenates |
|
|
494 | length-prefixed octet strings. That |
|
|
495 | means you can only pass (and return) strings containing |
|
|
496 | character codes 0-255. |
|
|
497 | |
|
|
498 | The main advantages of this serialiser are the high speed |
|
|
499 | and that it doesn't need another module. The main |
|
|
500 | disadvantage is that you are very limited in what you can |
|
|
501 | pass - only octet strings. |
|
|
502 | |
|
|
503 | Implementation: |
|
|
504 | |
|
|
505 | ( |
|
|
506 | sub { pack "(w/a*)*", @_ }, |
|
|
507 | sub { unpack "(w/a*)*", shift } |
|
|
508 | ) |
|
|
509 | |
|
|
510 | $AnyEvent::Fork::RPC::CBOR_XS_SERIALISER - uses CBOR::XS |
|
|
511 | This serialiser creates CBOR::XS arrays - you have to make |
|
|
512 | sure the CBOR::XS module is installed for this serialiser to |
|
|
513 | work. It can be beneficial for sharing when you preload the |
|
|
514 | CBOR::XS module in a template process. |
|
|
515 | |
|
|
516 | CBOR::XS is about as fast as the octet string serialiser, |
|
|
517 | but supports complex data structures (similar to JSON) and |
|
|
518 | is faster than any of the other serialisers. If you have the |
|
|
519 | CBOR::XS module available, it's the best choice. |
|
|
520 | |
|
|
521 | The encoder enables "allow_sharing" (so this serialisation |
|
|
522 | method can encode cyclic and self-referencing data |
|
|
523 | structures). |
|
|
524 | |
|
|
525 | Implementation: |
|
|
526 | |
|
|
527 | use CBOR::XS (); |
|
|
528 | ( |
|
|
529 | sub { CBOR::XS::encode_cbor_sharing \@_ }, |
|
|
530 | sub { @{ CBOR::XS::decode_cbor shift } } |
|
|
531 | ) |
|
|
532 | |
|
|
533 | $AnyEvent::Fork::RPC::JSON_SERIALISER - uses JSON::XS or JSON |
|
|
534 | This serialiser creates JSON arrays - you have to make sure |
|
|
535 | the JSON module is installed for this serialiser to work. It |
|
|
536 | can be beneficial for sharing when you preload the JSON |
|
|
537 | module in a template process. |
|
|
538 | |
|
|
539 | JSON (with JSON::XS installed) is slower than the octet |
|
|
540 | string serialiser, but usually much faster than Storable, |
|
|
541 | unless big chunks of binary data need to be transferred. |
|
|
542 | |
|
|
543 | Implementation: |
|
|
544 | |
|
|
545 | use JSON (); |
|
|
546 | ( |
|
|
547 | sub { JSON::encode_json \@_ }, |
|
|
548 | sub { @{ JSON::decode_json shift } } |
|
|
549 | ) |
|
|
550 | |
|
|
551 | $AnyEvent::Fork::RPC::STORABLE_SERIALISER - Storable |
|
|
552 | This serialiser uses Storable, which means it has a high |
|
|
553 | chance of serialising just about anything you throw at it, |
|
|
554 | at the cost of having very high overhead per operation. It |
|
|
555 | also comes with perl. It should be used when you need to |
|
|
556 | serialise complex data structures. |
|
|
557 | |
|
|
558 | Implementation: |
|
|
559 | |
|
|
560 | use Storable (); |
|
|
561 | ( |
|
|
562 | sub { Storable::freeze \@_ }, |
|
|
563 | sub { @{ Storable::thaw shift } } |
|
|
564 | ) |
|
|
565 | |
|
|
566 | $AnyEvent::Fork::RPC::NSTORABLE_SERIALISER - portable Storable |
|
|
567 | This serialiser also uses Storable, but uses its "network" |
|
|
568 | format to serialise data, which makes it possible to talk to |
|
|
569 | different perl binaries (for example, when talking to a |
|
|
570 | process created with AnyEvent::Fork::Remote). |
|
|
571 | |
|
|
572 | Implementation: |
|
|
573 | |
|
|
574 | use Storable (); |
|
|
575 | ( |
|
|
576 | sub { Storable::nfreeze \@_ }, |
|
|
577 | sub { @{ Storable::thaw shift } } |
|
|
578 | ) |
|
|
579 | |
|
|
580 | See the examples section earlier in this document for some actual |
|
|
581 | examples. |
|
|
582 | |
|
|
583 | $rpc->(..., $cb->(...)) |
|
|
584 | The RPC object returned by "AnyEvent::Fork::RPC::run" is actually a |
|
|
585 | code reference. There are two things you can do with it: call it, |
|
|
586 | and let it go out of scope (let it get destroyed). |
|
|
587 | |
|
|
588 | If "async" was false when $rpc was created (the default), then, if |
|
|
589 | you call $rpc, the $function is invoked with all arguments passed to |
|
|
590 | $rpc except the last one (the callback). When the function returns, |
|
|
591 | the callback will be invoked with all the return values. |
|
|
592 | |
|
|
593 | If "async" was true, then the $function receives an additional |
|
|
594 | initial argument, the result callback. In this case, returning from |
|
|
595 | $function does nothing - the function only counts as "done" when the |
|
|
596 | result callback is called, and any arguments passed to it are |
|
|
597 | considered the return values. This makes it possible to "return" |
|
|
598 | from event handlers or e.g. Coro threads. |
|
|
599 | |
|
|
600 | The other thing that can be done with the RPC object is to destroy |
|
|
601 | it. In this case, the child process will execute all remaining RPC |
|
|
602 | calls, report their results, and then exit. |
|
|
603 | |
|
|
604 | See the examples section earlier in this document for some actual |
|
|
605 | examples. |
|
|
606 | |
|
|
607 | CHILD PROCESS USAGE |
|
|
608 | The following function is not available in this module. It is only |
|
|
609 | available in the namespace of this module when the child is running, |
|
|
610 | without having to load any extra modules. It is part of the |
|
|
611 | child-side API of AnyEvent::Fork::RPC. |
|
|
612 | |
|
|
613 | AnyEvent::Fork::RPC::event ... |
|
|
614 | Send an event to the parent. Events are a bit like RPC calls made by |
|
|
615 | the child process to the parent, except that there is no notion of |
|
|
616 | return values. |
|
|
617 | |
|
|
618 | See the examples section earlier in this document for some actual |
|
|
619 | examples. |
|
|
620 | |
|
|
621 | PROCESS EXIT |
|
|
622 | If and when the child process exits depends on the backend and |
|
|
623 | configuration. Apart from explicit exits (e.g. by calling "exit") or |
|
|
624 | runtime conditions (uncaught exceptions, signals etc.), the backends |
|
|
625 | exit under these conditions: |
|
|
626 | |
|
|
627 | Synchronous Backend |
|
|
628 | The synchronous backend is very simple: when the process waits for |
|
|
629 | another request to arrive and the writing side (usually in the |
|
|
630 | parent) is closed, it will exit normally, i.e. as if your main |
|
|
631 | program reached the end of the file. |
|
|
632 | |
|
|
633 | That means that if your parent process exits, the RPC process will |
|
|
634 | usually exit as well, either because it is idle anyway, or because |
|
|
635 | it executes a request. In the latter case, you will likely get an |
|
|
636 | error when the RPC process tries to send the results to the parent |
|
|
637 | (because arguably, you shouldn't exit your parent while there are |
|
|
638 | still outstanding requests). |
|
|
639 | |
|
|
640 | The process is usually quiescent when it happens, so it should |
|
|
641 | rarely be a problem, and "END" handlers can be used to clean up. |
|
|
642 | |
|
|
643 | Asynchronous Backend |
|
|
644 | For the asynchronous backend, things are more complicated: Whenever |
|
|
645 | it listens for another request by the parent, it might detect that |
|
|
646 | the socket was closed (e.g. because the parent exited). It will stop |
|
|
647 | listening for new requests and instead try to write out any |
|
|
648 | remaining data (if any) or simply check whether the socket can be |
|
|
649 | written to. After this, the RPC process is effectively done - no new |
|
|
650 | requests are incoming, no outstanding request data can be written |
|
|
651 | back. |
|
|
652 | |
|
|
653 | Since chances are high that there are event watchers that the RPC |
|
|
654 | server knows nothing about (why else would one use the async backend |
|
|
655 | if not for the ability to register watchers?), the event loop would |
|
|
656 | often happily continue. |
|
|
657 | |
|
|
658 | This is why the asynchronous backend explicitly calls "CORE::exit" |
|
|
659 | when it is done (under other circumstances, such as when there is an |
|
|
660 | I/O error and there is outstanding data to write, it will log a |
|
|
661 | fatal message via AnyEvent::Log, also causing the program to exit). |
|
|
662 | |
|
|
663 | You can override this by specifying a function name to call via the |
|
|
664 | "done" parameter instead. |
|
|
665 | |
|
|
666 | ADVANCED TOPICS |
|
|
667 | Choosing a backend |
|
|
668 | So how do you decide which backend to use? Well, that's your problem to |
|
|
669 | solve, but here are some thoughts on the matter: |
|
|
670 | |
|
|
671 | Synchronous |
|
|
672 | The synchronous backend does not rely on any external modules (well, |
|
|
673 | except common::sense, which works around a bug in how perl's warning |
|
|
674 | system works). This keeps the process very small, for example, on my |
|
|
675 | system, an empty perl interpreter uses 1492kB RSS, which becomes |
|
|
676 | 2020kB after "use warnings; use strict" (for people who grew up with |
|
|
677 | C64s around them this is probably shocking every single time they |
|
|
678 | see it). The worker process in the first example in this document |
|
|
679 | uses 1792kB. |
|
|
680 | |
|
|
681 | Since the calls are done synchronously, slow jobs will keep newer |
|
|
682 | jobs from executing. |
|
|
683 | |
|
|
684 | The synchronous backend also has no overhead due to running an event |
|
|
685 | loop - reading requests is therefore very efficient, while writing |
|
|
686 | responses is less so, as every response results in a write syscall. |
|
|
687 | |
|
|
688 | If the parent process is busy and a bit slow reading responses, the |
|
|
689 | child waits instead of processing further requests. This also limits |
|
|
690 | the amount of memory needed for buffering, as never more than one |
|
|
691 | response has to be buffered. |
|
|
692 | |
|
|
693 | The API in the child is simple - you just have to define a function |
|
|
694 | that does something and returns something. |
|
|
695 | |
|
|
696 | It's hard to use modules or code that relies on an event loop, as |
|
|
697 | the child cannot execute anything while it waits for more input. |
|
|
698 | |
|
|
699 | Asynchronous |
|
|
700 | The asynchronous backend relies on AnyEvent, which tries to be |
|
|
701 | small, but still comes at a price: On my system, the worker from |
|
|
702 | example 1a uses 3420kB RSS (for AnyEvent, which loads EV, which |
|
|
703 | needs XSLoader which in turn loads a lot of other modules such as |
|
|
704 | warnings, strict, vars, Exporter...). |
|
|
705 | |
|
|
706 | It batches requests and responses reasonably efficiently, doing only |
|
|
707 | as few reads and writes as needed, but needs to poll for events via |
|
|
708 | the event loop. |
|
|
709 | |
|
|
710 | Responses are queued when the parent process is busy. This means the |
|
|
711 | child can continue to execute any queued requests. It also means |
|
|
712 | that a child might queue a lot of responses in memory when it |
|
|
713 | generates them and the parent process is slow accepting them. |
|
|
714 | |
|
|
715 | The API is not a straightforward RPC pattern - you have to call a |
|
|
716 | "done" callback to pass return values and signal completion. Also, |
|
|
717 | more importantly, the API starts jobs as fast as possible - when |
|
|
718 | 1000 jobs are queued and the jobs are slow, they will all run |
|
|
719 | concurrently. The child must implement some queueing/limiting |
|
|
720 | mechanism if this causes problems. Alternatively, the parent could |
|
|
721 | limit the number of rpc calls that are outstanding. |
|
|
722 | |
|
|
723 | Blocking use of condvars is not supported (in the main thread, |
|
|
724 | outside of e.g. Coro threads). |
|
|
725 | |
|
|
726 | Using event-based modules such as IO::AIO, Gtk2, Tk and so on is |
|
|
727 | easy. |
|
|
728 | |
|
|
729 | Passing file descriptors |
|
|
730 | Unlike AnyEvent::Fork, this module has no in-built file handle or file |
|
|
731 | descriptor passing abilities. |
|
|
732 | |
|
|
733 | The reason is that passing file descriptors is extraordinarily tricky |
|
|
734 | business, and conflicts with efficient batching of messages. |
|
|
735 | |
|
|
736 | There still is a method you can use: Create a |
|
|
737 | "AnyEvent::Util::portable_socketpair" and "send_fh" one half of it to |
|
|
738 | the process before you pass control to "AnyEvent::Fork::RPC::run". |
|
|
739 | |
|
|
740 | Whenever you want to pass a file descriptor, send an rpc request to the |
|
|
741 | child process (so it expects the descriptor), then send it over the |
|
|
742 | other half of the socketpair. The child should fetch the descriptor from |
|
|
743 | the half it was passed earlier. |
|
|
744 | |
|
|
745 | Here is some (untested) pseudocode to that effect: |
|
|
746 | |
|
|
747 | use AnyEvent::Util; |
|
|
748 | use AnyEvent::Fork; |
|
|
749 | use AnyEvent::Fork::RPC; |
|
|
750 | use IO::FDPass; |
|
|
751 | |
|
|
752 | my ($s1, $s2) = AnyEvent::Util::portable_socketpair; |
|
|
753 | |
|
|
754 | my $rpc = AnyEvent::Fork |
|
|
755 | ->new |
|
|
756 | ->send_fh ($s2) |
|
|
757 | ->require ("MyWorker") |
|
|
758 | ->AnyEvent::Fork::RPC::run ("MyWorker::run", |
|
|
759 | init => "MyWorker::init", |
|
|
760 | ); |
|
|
761 | |
|
|
762 | undef $s2; # no need to keep it around |
|
|
763 | |
|
|
764 | # pass an fd |
|
|
765 | $rpc->("i'll send some fd now, please expect it!", my $cv = AE::cv); |
|
|
766 | |
|
|
767 | IO::FDPass::send fileno $s1, fileno $handle_to_pass; |
|
|
768 | |
|
|
769 | $cv->recv; |
|
|
770 | |
|
|
771 | The MyWorker module could look like this: |
|
|
772 | |
|
|
773 | package MyWorker; |
|
|
774 | |
|
|
775 | use IO::FDPass; |
|
|
776 | |
|
|
777 | my $s2; |
|
|
778 | |
|
|
779 | sub init { |
|
|
780 | $s2 = $_[0]; |
|
|
781 | } |
|
|
782 | |
|
|
783 | sub run { |
|
|
784 | if ($_[0] eq "i'll send some fd now, please expect it!") { |
|
|
785 | my $fd = IO::FDPass::recv fileno $s2; |
|
|
786 | ... |
|
|
787 | } |
|
|
788 | } |
|
|
789 | |
|
|
790 | Of course, this might be blocking if you pass a lot of file descriptors, |
|
|
791 | so you might want to look into AnyEvent::FDpasser which can handle the |
|
|
792 | gory details. |
|
|
793 | |
|
|
794 | EXCEPTIONS |
|
|
795 | There are no provisions whatsoever for catching exceptions at this time |
|
|
796 | - in the child, exceptions might kill the process, causing calls to be |
|
|
797 | lost and the parent encountering a fatal error. In the parent, |
|
|
798 | exceptions in the result callback will not be caught and cause undefined |
|
|
799 | behaviour. |
|
|
800 | |
|
|
801 | SEE ALSO |
|
|
802 | AnyEvent::Fork, to create the processes in the first place. |
|
|
803 | |
|
|
804 | AnyEvent::Fork::Remote, likewise, but helpful for remote processes. |
|
|
805 | |
|
|
806 | AnyEvent::Fork::Pool, to manage whole pools of processes. |
|
|
807 | |
|
|
808 | AUTHOR AND CONTACT INFORMATION |
|
|
809 | Marc Lehmann <schmorp@schmorp.de> |
|
|
810 | http://software.schmorp.de/pkg/AnyEvent-Fork-RPC |
|
|
811 | |