ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Coro-Multicore/perlmulticore.h
Revision: 1.7
Committed: Sun Jun 28 18:33:13 2015 UTC (8 years, 10 months ago) by root
Content type: text/plain
Branch: MAIN
Changes since 1.6: +9 -0 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 /*
2     * Author: Marc A. Lehmann <xsthreadpool@schmorp.de>
3     * License: public domain, or where this is not possible/at your option,
4     * CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
5     */
6    
7     #ifndef PERL_MULTICORE_H
8     #define PERL_MULTICORE_H
9    
10 root 1.3 /*
11 root 1.5
12 root 1.1 =head1 NAME
13    
14 root 1.6 perlmulticore.h - the Perl Multicore Specification and Implementation
15 root 1.1
16     =head1 SYNOPSIS
17    
18     #include "perlmulticore.h"
19    
20     // in your XS function:
21    
22     perlinterp_release ();
23     do_the_C_thing ();
24     perlinterp_acquire ();
25    
26     =head1 DESCRIPTION
27    
28     This header file implements a simple mechanism for XS modules to allow
29     re-use of the perl interpreter for other threads while doing some lengthy
30     operation, such as cryptography, SQL queries, disk I/O and so on.
31    
32     The design goals for this mechanism were to be simple to use, very
33     efficient when not needed, low code and data size overhead and broad
34     applicability.
35    
36 root 1.2 The newest version of this document can be found at
37     L<http://pod.tst.eu/http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
38    
39 root 1.6 The newest version of the header file itself, which
40 root 1.2 includes this documentation, can be downloaded from
41     L<http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
42 root 1.1
43     =head1 HOW DO I USE THIS IN MY MODULES?
44    
45 root 1.2 The usage is very simple - you include this header file in your XS module. Then, before you
46 root 1.1 do your lengthy operation, you release the perl interpreter:
47    
48     perlinterp_release ();
49    
50     And when you are done with your computation, you acquire it again:
51    
52     perlinterp_acquire ();
53    
54     And that's it. This doesn't load any modules and consists of only a few
55 root 1.2 machine instructions when no module to take advantage of it is loaded.
56 root 1.1
57     Here is a simple example, an C<flock> wrapper implemented in XS. Unlike
58     perl's built-in C<flock>, it allows other threads (for example, those
59     provided by L<Coro>) to execute, instead of blocking the whole perl
60     interpreter. For the sake of this example, it requires a file descriptor
61     instead of a handle.
62    
63 root 1.3 #include "perlmulticore.h" // this header file
64 root 1.1
65     // and in the XS portion
66     int flock (int fd, int operation)
67     CODE:
68     perlinterp_release ();
69     RETVAL = flock (fd, operation);
70     perlinterp_acquire ();
71     OUTPUT:
72     RETVAL
73    
74     Another example would be to modify L<DBD::mysql> to allow other
75     threads to execute while executing SQL queries. One way to do this
76     is to find all C<mysql_st_internal_execute> and similar calls (such as
77     C<mysql_st_internal_execute41>), and adorn them with release/acquire
78     calls:
79    
80     {
81     perlinterp_release ();
82     imp_sth->row_num= mysql_st_internal_execute(sth, ...);
83     perlinterp_acquire ();
84     }
85    
86     =head2 HOW ABOUT NOT-SO LONG WORK?
87    
88     Sometimes you don't know how long your code will take - in a compression
89     library for example, compressing a few hundred kilobytes of data can take
90 root 1.2 a while, while 50 Bytes will compress so fast that even attempting to do
91 root 1.1 something else could be more costly than just doing it.
92    
93     This is a very hard problem to solve. The best you can do at the moment is
94     to release the perl interpreter only when you think the work to be done
95     justifies the expense.
96    
97     As a rule of thumb, if you expect to need more than a few thousand cycles,
98     you should release the interpreter, else you shouldn't. When in doubt,
99     release.
100    
101     For example, in a compression library, you might want to do this:
102    
103     if (bytes_to_be_compressed > 2000) perlinterp_release ();
104     do_compress (...);
105     if (bytes_to_be_compressed > 2000) perlinterp_acquire ();
106    
107     Make sure the if conditions are exactly the same and don't change, so you
108     always call acquire when you release, and vice versa.
109    
110     When you don't have a handy indicator, you might still do something
111     useful. For example, if you do some file locking with C<fcntl> and you
112 root 1.2 expect the lock to be available immediately in most cases, you could try
113 root 1.1 with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when
114     the lock couldn't be set:
115    
116     int res = fcntl (fd, F_SETLK, &flock);
117    
118     if (res)
119     {
120     // error, assume lock is held by another process and do it the slow way
121     perlinterp_release ();
122     res = fcntl (fd, F_SETLKW, &flock);
123 root 1.6 perlinterp_acquire ();
124 root 1.1 }
125    
126     =head1 THE HARD AND FAST RULES
127    
128     As with everything, there are a number of rules to follow.
129    
130     =over 4
131    
132     =item I<Never> touch any perl data structures after calling C<perlinterp_release>.
133    
134     Possibly the most important rule of them all, anything perl is
135     completely off-limits after C<perlinterp_release>, until you call
136     C<perlinterp_acquire>, after which you can access perl stuff again.
137    
138     That includes anything in the perl interpreter that you didn't prove to be
139     safe, and didn't prove to be safe in older and future versions of perl:
140     global variables, local perl scalars, even if you are sure nobody accesses
141     them and you only try to "read" their value, and so on.
142    
143     If you need to access perl things, do it before releasing the
144     interpreter with C<perlinterp_release>, or after acquiring it again with
145     C<perlinterp_acquire>.
146    
147     =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs.
148    
149     For each C<perlinterp_release> call there must be a C<perlinterp_acquire>
150     call. They don't have to be in the same function, and you can have
151     multiple calls to them, as long as every C<perlinterp_release> call is
152     followed by exactly one C<perlinterp_acquire> call.
153    
154     For example, this would be fine:
155    
156     perlinterp_release ();
157    
158     if (!function_that_fails_with_0_return_value ())
159     {
160     perlinterp_acquire ();
161     croak ("error");
162 root 1.2 // croak doesn't return
163 root 1.1 }
164    
165     perlinterp_acquire ();
166     // do other stuff
167    
168     =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>.
169    
170     That simply means that after calling C<perlinterp_release>, you must
171     call C<perlinterp_acquire> before calling C<perlinterp_release>
172     again. Likewise, after C<perlinterp_acquire>, you can call
173     C<perlinterp_release> but not another C<perlinterp_acquire>.
174    
175     =item I<Always> call C<perlinterp_release> first.
176    
177     Also simple: you I<must not> call C<perlinterp_acquire> without having
178     called C<perlinterp_release> before.
179    
180     =item I<Never> underestimate threads.
181    
182     While it's easy to add parallel execution ability to your XS module, it
183     doesn't mean it is safe. After you release the perl interpreter, it's
184     perfectly possible that it will call your XS function in another thread,
185     even while your original function still executes. In other words: your C
186     code must be thread safe, and if you use any library, that library must be
187     thread-safe, too.
188    
189     Always assume that the code between C<perlinterp_release> and
190     C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same
191     time. If your code can't cope with that, you could consider using a mutex
192 root 1.2 to only allow one such execution, which is still better than blocking
193 root 1.1 everybody else from doing anything:
194    
195     static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
196    
197     perlinterp_release ();
198     pthread_mutex_lock (&my_mutex);
199     do_your_non_thread_safe_thing ();
200     pthread_mutex_unlock (&my_mutex);
201     perlinterp_acquire ();
202    
203     =item I<Don't> get confused by having to release first.
204    
205     In many real world scenarios, you acquire a resource, do something, then
206     release it again. Don't let this confuse you: here, you already own
207     the resource (the perl interpreter) so you have to I<release> first, and
208     I<acquire> it again later, not the other way around.
209    
210     =back
211    
212    
213     =head1 DESIGN PRINCIPLES
214    
215     This section discusses how the design goals were reached (you be the
216     judge), how it is implemented, and what overheads this implies.
217    
218     =over 4
219    
220     =item Simple to Use
221    
222     All you have to do is identify the place in your existing code where you
223 root 1.2 stop touching perl stuff, do your actual work, and start touching perl
224 root 1.1 stuff again.
225    
226     Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the
227     actual work code.
228    
229     You have to include F<perlmulticore.h> and distribute it with your XS
230     code, but all these things border on the trivial.
231    
232     =item Very Efficient
233    
234     The definition for C<perlinterp_release> and C<perlinterp_acquire> is very
235     short:
236    
237     #define perlinterp_release() perl_multicore_api->pmapi_release ()
238     #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
239    
240     Both are macros that read a pointer from memory (perl_multicore_api),
241     dereference a function pointer stored at that place, and call the
242     function, which takes no arguments and returns nothing.
243    
244     The first call to C<perlinterp_release> will check for the presence
245     of any supporting module, and if none is loaded, will create a dummy
246     implementation where both C<pmapi_release> and C<pmapi_acquire> execute
247     this function:
248    
249     static void perl_multicore_nop (void) { }
250    
251     So in the case of no magical module being loaded, all calls except the
252     first are two memory accesses and a predictable function call of an empty
253     function.
254    
255     Of course, the overhead is much higher when these functions actually
256     implement anything useful, but you always get what you pay for.
257    
258 root 1.4 With L<Coro::Multicore>, every release/acquire involves two pthread
259     switches, two coro thread switches, a bunch of syscalls, and sometimes
260     interacting with the event loop.
261    
262     A dedicated thread pool such as the one L<IO::AIO> uses could reduce
263     these overheads, and would also reduce the dependencies (L<AnyEvent> is a
264     smaller and more portable dependency than L<Coro>), but it would require a
265     lot more work on the side of the module author wanting to support it than
266     this solution.
267    
268 root 1.1 =item Low Code and Data Size Overhead
269    
270     On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one
271     pointer) of your data segment, to store the C<perl_multicore_api>
272     pointer. In addition it creates a C<16> octet perl string to store the
273     function pointers in, and stores it in a hash provided by perl for this
274     purpose.
275    
276     This is pretty much the equivalent of executing this code:
277    
278     $existing_hash{perl_multicore_api} = "1234567812345678";
279    
280     And that's it, which is, as I think, indeed very little.
281    
282     As for code size, on my amd64 system, every call to C<perlinterp_release>
283     or C<perlinterp_acquire> results in a variation of the following 9-10
284     octet sequence:
285    
286     150> mov 0x200f23(%rip),%rax # <perl_multicore_api>
287     157> callq *0x8(%rax)
288    
289     The biggest part is the initialisation code, which consists of 11 lines of
290     typical XS code. On my system, all the code in F<perlmulticore.h> compiles
291     to less than 160 octets of read-only data.
292    
293     =item Broad Applicability
294    
295     While there are alternative ways to achieve the goal of parallel execution
296     with threads that might be more efficient, this mechanism was chosen
297     because it is very simple to retrofit existing modules with it, and it costs almost nothing when no supporting module is loaded.
298    
299     The design goals for this mechanism were to be simple to use, very
300     efficient when not needed, low code and data size overhead and broad
301     applicability.
302    
303     =back
304    
305     =head1 AUTHOR
306    
307     Marc A. Lehmann <perlmulticore@schmorp.de>
308 root 1.6 http://perlmulticore.schmorp.de/
309 root 1.1
310     =head1 LICENSE
311    
312 root 1.6 The F<perlmulticore.h> header file is put into the public
313     domain. Where this is legally not possible, or at your
314     option, it can be licensed under the Creative Commons CC0
315     license: L<https://creativecommons.org/publicdomain/zero/1.0/>.
316 root 1.1
317     =cut
318 root 1.6
319 root 1.3 */
320 root 1.1
321 root 1.7 #if PERL_MULTICORE_DISABLE
322     /* PERL_MULTICORE_DISABLE is non-zero: both calls expand to an empty statement */
323     #define perlinterp_release() do { } while (0)
324     #define perlinterp_acquire() do { } while (0)
325    
326     #else
327    
328 root 1.6 /* this struct is shared between all modules, and currently */
329     /* contains only the two function pointers for release/acquire */
330 root 1.1 struct perl_multicore_api
331     {
332     void (*pmapi_release)(void); /* invoked by the perlinterp_release () macro */
333     void (*pmapi_acquire)(void); /* invoked by the perlinterp_acquire () macro */
334     };
335    
336     static void perl_multicore_init (void);
337     /* initial table: the first release runs perl_multicore_init, an acquire without a prior release calls abort; static so every file including this header gets its own copy instead of a duplicate external symbol */
338     static const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort };
339     /* the table the release/acquire macros call through; starts at the initial table and is repointed by perl_multicore_init */
340     static struct perl_multicore_api *perl_multicore_api
341     = (struct perl_multicore_api *)&perl_multicore_api_init;
342     /* both macros call through the table that perl_multicore_api currently points to */
343     #define perlinterp_release() perl_multicore_api->pmapi_release ()
344     #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
345    
346 root 1.6 /* empty function installed as both release and acquire when no existing API struct is found */
347 root 1.1 static void
348     perl_multicore_nop (void)
349     {
350     }
351    
352 root 1.6 /* this is the initial implementation of "release" - on its first call it */
353     /* points perl_multicore_api at the shared struct and then calls the real release function */
354 root 1.1 static void
355     perl_multicore_init (void)
356     {
357     dTHX;
358    
359     /* check for an existing API struct stored under the "perl_multicore_api" key in PL_modglobal */
360     SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1);
361    
362     if (SvPOKp (*api_svp))
363     perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* the entry already holds a struct, use the existing one */
364     else
365     {
366     /* none yet: create a string SV sized to hold the struct and fill it with the dummy nop implementation */
367     SV *api_sv = NEWSV (0, sizeof (*perl_multicore_api));
368     SvCUR_set (api_sv, sizeof (*perl_multicore_api));
369     SvPOK_only (api_sv);
370     perl_multicore_api = (struct perl_multicore_api *)SvPVX (api_sv);
371     perl_multicore_api->pmapi_release =
372     perl_multicore_api->pmapi_acquire = perl_multicore_nop;
373     *api_svp = api_sv; /* store it in the hash slot so later lookups of this key find the struct; NOTE(review): the SV previously in the slot is overwritten without being released - confirm this is intended */
374     }
375    
376     /* perl_multicore_api no longer points at the initial table, so this calls the real (or dummy) implementation now */
377     perlinterp_release ();
378     }
379    
380     #endif
381 root 1.7
382     #endif