ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Coro-Multicore/perlmulticore.h
Revision: 1.1
Committed: Sat Jun 27 19:32:15 2015 UTC (8 years, 11 months ago) by root
Content type: text/plain
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 TODO: if () acquire example
2     /*
3     * Author: Marc A. Lehmann <xsthreadpool@schmorp.de>
4     * License: public domain, or where this is not possible/at your option,
5     * CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
6     */
7    
8     #ifndef PERL_MULTICORE_H
9     #define PERL_MULTICORE_H
10    
11     #if 0
12    
13     =head1 NAME
14    
15     perlmulticore.h - release the perl interpreter for other uses while doing hard work
16    
17     =head1 SYNOPSIS
18    
19     #include "perlmulticore.h"
20    
21     // in your XS function:
22    
23     perlinterp_release ();
24     do_the_C_thing ();
25     perlinterp_acquire ();
26    
27     =head1 DESCRIPTION
28    
29     This header file implements a simple mechanism for XS modules to allow
30     re-use of the perl interpreter for other threads while doing some lengthy
31     operation, such as cryptography, SQL queries, disk I/O and so on.
32    
33     The design goals for this mechanism were to be simple to use, very
34     efficient when not needed, low code and data size overhead and broad
35     applicability.
36    
37    
38     =head1 HOW DO I USE THIS IN MY MODULES?
39    
40     The usage is very simple - you include this header file in your XS module. Then, before you
41     do your lengthy operation, you release the perl interpreter:
42    
43     perlinterp_release ();
44    
45     And when you are done with your computation, you acquire it again:
46    
47     perlinterp_acquire ();
48    
49     And that's it. This doesn't load any modules and consists of only a few
50     machine instructions when no module to take advantage of it is loaded.
51    
52     Here is a simple example, a C<flock> wrapper implemented in XS. Unlike
53     perl's built-in C<flock>, it allows other threads (for example, those
54     provided by L<Coro>) to execute, instead of blocking the whole perl
55     interpreter. For the sake of this example, it requires a file descriptor
56     instead of a handle.
57    
58     #include "perlmulticore.h" /* this header file */
59    
60     // and in the XS portion
61     int flock (int fd, int operation)
62     CODE:
63     perlinterp_release ();
64     RETVAL = flock (fd, operation);
65     perlinterp_acquire ();
66     OUTPUT:
67     RETVAL
68    
69     Another example would be to modify L<DBD::mysql> to allow other
70     threads to execute while executing SQL queries. One way to do this
71     is to find all C<mysql_st_internal_execute> and similar calls (such as
72     C<mysql_st_internal_execute41>), and adorn them with release/acquire
73     calls:
74    
75     {
76     perlinterp_release ();
77     imp_sth->row_num= mysql_st_internal_execute(sth, ...);
78     perlinterp_acquire ();
79     }
80    
81     =head2 HOW ABOUT NOT-SO LONG WORK?
82    
83     Sometimes you don't know how long your code will take - in a compression
84     library for example, compressing a few hundred Kilobyte of data can take
85     a while, while 50 Bytes will compress so fast that even attempting to do
86     something else could be more costly than just doing it.
87    
88     This is a very hard problem to solve. The best you can do at the moment is
89     to release the perl interpreter only when you think the work to be done
90     justifies the expense.
91    
92     As a rule of thumb, if you expect to need more than a few thousand cycles,
93     you should release the interpreter, else you shouldn't. When in doubt,
94     release.
95    
96     For example, in a compression library, you might want to do this:
97    
98     if (bytes_to_be_compressed > 2000) perlinterp_release ();
99     do_compress (...);
100     if (bytes_to_be_compressed > 2000) perlinterp_acquire ();
101    
102     Make sure the if conditions are exactly the same and don't change, so you
103     always call acquire when you release, and vice versa.
104    
105     When you don't have a handy indicator, you might still do something
106     useful. For example, if you do some file locking with C<fcntl> and you
107     expect the lock to be available immediately in most cases, you could try
108     with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when
109     the lock couldn't be set:
110    
111     int res = fcntl (fd, F_SETLK, &flock);
112    
113     if (res)
114     {
115     // error, assume lock is held by another process and do it the slow way
116     perlinterp_release ();
117     res = fcntl (fd, F_SETLKW, &flock);
118     perlinterp_acquire ();
119     }
120    
121     =head1 THE HARD AND FAST RULES
122    
123     As with everything, there are a number of rules to follow.
124    
125     =over 4
126    
127     =item I<Never> touch any perl data structures after calling C<perlinterp_release>.
128    
129     Possibly the most important rule of them all, anything perl is
130     completely off-limits after C<perlinterp_release>, until you call
131     C<perlinterp_acquire>, after which you can access perl stuff again.
132    
133     That includes anything in the perl interpreter that you didn't prove to be
134     safe, and didn't prove to be safe in older and future versions of perl:
135     global variables, local perl scalars, even if you are sure nobody accesses
136     them and you only try to "read" their value, and so on.
137    
138     If you need to access perl things, do it before releasing the
139     interpreter with C<perlinterp_release>, or after acquiring it again with
140     C<perlinterp_acquire>.
141    
142     =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs.
143    
144     For each C<perlinterp_release> call there must be a C<perlinterp_acquire>
145     call. They don't have to be in the same function, and you can have
146     multiple calls to them, as long as every C<perlinterp_release> call is
147     followed by exactly one C<perlinterp_acquire> call.
148    
149     For example, this would be fine:
150    
151     perlinterp_release ();
152    
153     if (!function_that_fails_with_0_return_value ())
154     {
155     perlinterp_acquire ();
156     croak ("error");
157     }
158    
159     perlinterp_acquire ();
160     // do other stuff
161    
162     =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>.
163    
164     That simply means that after calling C<perlinterp_release>, you must
165     call C<perlinterp_acquire> before calling C<perlinterp_release>
166     again. Likewise, after C<perlinterp_acquire>, you can call
167     C<perlinterp_release> but not another C<perlinterp_acquire>.
168    
169     =item I<Always> call C<perlinterp_release> first.
170    
171     Also simple: you I<must not> call C<perlinterp_acquire> without having
172     called C<perlinterp_release> before.
173    
174     =item I<Never> underestimate threads.
175    
176     While it's easy to add parallel execution ability to your XS module, it
177     doesn't mean it is safe. After you release the perl interpreter, it's
178     perfectly possible that it will call your XS function in another thread,
179     even while your original function still executes. In other words: your C
180     code must be thread safe, and if you use any library, that library must be
181     thread-safe, too.
182    
183     Always assume that the code between C<perlinterp_release> and
184     C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same
185     time. If your code can't cope with that, you could consider using a mutex
186     to only allow one such execution, which is still better than blocking
187     everybody else from doing anything:
188    
189     static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
190    
191     perlinterp_release ();
192     pthread_mutex_lock (&my_mutex);
193     do_your_non_thread_safe_thing ();
194     pthread_mutex_unlock (&my_mutex);
195     perlinterp_acquire ();
196    
197     This isn't as trivial as it looks though, as you need to find out which
198     threading system is in use (with L<Coro::Multicore>, it currently is
199     always pthreads).
200    
201     =item I<Don't> get confused by having to release first.
202    
203     In many real world scenarios, you acquire a resource, do something, then
204     release it again. Don't let this confuse you, with this, you already own
205     the resource (the perl interpreter) so you have to I<release> first, and
206     I<acquire> it again later, not the other way around.
207    
208     =back
209    
210    
211     =head1 DESIGN PRINCIPLES
212    
213     This section discusses how the design goals were reached (you be the
214     judge), how it is implemented, and what overheads this implies.
215    
216     =over 4
217    
218     =item Simple to Use
219    
220     All you have to do is identify the place in your existing code where you
221     stop touching perl stuff, do your actual work, and start touching perl
222     stuff again.
223    
224     Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the
225     actual work code.
226    
227     You have to include F<perlmulticore.h> and distribute it with your XS
228     code, but all these things border on the trivial.
229    
230     =item Very Efficient
231    
232     The definition for C<perlinterp_release> and C<perlinterp_acquire> is very
233     short:
234    
235     #define perlinterp_release() perl_multicore_api->pmapi_release ()
236     #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
237    
238     Both are macros that read a pointer from memory (perl_multicore_api),
239     dereference a function pointer stored at that place, and call the
240     function, which takes no arguments and returns nothing.
241    
242     The first call to C<perlinterp_release> will check for the presence
243     of any supporting module, and if none is loaded, will create a dummy
244     implementation where both C<pmapi_release> and C<pmapi_acquire> execute
245     this function:
246    
247     static void perl_multicore_nop (void) { }
248    
249     So in the case of no magical module being loaded, all calls except the
250     first are two memory accesses and a predictable function call of an empty
251     function.
252    
253     Of course, the overhead is much higher when these functions actually
254     implement anything useful, but you always get what you pay for.
255    
256     =item Low Code and Data Size Overhead
257    
258     On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one
259     pointer) of your data segment, to store the C<perl_multicore_api>
260     pointer. In addition it creates a C<16> octet perl string to store the
261     function pointers in, and stores it in a hash provided by perl for this
262     purpose.
263    
264     This is pretty much the equivalent of executing this code:
265    
266     $existing_hash{perl_multicore_api} = "1234567812345678";
267    
268     And that's it, which is, as I think, indeed very little.
269    
270     As for code size, on my amd64 system, every call to C<perlinterp_release>
271     or C<perlinterp_acquire> results in a variation of the following 9-10
272     octet sequence:
273    
274     150> mov 0x200f23(%rip),%rax # <perl_multicore_api>
275     157> callq *0x8(%rax)
276    
277     amd64 code sure is bloated.
278    
279     The biggest part is the initialisation code, which consists of 11 lines of
280     typical XS code. On my system, all the code in F<perlmulticore.h> compiles
281     to less than 160 octets of read-only data.
282    
283     =item Broad Applicability
284    
285     While there are alternative ways to achieve the goal of parallel execution
286     with threads that might be more efficient, this mechanism was chosen
287     because it is very simple to retrofit existing modules with it.
288    
289     The design goals for this mechanism were to be simple to use, very
290     efficient when not needed, low code and data size overhead and broad
291     applicability.
292    
293     =back
294    
295     =head1 AUTHOR
296    
297     Marc A. Lehmann <perlmulticore@schmorp.de>
298    
299     =head1 LICENSE
300    
301     The F<perlmulticore.h> is put into the public domain. Where this is legally
302     not possible, or at your option, it can be licensed under the Creative Commons
303     CC0 license: L<https://creativecommons.org/publicdomain/zero/1.0/>.
304    
305     =cut
306    
307     #endif
308    
/*
 * Function table used by the perlinterp_release/perlinterp_acquire macros.
 * The table is looked up in (or published to) PL_modglobal, so a table that
 * is already stored there is reused instead of creating another one.
 * Both hooks take no arguments and return nothing.
 */
struct perl_multicore_api
{
  void (*pmapi_release) (void); /* invoked by perlinterp_release () */
  void (*pmapi_acquire) (void); /* invoked by perlinterp_acquire () */
};
314    
315     static void perl_multicore_init (void);
316    
317     const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort };
318    
319     static struct perl_multicore_api *perl_multicore_api
320     = (struct perl_multicore_api *)&perl_multicore_api_init;
321    
/*
 * Public entry points.  Each macro loads the current function table through
 * perl_multicore_api and calls the matching hook; the hooks take no
 * arguments and return nothing, so the macros are used as plain statements.
 */
#define perlinterp_release() (perl_multicore_api->pmapi_release ())
#define perlinterp_acquire() (perl_multicore_api->pmapi_acquire ())
324    
/*
 * Do-nothing hook.  perl_multicore_init installs it for both release and
 * acquire when it has to create the API table itself.
 */
static void
perl_multicore_nop (void)
{
  /* intentionally empty */
}
329    
330     static void
331     perl_multicore_init (void)
332     {
333     dTHX;
334    
335     /* check for existing API struct in PL_modglobal */
336     SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1);
337    
338     if (SvPOKp (*api_svp))
339     perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */
340     else
341     {
342     /* create a new one with a dummy nop implementation */
343     SV *api_sv = NEWSV (0, sizeof (*perl_multicore_api));
344     SvCUR_set (api_sv, sizeof (*perl_multicore_api));
345     SvPOK_only (api_sv);
346     perl_multicore_api = (struct perl_multicore_api *)SvPVX (api_sv);
347     perl_multicore_api->pmapi_release =
348     perl_multicore_api->pmapi_acquire = perl_multicore_nop;
349     *api_svp = api_sv;
350     }
351    
352     /* call the real (or dummy) implementation now */
353     perlinterp_release ();
354     }
355    
356     #endif