ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Compress-LZF/perlmulticore.h
Revision: 1.1
Committed: Sat Jun 27 19:53:44 2015 UTC (8 years, 10 months ago) by root
Content type: text/plain
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 /*
2     * Author: Marc A. Lehmann <xsthreadpool@schmorp.de>
3     * License: public domain, or where this is not possible/at your option,
4     * CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
5     */
6    
7     #ifndef PERL_MULTICORE_H
8     #define PERL_MULTICORE_H
9    
10     /*
11     =head1 NAME
12    
13     perlmulticore.h - release the perl interpreter for other uses while doing hard work
14    
15     =head1 SYNOPSIS
16    
17     #include "perlmulticore.h"
18    
19     // in your XS function:
20    
21     perlinterp_release ();
22     do_the_C_thing ();
23     perlinterp_acquire ();
24    
25     =head1 DESCRIPTION
26    
27     This header file implements a simple mechanism for XS modules to allow
28     re-use of the perl interpreter for other threads while doing some lengthy
29     operation, such as cryptography, SQL queries, disk I/O and so on.
30    
31     The design goals for this mechanism were to be simple to use, very
32     efficient when not needed, low code and data size overhead and broad
33     applicability.
34    
35     The newest version of this document can be found at
36     L<http://pod.tst.eu/http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
37    
38     The newest version of the header file itself, which
39     includes this documentation, can be downloaded from
40     L<http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
41    
42     =head1 HOW DO I USE THIS IN MY MODULES?
43    
44     The usage is very simple - you include this header file in your XS module. Then, before you
45     do your lengthy operation, you release the perl interpreter:
46    
47     perlinterp_release ();
48    
49     And when you are done with your computation, you acquire it again:
50    
51     perlinterp_acquire ();
52    
53     And that's it. This doesn't load any modules and consists of only a few
54     machine instructions when no module to take advantage of it is loaded.
55    
56     Here is a simple example, a C<flock> wrapper implemented in XS. Unlike
57     perl's built-in C<flock>, it allows other threads (for example, those
58     provided by L<Coro>) to execute, instead of blocking the whole perl
59     interpreter. For the sake of this example, it requires a file descriptor
60     instead of a handle.
61    
62     #include "perlmulticore.h" // this header file
63    
64     // and in the XS portion
65     int flock (int fd, int operation)
66     CODE:
67     perlinterp_release ();
68     RETVAL = flock (fd, operation);
69     perlinterp_acquire ();
70     OUTPUT:
71     RETVAL
72    
73     Another example would be to modify L<DBD::mysql> to allow other
74     threads to execute while executing SQL queries. One way to do this
75     is to find all C<mysql_st_internal_execute> and similar calls (such as
76     C<mysql_st_internal_execute41>), and adorn them with release/acquire
77     calls:
78    
79     {
80     perlinterp_release ();
81     imp_sth->row_num= mysql_st_internal_execute(sth, ...);
82     perlinterp_acquire ();
83     }
84    
85     =head2 HOW ABOUT NOT-SO LONG WORK?
86    
87     Sometimes you don't know how long your code will take - in a compression
88     library for example, compressing a few hundred kilobytes of data can take
89     a while, while 50 Bytes will compress so fast that even attempting to do
90     something else could be more costly than just doing it.
91    
92     This is a very hard problem to solve. The best you can do at the moment is
93     to release the perl interpreter only when you think the work to be done
94     justifies the expense.
95    
96     As a rule of thumb, if you expect to need more than a few thousand cycles,
97     you should release the interpreter, else you shouldn't. When in doubt,
98     release.
99    
100     For example, in a compression library, you might want to do this:
101    
102     if (bytes_to_be_compressed > 2000) perlinterp_release ();
103     do_compress (...);
104     if (bytes_to_be_compressed > 2000) perlinterp_acquire ();
105    
106     Make sure the if conditions are exactly the same and don't change, so you
107     always call acquire when you release, and vice versa.
108    
109     When you don't have a handy indicator, you might still do something
110     useful. For example, if you do some file locking with C<fcntl> and you
111     expect the lock to be available immediately in most cases, you could try
112     with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when
113     the lock couldn't be set:
114    
115     int res = fcntl (fd, F_SETLK, &flock);
116    
117     if (res)
118     {
119     // error, assume lock is held by another process and do it the slow way
120     perlinterp_release ();
121     res = fcntl (fd, F_SETLKW, &flock);
122     perlinterp_acquire ();
123     }
124    
125     =head1 THE HARD AND FAST RULES
126    
127     As with everything, there are a number of rules to follow.
128    
129     =over 4
130    
131     =item I<Never> touch any perl data structures after calling C<perlinterp_release>.
132    
133     Possibly the most important rule of them all, anything perl is
134     completely off-limits after C<perlinterp_release>, until you call
135     C<perlinterp_acquire>, after which you can access perl stuff again.
136    
137     That includes anything in the perl interpreter that you didn't prove to be
138     safe, and didn't prove to be safe in older and future versions of perl:
139     global variables, local perl scalars, even if you are sure nobody accesses
140     them and you only try to "read" their value, and so on.
141    
142     If you need to access perl things, do it before releasing the
143     interpreter with C<perlinterp_release>, or after acquiring it again with
144     C<perlinterp_acquire>.
145    
146     =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs.
147    
148     For each C<perlinterp_release> call there must be a C<perlinterp_acquire>
149     call. They don't have to be in the same function, and you can have
150     multiple calls to them, as long as every C<perlinterp_release> call is
151     followed by exactly one C<perlinterp_acquire> call.
152    
153     For example, this would be fine:
154    
155     perlinterp_release ();
156    
157     if (!function_that_fails_with_0_return_value ())
158     {
159     perlinterp_acquire ();
160     croak ("error");
161     // croak doesn't return
162     }
163    
164     perlinterp_acquire ();
165     // do other stuff
166    
167     =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>.
168    
169     That simply means that after calling C<perlinterp_release>, you must
170     call C<perlinterp_acquire> before calling C<perlinterp_release>
171     again. Likewise, after C<perlinterp_acquire>, you can call
172     C<perlinterp_release> but not another C<perlinterp_acquire>.
173    
174     =item I<Always> call C<perlinterp_release> first.
175    
176     Also simple: you I<must not> call C<perlinterp_acquire> without having
177     called C<perlinterp_release> before.
178    
179     =item I<Never> underestimate threads.
180    
181     While it's easy to add parallel execution ability to your XS module, it
182     doesn't mean it is safe. After you release the perl interpreter, it's
183     perfectly possible that it will call your XS function in another thread,
184     even while your original function still executes. In other words: your C
185     code must be thread safe, and if you use any library, that library must be
186     thread-safe, too.
187    
188     Always assume that the code between C<perlinterp_release> and
189     C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same
190     time. If your code can't cope with that, you could consider using a mutex
191     to only allow one such execution, which is still better than blocking
192     everybody else from doing anything:
193    
194     static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
195    
196     perlinterp_release ();
197     pthread_mutex_lock (&my_mutex);
198     do_your_non_thread_safe_thing ();
199     pthread_mutex_unlock (&my_mutex);
200     perlinterp_acquire ();
201    
202     This isn't as trivial as it looks though, as you need to find out which
203     threading system is in use (with L<Coro::Multicore>, it currently is
204     always pthreads).
205    
206     =item I<Don't> get confused by having to release first.
207    
208     In many real world scenarios, you acquire a resource, do something, then
209     release it again. Don't let this confuse you, with this, you already own
210     the resource (the perl interpreter) so you have to I<release> first, and
211     I<acquire> it again later, not the other way around.
212    
213     =back
214    
215    
216     =head1 DESIGN PRINCIPLES
217    
218     This section discusses how the design goals were reached (you be the
219     judge), how it is implemented, and what overheads this implies.
220    
221     =over 4
222    
223     =item Simple to Use
224    
225     All you have to do is identify the place in your existing code where you
226     stop touching perl stuff, do your actual work, and start touching perl
227     stuff again.
228    
229     Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the
230     actual work code.
231    
232     You have to include F<perlmulticore.h> and distribute it with your XS
233     code, but all these things border on the trivial.
234    
235     =item Very Efficient
236    
237     The definition for C<perlinterp_release> and C<perlinterp_acquire> is very
238     short:
239    
240     #define perlinterp_release() perl_multicore_api->pmapi_release ()
241     #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
242    
243     Both are macros that read a pointer from memory (perl_multicore_api),
244     dereference a function pointer stored at that place, and call the
245     function, which takes no arguments and returns nothing.
246    
247     The first call to C<perlinterp_release> will check for the presence
248     of any supporting module, and if none is loaded, will create a dummy
249     implementation where both C<pmapi_release> and C<pmapi_acquire> execute
250     this function:
251    
252     static void perl_multicore_nop (void) { }
253    
254     So in the case of no magical module being loaded, all calls except the
255     first are two memory accesses and a predictable function call of an empty
256     function.
257    
258     Of course, the overhead is much higher when these functions actually
259     implement anything useful, but you always get what you pay for.
260    
261     With L<Coro::Multicore>, every release/acquire involves two pthread
262     switches, two coro thread switches, a bunch of syscalls, and sometimes
263     interacting with the event loop.
264    
265     A dedicated thread pool such as the one L<IO::AIO> uses could reduce
266     these overheads, and would also reduce the dependencies (L<AnyEvent> is a
267     smaller and more portable dependency than L<Coro>), but it would require a
268     lot more work on the side of the module author wanting to support it than
269     this solution.
270    
271     =item Low Code and Data Size Overhead
272    
273     On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one
274     pointer) of your data segment, to store the C<perl_multicore_api>
275     pointer. In addition it creates a C<16> octet perl string to store the
276     function pointers in, and stores it in a hash provided by perl for this
277     purpose.
278    
279     This is pretty much the equivalent of executing this code:
280    
281     $existing_hash{perl_multicore_api} = "1234567812345678";
282    
283     And that's it, which is, as I think, indeed very little.
284    
285     As for code size, on my amd64 system, every call to C<perlinterp_release>
286     or C<perlinterp_acquire> results in a variation of the following 9-10
287     octet sequence:
288    
289     150> mov 0x200f23(%rip),%rax # <perl_multicore_api>
290     157> callq *0x8(%rax)
291    
292     The biggest part is the initialisation code, which consists of 11 lines of
293     typical XS code. On my system, all the code in F<perlmulticore.h> compiles
294     to less than 160 octets of read-only data.
295    
296     =item Broad Applicability
297    
298     While there are alternative ways to achieve the goal of parallel execution
299     with threads that might be more efficient, this mechanism was chosen
300     because it is very simple to retrofit existing modules with it.
301    
302     The design goals for this mechanism were to be simple to use, very
303     efficient when not needed, low code and data size overhead and broad
304     applicability.
305    
306     =back
307    
308     =head1 AUTHOR
309    
310     Marc A. Lehmann <perlmulticore@schmorp.de>
311    
312     =head1 LICENSE
313    
314     The F<perlmulticore.h> is put into the public domain. Where this is legally
315     not possible, or at your option, it can be licensed under the Creative Commons
316     CC0 license: L<https://creativecommons.org/publicdomain/zero/1.0/>.
317    
318     =cut
319     */
320    
321     struct perl_multicore_api /* hook table used by the perlinterp_* macros; perl_multicore_init fetches it from, or stores it in, PL_modglobal */
322     {
323     void (*pmapi_release)(void); /* invoked by perlinterp_release () */
324     void (*pmapi_acquire)(void); /* invoked by perlinterp_acquire () */
325     };
326    
327     static void perl_multicore_init (void); /* forward declaration, needed by the bootstrap table below */
328     /* bootstrap hooks: the first release runs perl_multicore_init, an acquire without a prior release hits abort; static, so every file including this header gets its own copy instead of a clashing global definition */
329     static const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort };
330     /* currently active hook table; the cast only drops const, and perl_multicore_init repoints this before anything is written through it */
331     static struct perl_multicore_api *perl_multicore_api
332     = (struct perl_multicore_api *)&perl_multicore_api_init;
333    
334     #define perlinterp_release() perl_multicore_api->pmapi_release () /* hand the perl interpreter to others before lengthy non-perl work */
335     #define perlinterp_acquire() perl_multicore_api->pmapi_acquire () /* take the interpreter back; only valid after a perlinterp_release () */
336    
337     static void
338     perl_multicore_nop (void)
339     { /* intentionally empty: perl_multicore_init installs this as both hooks when PL_modglobal holds no API struct yet */
340     }
341    
342     static void
343     perl_multicore_init (void)
344     {
345     /* initial pmapi_release hook, so it runs on the first perlinterp_release (): binds perl_multicore_api to the struct shared via PL_modglobal (creating a no-op one if needed), then performs that release */
346     dTHX;
347    
348     /* look up the shared API entry in PL_modglobal; lval = 1 creates an empty placeholder if it is missing */
349     SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1);
350    
351     if (SvPOKp (*api_svp))
352     perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */
353     else
354     {
355     /* create a new one with a dummy nop implementation; the struct lives in the string buffer of a perl scalar */
356     SV *api_sv = newSV (sizeof (*perl_multicore_api)); /* newSV instead of the obsolete NEWSV macro */
357     SvCUR_set (api_sv, sizeof (*perl_multicore_api));
358     SvPOK_only (api_sv);
359     perl_multicore_api = (struct perl_multicore_api *)SvPVX (api_sv);
360     perl_multicore_api->pmapi_release = perl_multicore_api->pmapi_acquire = perl_multicore_nop;
361     (void)hv_store (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, api_sv, 0); /* replaces and frees the placeholder SV from hv_fetch, which a plain *api_svp assignment would leak */
362     }
363    
364     /* call the real (or dummy) implementation now */
365     perlinterp_release ();
366     }
367    
368     #endif