ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Coro-Multicore/perlmulticore.h
Revision: 1.2
Committed: Sat Jun 27 19:36:39 2015 UTC (8 years, 11 months ago) by root
Content type: text/plain
Branch: MAIN
Changes since 1.1: +13 -8 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 /* TODO: if () acquire example */
2     /*
3     * Author: Marc A. Lehmann <xsthreadpool@schmorp.de>
4     * License: public domain, or where this is not possible/at your option,
5     * CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
6     */
7    
8     #ifndef PERL_MULTICORE_H
9     #define PERL_MULTICORE_H
10    
11     #if 0
12    
13     =head1 NAME
14    
15     perlmulticore.h - release the perl interpreter for other uses while doing hard work
16    
17     =head1 SYNOPSIS
18    
19     #include "perlmulticore.h"
20    
21     // in your XS function:
22    
23     perlinterp_release ();
24     do_the_C_thing ();
25     perlinterp_acquire ();
26    
27     =head1 DESCRIPTION
28    
29     This header file implements a simple mechanism for XS modules to allow
30     re-use of the perl interpreter for other threads while doing some lengthy
31     operation, such as cryptography, SQL queries, disk I/O and so on.
32    
33     The design goals for this mechanism were to be simple to use, very
34     efficient when not needed, low code and data size overhead and broad
35     applicability.
36    
37 root 1.2 The newest version of this document can be found at
38     L<http://pod.tst.eu/http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
39    
40     The newest version of the header file itself, which
41     includes this documentation, can be downloaded from
42     L<http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
43 root 1.1
44     =head1 HOW DO I USE THIS IN MY MODULES?
45    
46 root 1.2 The usage is very simple - you include this header file in your XS module. Then, before you
47 root 1.1 do your lengthy operation, you release the perl interpreter:
48    
49     perlinterp_release ();
50    
51     And when you are done with your computation, you acquire it again:
52    
53     perlinterp_acquire ();
54    
55     And that's it. This doesn't load any modules and consists of only a few
56 root 1.2 machine instructions when no module to take advantage of it is loaded.
57 root 1.1
58     Here is a simple example, a C<flock> wrapper implemented in XS. Unlike
59     perl's built-in C<flock>, it allows other threads (for example, those
60     provided by L<Coro>) to execute, instead of blocking the whole perl
61     interpreter. For the sake of this example, it requires a file descriptor
62     instead of a handle.
63    
64     #include "perlmulticore.h" /* this header file */
65    
66     // and in the XS portion
67     int flock (int fd, int operation)
68     CODE:
69     perlinterp_release ();
70     RETVAL = flock (fd, operation);
71     perlinterp_acquire ();
72     OUTPUT:
73     RETVAL
74    
75     Another example would be to modify L<DBD::mysql> to allow other
76     threads to execute while executing SQL queries. One way to do this
77     is to find all C<mysql_st_internal_execute> and similar calls (such as
78     C<mysql_st_internal_execute41>), and adorn them with release/acquire
79     calls:
80    
81     {
82     perlinterp_release ();
83     imp_sth->row_num= mysql_st_internal_execute(sth, ...);
84     perlinterp_acquire ();
85     }
86    
87     =head2 HOW ABOUT NOT-SO LONG WORK?
88    
89     Sometimes you don't know how long your code will take - in a compression
90     library for example, compressing a few hundred Kilobytes of data can take
91 root 1.2 a while, while 50 Bytes will compress so fast that even attempting to do
92 root 1.1 something else could be more costly than just doing it.
93    
94     This is a very hard problem to solve. The best you can do at the moment is
95     to release the perl interpreter only when you think the work to be done
96     justifies the expense.
97    
98     As a rule of thumb, if you expect to need more than a few thousand cycles,
99     you should release the interpreter, else you shouldn't. When in doubt,
100     release.
101    
102     For example, in a compression library, you might want to do this:
103    
104     if (bytes_to_be_compressed > 2000) perlinterp_release ();
105     do_compress (...);
106     if (bytes_to_be_compressed > 2000) perlinterp_acquire ();
107    
108     Make sure the if conditions are exactly the same and don't change, so you
109     always call acquire when you release, and vice versa.
110    
111     When you don't have a handy indicator, you might still do something
112     useful. For example, if you do some file locking with C<fcntl> and you
113 root 1.2 expect the lock to be available immediately in most cases, you could try
114 root 1.1 with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when
115     the lock couldn't be set:
116    
117     int res = fcntl (fd, F_SETLK, &flock);
118    
119     if (res)
120     {
121     // error, assume lock is held by another process and do it the slow way
122     perlinterp_release ();
123     res = fcntl (fd, F_SETLKW, &flock);
124     perlinterp_acquire ();
125     }
126    
127     =head1 THE HARD AND FAST RULES
128    
129     As with everything, there are a number of rules to follow.
130    
131     =over 4
132    
133     =item I<Never> touch any perl data structures after calling C<perlinterp_release>.
134    
135     Possibly the most important rule of them all, anything perl is
136     completely off-limits after C<perlinterp_release>, until you call
137     C<perlinterp_acquire>, after which you can access perl stuff again.
138    
139     That includes anything in the perl interpreter that you didn't prove to be
140     safe, and didn't prove to be safe in older and future versions of perl:
141     global variables, local perl scalars, even if you are sure nobody accesses
142     them and you only try to "read" their value, and so on.
143    
144     If you need to access perl things, do it before releasing the
145     interpreter with C<perlinterp_release>, or after acquiring it again with
146     C<perlinterp_acquire>.
147    
148     =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs.
149    
150     For each C<perlinterp_release> call there must be a C<perlinterp_acquire>
151     call. They don't have to be in the same function, and you can have
152     multiple calls to them, as long as every C<perlinterp_release> call is
153     followed by exactly one C<perlinterp_acquire> call.
154    
155     For example, this would be fine:
156    
157     perlinterp_release ();
158    
159     if (!function_that_fails_with_0_return_value ())
160     {
161     perlinterp_acquire ();
162     croak ("error");
163 root 1.2 // croak doesn't return
164 root 1.1 }
165    
166     perlinterp_acquire ();
167     // do other stuff
168    
169     =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>.
170    
171     That simply means that after calling C<perlinterp_release>, you must
172     call C<perlinterp_acquire> before calling C<perlinterp_release>
173     again. Likewise, after C<perlinterp_acquire>, you can call
174     C<perlinterp_release> but not another C<perlinterp_acquire>.
175    
176     =item I<Always> call C<perlinterp_release> first.
177    
178     Also simple: you I<must not> call C<perlinterp_acquire> without having
179     called C<perlinterp_release> before.
180    
181     =item I<Never> underestimate threads.
182    
183     While it's easy to add parallel execution ability to your XS module, it
184     doesn't mean it is safe. After you release the perl interpreter, it's
185     perfectly possible that it will call your XS function in another thread,
186     even while your original function still executes. In other words: your C
187     code must be thread safe, and if you use any library, that library must be
188     thread-safe, too.
189    
190     Always assume that the code between C<perlinterp_release> and
191     C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same
192     time. If your code can't cope with that, you could consider using a mutex
193 root 1.2 to only allow one such execution, which is still better than blocking
194 root 1.1 everybody else from doing anything:
195    
196     static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
197    
198     perlinterp_release ();
199     pthread_mutex_lock (&my_mutex);
200     do_your_non_thread_safe_thing ();
201     pthread_mutex_unlock (&my_mutex);
202     perlinterp_acquire ();
203    
204     This isn't as trivial as it looks though, as you need to find out which
205     threading system is in use (with L<Coro::Multicore>, it currently is
206     always pthreads).
207    
208     =item I<Don't> get confused by having to release first.
209    
210     In many real world scenarios, you acquire a resource, do something, then
211     release it again. Don't let this confuse you, with this, you already own
212     the resource (the perl interpreter) so you have to I<release> first, and
213     I<acquire> it again later, not the other way around.
214    
215     =back
216    
217    
218     =head1 DESIGN PRINCIPLES
219    
220     This section discusses how the design goals were reached (you be the
221     judge), how it is implemented, and what overheads this implies.
222    
223     =over 4
224    
225     =item Simple to Use
226    
227     All you have to do is identify the place in your existing code where you
228 root 1.2 stop touching perl stuff, do your actual work, and start touching perl
229 root 1.1 stuff again.
230    
231     Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the
232     actual work code.
233    
234     You have to include F<perlmulticore.h> and distribute it with your XS
235     code, but all these things border on the trivial.
236    
237     =item Very Efficient
238    
239     The definition for C<perlinterp_release> and C<perlinterp_acquire> is very
240     short:
241    
242     #define perlinterp_release() perl_multicore_api->pmapi_release ()
243     #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
244    
245     Both are macros that read a pointer from memory (perl_multicore_api),
246     dereference a function pointer stored at that place, and call the
247     function, which takes no arguments and returns nothing.
248    
249     The first call to C<perlinterp_release> will check for the presence
250     of any supporting module, and if none is loaded, will create a dummy
251     implementation where both C<pmapi_release> and C<pmapi_acquire> execute
252     this function:
253    
254     static void perl_multicore_nop (void) { }
255    
256     So in the case of no magical module being loaded, all calls except the
257     first are two memory accesses and a predictable function call of an empty
258     function.
259    
260     Of course, the overhead is much higher when these functions actually
261     implement anything useful, but you always get what you pay for.
262    
263     =item Low Code and Data Size Overhead
264    
265     On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one
266     pointer) of your data segment, to store the C<perl_multicore_api>
267     pointer. In addition it creates a C<16> octet perl string to store the
268     function pointers in, and stores it in a hash provided by perl for this
269     purpose.
270    
271     This is pretty much the equivalent of executing this code:
272    
273     $existing_hash{perl_multicore_api} = "1234567812345678";
274    
275     And that's it, which is, as I think, indeed very little.
276    
277     As for code size, on my amd64 system, every call to C<perlinterp_release>
278     or C<perlinterp_acquire> results in a variation of the following 9-10
279     octet sequence:
280    
281     150> mov 0x200f23(%rip),%rax # <perl_multicore_api>
282     157> callq *0x8(%rax)
283    
284     The biggest part is the initialisation code, which consists of 11 lines of
285     typical XS code. On my system, all the code in F<perlmulticore.h> compiles
286     to less than 160 octets of read-only data.
287    
288     =item Broad Applicability
289    
290     While there are alternative ways to achieve the goal of parallel execution
291     with threads that might be more efficient, this mechanism was chosen
292     because it is very simple to retrofit existing modules with it.
293    
294     The design goals for this mechanism were to be simple to use, very
295     efficient when not needed, low code and data size overhead and broad
296     applicability.
297    
298     =back
299    
300     =head1 AUTHOR
301    
302     Marc A. Lehmann <perlmulticore@schmorp.de>
303    
304     =head1 LICENSE
305    
306     The F<perlmulticore.h> header file is put into the public domain. Where this is legally
307     not possible, or at your option, it can be licensed under the Creative Commons
308     CC0 license: L<https://creativecommons.org/publicdomain/zero/1.0/>.
309    
310     =cut
311    
312     #endif
313    
/*
 * Function table used by the perlinterp_release () and
 * perlinterp_acquire () macros. Both entries point to functions that
 * take no arguments and return nothing.
 */
struct perl_multicore_api
{
  void (*pmapi_release) (void); /* invoked by perlinterp_release () */
  void (*pmapi_acquire) (void); /* invoked by perlinterp_acquire () */
};
319    
320     static void perl_multicore_init (void);
321    
322     const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort };
323    
324     static struct perl_multicore_api *perl_multicore_api
325     = (struct perl_multicore_api *)&perl_multicore_api_init;
326    
327     #define perlinterp_release() perl_multicore_api->pmapi_release ()
328     #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
329    
/* Dummy implementation that does nothing; perl_multicore_init () installs
   it for both table entries when it has to create the table itself. */
static void perl_multicore_nop (void) { }
334    
335     static void
336     perl_multicore_init (void)
337     {
338     dTHX;
339    
340     /* check for existing API struct in PL_modglobal */
341     SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1);
342    
343     if (SvPOKp (*api_svp))
344     perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */
345     else
346     {
347     /* create a new one with a dummy nop implementation */
348     SV *api_sv = NEWSV (0, sizeof (*perl_multicore_api));
349     SvCUR_set (api_sv, sizeof (*perl_multicore_api));
350     SvPOK_only (api_sv);
351     perl_multicore_api = (struct perl_multicore_api *)SvPVX (api_sv);
352     perl_multicore_api->pmapi_release =
353     perl_multicore_api->pmapi_acquire = perl_multicore_nop;
354     *api_svp = api_sv;
355     }
356    
357     /* call the real (or dummy) implementation now */
358     perlinterp_release ();
359     }
360    
361     #endif