1 | TODO: if () acquire example |
|
|
2 | /* |
1 | /* |
3 | * Author: Marc A. Lehmann <xsthreadpool@schmorp.de> |
2 | * Author: Marc A. Lehmann <xsthreadpool@schmorp.de> |
4 | * License: public domain, or where this is not possible/at your option, |
3 | * License: public domain, or where this is not possible/at your option, |
5 | * CC0 (https://creativecommons.org/publicdomain/zero/1.0/) |
4 | * CC0 (https://creativecommons.org/publicdomain/zero/1.0/) |
6 | */ |
5 | */ |
7 | |
6 | |
8 | #ifndef PERL_MULTICORE_H |
7 | #ifndef PERL_MULTICORE_H |
9 | #define PERL_MULTICORE_H |
8 | #define PERL_MULTICORE_H |
10 | |
9 | |
11 | #if 0 |
10 | /* |
12 | |
11 | |
13 | =head1 NAME |
12 | =head1 NAME |
14 | |
13 | |
15 | perlmulticore.h - release the perl interpreter for other uses while doing hard work |
14 | perlmulticore.h - the Perl Multicore Specification and Implementation |
16 | |
15 | |
17 | =head1 SYNOPSIS |
16 | =head1 SYNOPSIS |
18 | |
17 | |
19 | #include "perlmulticore.h" |
18 | #include "perlmulticore.h" |
20 | |
19 | |
… | |
… | |
32 | |
31 | |
33 | The design goals for this mechanism were to be simple to use, very |
32 | The design goals for this mechanism were to be simple to use, very |
34 | efficient when not needed, low code and data size overhead and broad |
33 | efficient when not needed, low code and data size overhead and broad |
35 | applicability. |
34 | applicability. |
36 | |
35 | |
|
|
36 | The newest version of this document can be found at |
|
|
37 | L<http://pod.tst.eu/http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>. |
|
|
38 | |
|
|
39 | The newest version of the header file itself, which |
|
|
40 | includes this documentation, can be downloaded from |
|
|
41 | L<http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>. |
37 | |
42 | |
38 | =head1 HOW DO I USE THIS IN MY MODULES? |
43 | =head1 HOW DO I USE THIS IN MY MODULES? |
39 | |
44 | |
40 | The suage is very simple - you include this header file in your XS module. Then, before you |
45 | The usage is very simple - you include this header file in your XS module. Then, before you |
41 | do your lengthy operation, you release the perl interpreter: |
46 | do your lengthy operation, you release the perl interpreter: |
42 | |
47 | |
43 | perlinterp_release (); |
48 | perlinterp_release (); |
44 | |
49 | |
45 | And when you are done with your computation, you acquire it again: |
50 | And when you are done with your computation, you acquire it again: |
46 | |
51 | |
47 | perlinterp_acquire (); |
52 | perlinterp_acquire (); |
48 | |
53 | |
49 | And that's it. This doesn't load any modules and consists of only a few |
54 | And that's it. This doesn't load any modules and consists of only a few |
50 | machine instructions when no module tot ake advantage of it is loaded. |
55 | machine instructions when no module to take advantage of it is loaded. |
51 | |
56 | |
52 | Here is a simple example, a C<flock> wrapper implemented in XS. Unlike |
57 | Here is a simple example, a C<flock> wrapper implemented in XS. Unlike |
53 | perl's built-in C<flock>, it allows other threads (for example, those |
58 | perl's built-in C<flock>, it allows other threads (for example, those |
54 | provided by L<Coro>) to execute, instead of blocking the whole perl |
59 | provided by L<Coro>) to execute, instead of blocking the whole perl |
55 | interpreter. For the sake of this example, it requires a file descriptor |
60 | interpreter. For the sake of this example, it requires a file descriptor |
56 | instead of a handle. |
61 | instead of a handle. |
57 | |
62 | |
58 | #include "perlmulticore.h" /* this header file */ |
63 | #include "perlmulticore.h" // this header file |
59 | |
64 | |
60 | // and in the XS portion |
65 | // and in the XS portion |
61 | int flock (int fd, int operation) |
66 | int flock (int fd, int operation) |
62 | CODE: |
67 | CODE: |
63 | perlinterp_release (); |
68 | perlinterp_release (); |
… | |
… | |
80 | |
85 | |
81 | =head2 HOW ABOUT NOT-SO LONG WORK? |
86 | =head2 HOW ABOUT NOT-SO LONG WORK? |
82 | |
87 | |
83 | Sometimes you don't know how long your code will take - in a compression |
88 | Sometimes you don't know how long your code will take - in a compression |
84 | library for example, compressing a few hundred Kilobytes of data can take |
89 | library for example, compressing a few hundred Kilobytes of data can take |
85 | a while, while 50 Bytes will comptess so fast that even attempting to do |
90 | a while, while 50 Bytes will compress so fast that even attempting to do |
86 | something else could be more costly than just doing it. |
91 | something else could be more costly than just doing it. |
87 | |
92 | |
88 | This is a very hard problem to solve. The best you can do at the moment is |
93 | This is a very hard problem to solve. The best you can do at the moment is |
89 | to release the perl interpreter only when you think the work to be done |
94 | to release the perl interpreter only when you think the work to be done |
90 | justifies the expense. |
95 | justifies the expense. |
… | |
… | |
102 | Make sure the if conditions are exactly the same and don't change, so you |
107 | Make sure the if conditions are exactly the same and don't change, so you |
103 | always call acquire when you release, and vice versa. |
108 | always call acquire when you release, and vice versa. |
104 | |
109 | |
105 | When you don't have a handy indicator, you might still do something |
110 | When you don't have a handy indicator, you might still do something |
106 | useful. For example, if you do some file locking with C<fcntl> and you |
111 | useful. For example, if you do some file locking with C<fcntl> and you |
107 | expect the lock to be available immediatelly in most cases, you could try |
112 | expect the lock to be available immediately in most cases, you could try |
108 | with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when |
113 | with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when |
109 | the lock couldn't be set: |
114 | the lock couldn't be set: |
110 | |
115 | |
111 | int res = fcntl (fd, F_SETLK, &flock); |
116 | int res = fcntl (fd, F_SETLK, &flock); |
112 | |
117 | |
113 | if (res) |
118 | if (res) |
114 | { |
119 | { |
115 | // error, assume lock is held by another process and do it the slow way |
120 | // error, assume lock is held by another process and do it the slow way |
116 | perlinterp_release (); |
121 | perlinterp_release (); |
117 | res = fcntl (fd, F_SETLKW, &flock); |
122 | res = fcntl (fd, F_SETLKW, &flock); |
118 | perlinterp_release (); |
123 | perlinterp_acquire (); |
119 | } |
124 | } |
120 | |
125 | |
121 | =head1 THE HARD AND FAST RULES |
126 | =head1 THE HARD AND FAST RULES |
122 | |
127 | |
123 | As with everything, there are a number of rules to follow. |
128 | As with everything, there are a number of rules to follow. |
… | |
… | |
152 | |
157 | |
153 | if (!function_that_fails_with_0_return_value ()) |
158 | if (!function_that_fails_with_0_return_value ()) |
154 | { |
159 | { |
155 | perlinterp_acquire (); |
160 | perlinterp_acquire (); |
156 | croak ("error"); |
161 | croak ("error"); |
|
|
162 | // croak doesn't return |
157 | } |
163 | } |
158 | |
164 | |
159 | perlinterp_acquire (); |
165 | perlinterp_acquire (); |
160 | // do other stuff |
166 | // do other stuff |
161 | |
167 | |
… | |
… | |
181 | thread-safe, too. |
187 | thread-safe, too. |
182 | |
188 | |
183 | Always assume that the code between C<perlinterp_release> and |
189 | Always assume that the code between C<perlinterp_release> and |
184 | C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same |
190 | C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same |
185 | time. If your code can't cope with that, you could consider using a mutex |
191 | time. If your code can't cope with that, you could consider using a mutex |
186 | to only allow one such execution, which is sitll better than blocking |
192 | to only allow one such execution, which is still better than blocking |
187 | everybody else from doing anything: |
193 | everybody else from doing anything: |
188 | |
194 | |
189 | static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER; |
195 | static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER; |
190 | |
196 | |
191 | perlinterp_release (); |
197 | perlinterp_release (); |
192 | pthread_mutex_lock (&my_mutex); |
198 | pthread_mutex_lock (&my_mutex); |
193 | do_your_non_thread_safe_thing (); |
199 | do_your_non_thread_safe_thing (); |
194 | pthread_mutex_unlock (&my_mutex); |
200 | pthread_mutex_unlock (&my_mutex); |
195 | perlinterp_acquire (); |
201 | perlinterp_acquire (); |
196 | |
202 | |
197 | This isn't as trivial as it looks though, as you need to find out which |
|
|
198 | threading system is in use (with L<Coro::Multicore>, it currently is |
|
|
199 | always pthreads). |
|
|
200 | |
|
|
201 | =item I<Don't> get confused by having to release first. |
203 | =item I<Don't> get confused by having to release first. |
202 | |
204 | |
203 | In many real world scenarios, you acquire a resource, do something, then |
205 | In many real world scenarios, you acquire a resource, do something, then |
204 | release it again. Don't let this confuse you, with this, you already own |
206 | release it again. Don't let this confuse you, with this, you already own |
205 | the resource (the perl interpreter) so you have to I<release> first, and |
207 | the resource (the perl interpreter) so you have to I<release> first, and |
… | |
… | |
216 | =over 4 |
218 | =over 4 |
217 | |
219 | |
218 | =item Simple to Use |
220 | =item Simple to Use |
219 | |
221 | |
220 | All you have to do is identify the place in your existing code where you |
222 | All you have to do is identify the place in your existing code where you |
221 | stop touching perl stuff, do your actual work, and strat touching perl |
223 | stop touching perl stuff, do your actual work, and start touching perl |
222 | stuff again. |
224 | stuff again. |
223 | |
225 | |
224 | Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the |
226 | Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the |
225 | actual work code. |
227 | actual work code. |
226 | |
228 | |
… | |
… | |
250 | first are two memory accesses and a predictable function call of an empty |
252 | first are two memory accesses and a predictable function call of an empty |
251 | function. |
253 | function. |
252 | |
254 | |
253 | Of course, the overhead is much higher when these functions actually |
255 | Of course, the overhead is much higher when these functions actually |
254 | implement anything useful, but you always get what you pay for. |
256 | implement anything useful, but you always get what you pay for. |
|
|
257 | |
|
|
258 | With L<Coro::Multicore>, every release/acquire involves two pthread |
|
|
259 | switches, two coro thread switches, a bunch of syscalls, and sometimes |
|
|
260 | interacting with the event loop. |
|
|
261 | |
|
|
262 | A dedicated thread pool such as the one L<IO::AIO> uses could reduce |
|
|
263 | these overheads, and would also reduce the dependencies (L<AnyEvent> is a |
|
|
264 | smaller and more portable dependency than L<Coro>), but it would require a |
|
|
265 | lot more work on the side of the module author wanting to support it than |
|
|
266 | this solution. |
255 | |
267 | |
256 | =item Low Code and Data Size Overhead |
268 | =item Low Code and Data Size Overhead |
257 | |
269 | |
258 | On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one |
270 | On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one |
259 | pointer) of your data segment, to store the C<perl_multicore_api> |
271 | pointer) of your data segment, to store the C<perl_multicore_api> |
… | |
… | |
272 | octet sequence: |
284 | octet sequence: |
273 | |
285 | |
274 | 150> mov 0x200f23(%rip),%rax # <perl_multicore_api> |
286 | 150> mov 0x200f23(%rip),%rax # <perl_multicore_api> |
275 | 157> callq *0x8(%rax) |
287 | 157> callq *0x8(%rax) |
276 | |
288 | |
277 | amd64 code sure is bloated. |
|
|
278 | |
|
|
279 | The biggest part is the initialisation code, which consists of 11 lines of |
289 | The biggest part is the initialisation code, which consists of 11 lines of |
280 | typical XS code. On my system, all the code in F<perlmulticore.h> compiles |
290 | typical XS code. On my system, all the code in F<perlmulticore.h> compiles |
281 | to less than 160 octets of read-only data. |
291 | to less than 160 octets of read-only data. |
282 | |
292 | |
283 | =item Broad Applicability |
293 | =item Broad Applicability |
… | |
… | |
293 | =back |
303 | =back |
294 | |
304 | |
295 | =head1 AUTHOR |
305 | =head1 AUTHOR |
296 | |
306 | |
297 | Marc A. Lehmann <perlmulticore@schmorp.de> |
307 | Marc A. Lehmann <perlmulticore@schmorp.de> |
|
|
308 | http://perlmulticore.schmorp.de/ |
298 | |
309 | |
299 | =head1 LICENSE |
310 | =head1 LICENSE |
300 | |
311 | |
301 | The F<perlmulticore.h> is put into the public domain. Where this is legally |
312 | The F<perlmulticore.h> header file is put into the public |
|
|
313 | domain. Where this is legally not possible, or at your |
302 | not possible, or at your option, it can be licensed under creativecommons |
314 | option, it can be licensed under creativecommons CC0 |
303 | CC0 license: L<https://creativecommons.org/publicdomain/zero/1.0/>. |
315 | license: L<https://creativecommons.org/publicdomain/zero/1.0/>. |
304 | |
316 | |
305 | =cut |
317 | =cut |
306 | |
318 | |
307 | #endif |
319 | */ |
308 | |
320 | |
|
|
321 | /* this struct is shared between all modules, and currently */ |
|
|
322 | /* contains only the two function pointers for release/acquire */ |
309 | struct perl_multicore_api |
323 | struct perl_multicore_api |
310 | { |
324 | { |
311 | void (*pmapi_release)(void); |
325 | void (*pmapi_release)(void); |
312 | void (*pmapi_acquire)(void); |
326 | void (*pmapi_acquire)(void); |
313 | }; |
327 | }; |
… | |
… | |
320 | = (struct perl_multicore_api *)&perl_multicore_api_init; |
334 | = (struct perl_multicore_api *)&perl_multicore_api_init; |
321 | |
335 | |
322 | #define perlinterp_release() perl_multicore_api->pmapi_release () |
336 | #define perlinterp_release() perl_multicore_api->pmapi_release () |
323 | #define perlinterp_acquire() perl_multicore_api->pmapi_acquire () |
337 | #define perlinterp_acquire() perl_multicore_api->pmapi_acquire () |
324 | |
338 | |
|
|
339 | /* this is the release/acquire implementation used as fallback */ |
325 | static void |
340 | static void |
326 | perl_multicore_nop (void) |
341 | perl_multicore_nop (void) |
327 | { |
342 | { |
328 | } |
343 | } |
329 | |
344 | |
|
|
345 | /* this is the initial implementation of "release" - it initialises */ |
|
|
346 | /* the api and then calls the real release function */ |
330 | static void |
347 | static void |
331 | perl_multicore_init (void) |
348 | perl_multicore_init (void) |
332 | { |
349 | { |
333 | dTHX; |
350 | dTHX; |
334 | |
351 | |