1 | /* |
1 | /* |
2 | * Author: Marc A. Lehmann <xsthreadpool@schmorp.de> |
2 | * Author: Marc A. Lehmann <xsthreadpool@schmorp.de> |
3 | * License: public domain, or where this is not possible/at your option, |
3 | * License: public domain, or where this is not possible/at your option, |
4 | * CC0 (https://creativecommons.org/publicdomain/zero/1.0/) |
4 | * CC0 (https://creativecommons.org/publicdomain/zero/1.0/) |
|
|
5 | * |
|
|
6 | * Full documentation can be found at http://perlmulticore.schmorp.de/ |
|
|
7 | * The newest version of this header can be downloaded from |
|
|
8 | * http://perlmulticore.schmorp.de/perlmulticore.h |
5 | */ |
9 | */ |
6 | |
10 | |
7 | #ifndef PERL_MULTICORE_H |
11 | #ifndef PERL_MULTICORE_H |
8 | #define PERL_MULTICORE_H |
12 | #define PERL_MULTICORE_H |
9 | |
13 | |
10 | /* |
14 | /* |
11 | |
15 | |
12 | =head1 NAME |
16 | =head1 NAME |
13 | |
17 | |
14 | perlmulticore.h - the Perl Multicore Specification and Implementation |
18 | perlmulticore.h - implements the Perl Multicore Specification |
15 | |
19 | |
16 | =head1 SYNOPSIS |
20 | =head1 SYNOPSIS |
17 | |
21 | |
18 | #include "perlmulticore.h" |
22 | #include "perlmulticore.h" |
19 | |
23 | |
20 | // in your XS function: |
24 | // in your XS function: |
21 | |
25 | |
22 | perlinterp_release (); |
26 | perlinterp_release (); |
23 | do_the_C_thing (); |
27 | do_the_C_thing (); |
24 | perlinterp_acquire (); |
28 | perlinterp_acquire (); |
25 | |
29 | |
26 | =head1 DESCRIPTION |
30 | =head1 DESCRIPTION |
27 | |
31 | |
28 | This header file implements a simple mechanism for XS modules to allow |
32 | This documentation is the abridged version of the full documentation at |
29 | re-use of the perl interpreter for other threads while doing some lengthy |
33 | L<http://perlmulticore.schmorp.de/>. It's recommended to go there instead |
30 | operation, such as cryptography, SQL queries, disk I/O and so on. |
34 | of reading this document. |
31 | |
35 | |
32 | The design goals for this mechanism were to be simple to use, very |
36 | This header file implements a very low overhead (both in code and runtime) |
33 | efficient when not needed, low code and data size overhead and broad |
37 | mechanism for XS modules to allow re-use of the perl interpreter for other |
34 | applicability. |
38 | threads while doing some lengthy operation, such as cryptography, SQL |
|
|
39 | queries, disk I/O and so on. |
35 | |
40 | |
36 | The newest version of this document can be found at |
|
|
37 | L<http://pod.tst.eu/http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>. |
|
|
38 | |
|
|
39 | The newest version of the header file itself, which |
41 | The newest version of the header file itself, can be downloaded from |
40 | includes this documentation, can be downloaded from |
42 | L<http://perlmulticore.schmorp.de/perlmulticore.h>. |
41 | L<http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>. |
|
|
42 | |
43 | |
43 | =head1 HOW DO I USE THIS IN MY MODULES? |
44 | =head1 HOW DO I USE THIS IN MY MODULES? |
44 | |
45 | |
45 | The usage is very simple - you include this header file in your XS module. Then, before you |
46 | The usage is very simple - you include this header file in your XS module. Then, before you |
46 | do your lengthy operation, you release the perl interpreter: |
47 | do your lengthy operation, you release the perl interpreter: |
… | |
… | |
52 | perlinterp_acquire (); |
53 | perlinterp_acquire (); |
53 | |
54 | |
54 | And that's it. This doesn't load any modules and consists of only a few |
55 | And that's it. This doesn't load any modules and consists of only a few |
55 | machine instructions when no module to take advantage of it is loaded. |
56 | machine instructions when no module to take advantage of it is loaded. |
56 | |
57 | |
57 | Here is a simple example, an C<flock> wrapper implemented in XS. Unlike |
58 | More documentation and examples can be found at the perl multicore site at |
58 | perl's built-in C<flock>, it allows other threads (for example, those |
59 | L<http://perlmulticore.schmorp.de>. |
59 | provided by L<Coro>) to execute, instead of blocking the whole perl |
|
|
60 | interpreter. For the sake of this example, it requires a file descriptor |
|
|
61 | instead of a handle. |
|
|
62 | |
|
|
63 | #include "perlmulticore.h" // this header file |
|
|
64 | |
|
|
65 | // and in the XS portion |
|
|
66 | int flock (int fd, int operation) |
|
|
67 | CODE: |
|
|
68 | perlinterp_release (); |
|
|
69 | RETVAL = flock (fd, operation); |
|
|
70 | perlinterp_acquire (); |
|
|
71 | OUTPUT: |
|
|
72 | RETVAL |
|
|
73 | |
|
|
74 | Another example would be to modify L<DBD::mysql> to allow other |
|
|
75 | threads to execute while executing SQL queries. One way to do this |
|
|
76 | is to find all C<mysql_st_internal_execute> and similar calls (such as |
|
|
77 | C<mysql_st_internal_execute41>), and adorn them with release/acquire |
|
|
78 | calls: |
|
|
79 | |
|
|
80 | { |
|
|
81 | perlinterp_release (); |
|
|
82 | imp_sth->row_num= mysql_st_internal_execute(sth, ...); |
|
|
83 | perlinterp_acquire (); |
|
|
84 | } |
|
|
85 | |
|
|
86 | =head2 HOW ABOUT NOT-SO LONG WORK? |
|
|
87 | |
|
|
88 | Sometimes you don't know how long your code will take - in a compression |
|
|
89 | library for example, compressing a few hundred Kilobyte of data can take |
|
|
90 | a while, while 50 Bytes will compress so fast that even attempting to do |
|
|
91 | something else could be more costly than just doing it. |
|
|
92 | |
|
|
93 | This is a very hard problem to solve. The best you can do at the moment is |
|
|
94 | to release the perl interpreter only when you think the work to be done |
|
|
95 | justifies the expense. |
|
|
96 | |
|
|
97 | As a rule of thumb, if you expect to need more than a few thousand cycles, |
|
|
98 | you should release the interpreter, else you shouldn't. When in doubt, |
|
|
99 | release. |
|
|
100 | |
|
|
101 | For example, in a compression library, you might want to do this: |
|
|
102 | |
|
|
103 | if (bytes_to_be_compressed > 2000) perlinterp_release (); |
|
|
104 | do_compress (...); |
|
|
105 | if (bytes_to_be_compressed > 2000) perlinterp_acquire (); |
|
|
106 | |
|
|
107 | Make sure the if conditions are exactly the same and don't change, so you |
|
|
108 | always call acquire when you release, and vice versa. |
|
|
109 | |
|
|
110 | When you don't have a handy indicator, you might still do something |
|
|
111 | useful. For example, if you do some file locking with C<fcntl> and you |
|
|
112 | expect the lock to be available immediately in most cases, you could try |
|
|
113 | with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when |
|
|
114 | the lock couldn't be set: |
|
|
115 | |
|
|
116 | int res = fcntl (fd, F_SETLK, &flock); |
|
|
117 | |
|
|
118 | if (res) |
|
|
119 | { |
|
|
120 | // error, assume lock is held by another process and do it the slow way |
|
|
121 | perlinterp_release (); |
|
|
122 | res = fcntl (fd, F_SETLKW, &flock); |
|
|
123 | perlinterp_acquire (); |
|
|
124 | } |
|
|
125 | |
60 | |
126 | =head1 THE HARD AND FAST RULES |
61 | =head1 THE HARD AND FAST RULES |
127 | |
62 | |
128 | As with everything, there are a number of rules to follow. |
63 | As with everything, there are a number of rules to follow. |
129 | |
64 | |
130 | =over 4 |
65 | =over 4 |
131 | |
66 | |
132 | =item I<Never> touch any perl data structures after calling C<perlinterp_release>. |
67 | =item I<Never> touch any perl data structures after calling C<perlinterp_release>. |
133 | |
68 | |
134 | Possibly the most important rule of them all, anything perl is |
|
|
135 | completely off-limits after C<perlinterp_release>, until you call |
69 | Anything perl is completely off-limits after C<perlinterp_release>, until |
136 | C<perlinterp_acquire>, after which you can access perl stuff again. |
70 | you call C<perlinterp_acquire>, after which you can access perl stuff |
|
|
71 | again. |
137 | |
72 | |
138 | That includes anything in the perl interpreter that you didn't prove to be |
73 | That includes anything in the perl interpreter that you didn't prove to be |
139 | safe, and didn't prove to be safe in older and future versions of perl: |
74 | safe, and didn't prove to be safe in older and future versions of perl: |
140 | global variables, local perl scalars, even if you are sure nobody accesses |
75 | global variables, local perl scalars, even if you are sure nobody accesses |
141 | them and you only try to "read" their value, and so on. |
76 | them and you only try to "read" their value. |
142 | |
|
|
143 | If you need to access perl things, do it before releasing the |
|
|
144 | interpreter with C<perlinterp_release>, or after acquiring it again with |
|
|
145 | C<perlinterp_acquire>. |
|
|
146 | |
77 | |
147 | =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs. |
78 | =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs. |
148 | |
79 | |
149 | For each C<perlinterp_release> call there must be a C<perlinterp_acquire> |
80 | For each C<perlinterp_release> call there must be a C<perlinterp_acquire> |
150 | call. They don't have to be in the same function, and you can have |
81 | call. They don't have to be in the same function, and you can have |
151 | multiple calls to them, as long as every C<perlinterp_release> call is |
82 | multiple calls to them, as long as every C<perlinterp_release> call is |
152 | followed by exactly one C<perlinterp_acquire> call. |
83 | followed by exactly one C<perlinterp_acquire> call at runtime. |
153 | |
|
|
154 | For example, this would be fine: |
|
|
155 | |
|
|
156 | perlinterp_release (); |
|
|
157 | |
|
|
158 | if (!function_that_fails_with_0_return_value ()) |
|
|
159 | { |
|
|
160 | perlinterp_acquire (); |
|
|
161 | croak ("error"); |
|
|
162 | // croak doesn't return |
|
|
163 | } |
|
|
164 | |
|
|
165 | perlinterp_acquire (); |
|
|
166 | // do other stuff |
|
|
167 | |
84 | |
168 | =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>. |
85 | =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>. |
169 | |
86 | |
170 | That simply means that after calling C<perlinterp_release>, you must |
87 | That simply means that after calling C<perlinterp_release>, you must |
171 | call C<perlinterp_acquire> before calling C<perlinterp_release> |
88 | call C<perlinterp_acquire> before calling C<perlinterp_release> |
172 | again. Likewise, after C<perlinterp_acquire>, you can call |
89 | again. Likewise, after C<perlinterp_acquire>, you can call |
173 | C<perlinterp_release> but not another C<perlinterp_acquire>. |
90 | C<perlinterp_release> but not another C<perlinterp_acquire>. |
174 | |
91 | |
175 | =item I<Always> call C<perlinterp_release> first. |
92 | =item I<Always> call C<perlinterp_release> first. |
176 | |
93 | |
177 | Also simple: you I<must not> call C<perlinterp_acquire> without having |
94 | You I<must not> call C<perlinterp_acquire> without having called |
178 | called C<perlinterp_release> before. |
95 | C<perlinterp_release> before. |
179 | |
96 | |
180 | =item I<Never> underestimate threads. |
97 | =item I<Never> underestimate threads. |
181 | |
98 | |
182 | While it's easy to add parallel execution ability to your XS module, it |
99 | While it's easy to add parallel execution ability to your XS module, it |
183 | doesn't mean it is safe. After you release the perl interpreter, it's |
100 | doesn't mean it is safe. After you release the perl interpreter, it's |
… | |
… | |
186 | code must be thread safe, and if you use any library, that library must be |
103 | code must be thread safe, and if you use any library, that library must be |
187 | thread-safe, too. |
104 | thread-safe, too. |
188 | |
105 | |
189 | Always assume that the code between C<perlinterp_release> and |
106 | Always assume that the code between C<perlinterp_release> and |
190 | C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same |
107 | C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same |
191 | time. If your code can't cope with that, you could consider using a mutex |
108 | time. |
192 | to only allow one such execution, which is still better than blocking |
|
|
193 | everybody else from doing anything: |
|
|
194 | |
|
|
195 | static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER; |
|
|
196 | |
|
|
197 | perlinterp_release (); |
|
|
198 | pthread_mutex_lock (&my_mutex); |
|
|
199 | do_your_non_thread_safe_thing (); |
|
|
200 | pthread_mutex_unlock (&my_mutex); |
|
|
201 | perlinterp_acquire (); |
|
|
202 | |
|
|
203 | =item I<Don't> get confused by having to release first. |
|
|
204 | |
|
|
205 | In many real world scenarios, you acquire a resource, do something, then |
|
|
206 | release it again. Don't let this confuse you, with this, you already own |
|
|
207 | the resource (the perl interpreter) so you have to I<release> first, and |
|
|
208 | I<acquire> it again later, not the other way around. |
|
|
209 | |
|
|
210 | =back |
|
|
211 | |
|
|
212 | |
|
|
213 | =head1 DESIGN PRINCIPLES |
|
|
214 | |
|
|
215 | This section discusses how the design goals were reached (you be the |
|
|
216 | judge), how it is implemented, and what overheads this implies. |
|
|
217 | |
|
|
218 | =over 4 |
|
|
219 | |
|
|
220 | =item Simple to Use |
|
|
221 | |
|
|
222 | All you have to do is identify the place in your existing code where you |
|
|
223 | stop touching perl stuff, do your actual work, and start touching perl |
|
|
224 | stuff again. |
|
|
225 | |
|
|
226 | Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the |
|
|
227 | actual work code. |
|
|
228 | |
|
|
229 | You have to include F<perlmulticore.h> and distribute it with your XS |
|
|
230 | code, but all these things border on the trivial. |
|
|
231 | |
|
|
232 | =item Very Efficient |
|
|
233 | |
|
|
234 | The definitions of C<perlinterp_release> and C<perlinterp_acquire> are very |
|
|
235 | short: |
|
|
236 | |
|
|
237 | #define perlinterp_release() perl_multicore_api->pmapi_release () |
|
|
238 | #define perlinterp_acquire() perl_multicore_api->pmapi_acquire () |
|
|
239 | |
|
|
240 | Both are macros that read a pointer from memory (perl_multicore_api), |
|
|
241 | dereference a function pointer stored at that place, and call the |
|
|
242 | function, which takes no arguments and returns nothing. |
|
|
243 | |
|
|
244 | The first call to C<perlinterp_release> will check for the presence |
|
|
245 | of any supporting module, and if none is loaded, will create a dummy |
|
|
246 | implementation where both C<pmapi_release> and C<pmapi_acquire> execute |
|
|
247 | this function: |
|
|
248 | |
|
|
249 | static void perl_multicore_nop (void) { } |
|
|
250 | |
|
|
251 | So in the case of no magical module being loaded, all calls except the |
|
|
252 | first are two memory accesses and a predictable function call of an empty |
|
|
253 | function. |
|
|
254 | |
|
|
255 | Of course, the overhead is much higher when these functions actually |
|
|
256 | implement anything useful, but you always get what you pay for. |
|
|
257 | |
|
|
258 | With L<Coro::Multicore>, every release/acquire involves two pthread |
|
|
259 | switches, two coro thread switches, a bunch of syscalls, and sometimes |
|
|
260 | interacting with the event loop. |
|
|
261 | |
|
|
262 | A dedicated thread pool such as the one L<IO::AIO> uses could reduce |
|
|
263 | these overheads, and would also reduce the dependencies (L<AnyEvent> is a |
|
|
264 | smaller and more portable dependency than L<Coro>), but it would require a |
|
|
265 | lot more work on the side of the module author wanting to support it than |
|
|
266 | this solution. |
|
|
267 | |
|
|
268 | =item Low Code and Data Size Overhead |
|
|
269 | |
|
|
270 | On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one |
|
|
271 | pointer) of your data segment, to store the C<perl_multicore_api> |
|
|
272 | pointer. In addition it creates a C<16> octet perl string to store the |
|
|
273 | function pointers in, and stores it in a hash provided by perl for this |
|
|
274 | purpose. |
|
|
275 | |
|
|
276 | This is pretty much the equivalent of executing this code: |
|
|
277 | |
|
|
278 | $existing_hash{perl_multicore_api} = "123456781234567812345678"; |
|
|
279 | |
|
|
280 | And that's it, which is, as I think, indeed very little. |
|
|
281 | |
|
|
282 | As for code size, on my amd64 system, every call to C<perlinterp_release> |
|
|
283 | or C<perlinterp_acquire> results in a variation of the following 9-10 |
|
|
284 | octet sequence: |
|
|
285 | |
|
|
286 | 150> mov 0x200f23(%rip),%rax # <perl_multicore_api> |
|
|
287 | 157> callq *0x8(%rax) |
|
|
288 | |
|
|
289 | The biggest part is the initialisation code, which consists of 11 lines of |
|
|
290 | typical XS code. On my system, all the code in F<perlmulticore.h> compiles |
|
|
291 | to less than 160 octets of read-only data. |
|
|
292 | |
|
|
293 | =item Broad Applicability |
|
|
294 | |
|
|
295 | While there are alternative ways to achieve the goal of parallel execution |
|
|
296 | with threads that might be more efficient, this mechanism was chosen |
|
|
297 | because it is very simple to retrofit existing modules with it, and it |
|
|
298 | |
|
|
299 | The design goals for this mechanism were to be simple to use, very |
|
|
300 | efficient when not needed, low code and data size overhead and broad |
|
|
301 | applicability. |
|
|
302 | |
109 | |
303 | =back |
110 | =back |
304 | |
111 | |
305 | |
112 | |
306 | =head1 DISABLING PERL MULTICORE AT COMPILE TIME |
113 | =head1 DISABLING PERL MULTICORE AT COMPILE TIME |
307 | |
114 | |
308 | You can disable the complete perl multicore API by defining the |
115 | You can disable the complete perl multicore API by defining the |
309 | symbol C<PERL_MULTICORE_DISABLE> to C<1> (e.g. by specifying |
116 | symbol C<PERL_MULTICORE_DISABLE> to C<1> (e.g. by specifying |
310 | F<-DPERL_MULTICORE_DISABLE> as compiler argument). |
117 | F<-DPERL_MULTICORE_DISABLE> as compiler argument). |
311 | |
|
|
312 | This will leave no traces of the API in the compiled code, suitable |
|
|
313 | "empty" C<perlinterp_release> and C<perlinterp_acquire> definitions will be provided. |
|
|
314 | |
118 | |
315 | This could be added to perl's C<CPPFLAGS> when configuring perl on |
119 | This could be added to perl's C<CPPFLAGS> when configuring perl on |
316 | platforms that do not support threading at all for example. |
120 | platforms that do not support threading at all for example. |
317 | |
121 | |
318 | |
122 | |
… | |
… | |
339 | |
143 | |
340 | #define perlinterp_release() do { } while (0) |
144 | #define perlinterp_release() do { } while (0) |
341 | #define perlinterp_acquire() do { } while (0) |
145 | #define perlinterp_acquire() do { } while (0) |
342 | |
146 | |
343 | #else |
147 | #else |
|
|
148 | |
|
|
149 | START_EXTERN_C |
344 | |
150 | |
345 | /* this struct is shared between all modules, and currently */ |
151 | /* this struct is shared between all modules, and currently */ |
346 | /* contains only the two function pointers for release/acquire */ |
152 | /* contains only the two function pointers for release/acquire */ |
347 | struct perl_multicore_api |
153 | struct perl_multicore_api |
348 | { |
154 | { |
… | |
… | |
350 | void (*pmapi_acquire)(void); |
156 | void (*pmapi_acquire)(void); |
351 | }; |
157 | }; |
352 | |
158 | |
353 | static void perl_multicore_init (void); |
159 | static void perl_multicore_init (void); |
354 | |
160 | |
355 | const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort }; |
161 | static const struct perl_multicore_api perl_multicore_api_init |
|
|
162 | = { perl_multicore_init, 0 }; |
356 | |
163 | |
357 | static struct perl_multicore_api *perl_multicore_api |
164 | static struct perl_multicore_api *perl_multicore_api |
358 | = (struct perl_multicore_api *)&perl_multicore_api_init; |
165 | = (struct perl_multicore_api *)&perl_multicore_api_init; |
359 | |
166 | |
360 | #define perlinterp_release() perl_multicore_api->pmapi_release () |
167 | #define perlinterp_release() perl_multicore_api->pmapi_release () |
… | |
… | |
363 | /* this is the release/acquire implementation used as fallback */ |
170 | /* this is the release/acquire implementation used as fallback */ |
364 | static void |
171 | static void |
365 | perl_multicore_nop (void) |
172 | perl_multicore_nop (void) |
366 | { |
173 | { |
367 | } |
174 | } |
|
|
175 | |
|
|
176 | static const char perl_multicore_api_key[] = "perl_multicore_api"; |
368 | |
177 | |
369 | /* this is the initial implementation of "release" - it initialises */ |
178 | /* this is the initial implementation of "release" - it initialises */ |
370 | /* the api and then calls the real release function */ |
179 | /* the api and then calls the real release function */ |
371 | static void |
180 | static void |
372 | perl_multicore_init (void) |
181 | perl_multicore_init (void) |
373 | { |
182 | { |
374 | dTHX; |
183 | dTHX; |
375 | |
184 | |
376 | /* check for existing API struct in PL_modglobal */ |
185 | /* check for existing API struct in PL_modglobal */ |
377 | SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1); |
186 | SV **api_svp = hv_fetch (PL_modglobal, perl_multicore_api_key, |
|
|
187 | sizeof (perl_multicore_api_key) - 1, 1); |
378 | |
188 | |
379 | if (SvPOKp (*api_svp)) |
189 | if (SvPOKp (*api_svp)) |
380 | perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */ |
190 | perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */ |
381 | else |
191 | else |
382 | { |
192 | { |
… | |
… | |
392 | |
202 | |
393 | /* call the real (or dummy) implementation now */ |
203 | /* call the real (or dummy) implementation now */ |
394 | perlinterp_release (); |
204 | perlinterp_release (); |
395 | } |
205 | } |
396 | |
206 | |
|
|
207 | END_EXTERN_C |
|
|
208 | |
397 | #endif |
209 | #endif |
398 | |
210 | |
399 | #endif |
211 | #endif |