ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Compress-LZF/perlmulticore.h
Revision: 1.2
Committed: Sun Jun 28 17:40:38 2015 UTC (8 years, 10 months ago) by root
Content type: text/plain
Branch: MAIN
Changes since 1.1: +2 -1 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 /*
2 * Author: Marc A. Lehmann <xsthreadpool@schmorp.de>
3 * License: public domain, or where this is not possible/at your option,
4 * CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
5 */
6
7 #ifndef PERL_MULTICORE_H
8 #define PERL_MULTICORE_H
9
10 /*
11
12 =head1 NAME
13
14 perlmulticore.h - the Perl Multicore Specification and Implementation
15
16 =head1 SYNOPSIS
17
18 #include "perlmulticore.h"
19
20 // in your XS function:
21
22 perlinterp_release ();
23 do_the_C_thing ();
24 perlinterp_acquire ();
25
26 =head1 DESCRIPTION
27
28 This header file implements a simple mechanism for XS modules to allow
29 re-use of the perl interpreter for other threads while doing some lengthy
30 operation, such as cryptography, SQL queries, disk I/O and so on.
31
32 The design goals for this mechanism were to be simple to use, very
33 efficient when not needed, low code and data size overhead and broad
34 applicability.
35
36 The newest version of this document can be found at
37 L<http://pod.tst.eu/http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
38
39 The newest version of the header file itself, which
40 includes this documentation, can be downloaded from
41 L<http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
42
43 =head1 HOW DO I USE THIS IN MY MODULES?
44
45 The usage is very simple - you include this header file in your XS module. Then, before you
46 do your lengthy operation, you release the perl interpreter:
47
48 perlinterp_release ();
49
50 And when you are done with your computation, you acquire it again:
51
52 perlinterp_acquire ();
53
54 And that's it. This doesn't load any modules and consists of only a few
55 machine instructions when no module to take advantage of it is loaded.
56
57 Here is a simple example, a C<flock> wrapper implemented in XS. Unlike
58 perl's built-in C<flock>, it allows other threads (for example, those
59 provided by L<Coro>) to execute, instead of blocking the whole perl
60 interpreter. For the sake of this example, it requires a file descriptor
61 instead of a handle.
62
63 #include "perlmulticore.h" // this header file
64
65 // and in the XS portion
66 int flock (int fd, int operation)
67 CODE:
68 perlinterp_release ();
69 RETVAL = flock (fd, operation);
70 perlinterp_acquire ();
71 OUTPUT:
72 RETVAL
73
74 Another example would be to modify L<DBD::mysql> to allow other
75 threads to execute while executing SQL queries. One way to do this
76 is to find all C<mysql_st_internal_execute> and similar calls (such as
77 C<mysql_st_internal_execute41>), and adorn them with release/acquire
78 calls:
79
80 {
81 perlinterp_release ();
82 imp_sth->row_num= mysql_st_internal_execute(sth, ...);
83 perlinterp_acquire ();
84 }
85
86 =head2 HOW ABOUT NOT-SO LONG WORK?
87
88 Sometimes you don't know how long your code will take - in a compression
89 library for example, compressing a few hundred kilobytes of data can take
90 a while, while 50 Bytes will compress so fast that even attempting to do
91 something else could be more costly than just doing it.
92
93 This is a very hard problem to solve. The best you can do at the moment is
94 to release the perl interpreter only when you think the work to be done
95 justifies the expense.
96
97 As a rule of thumb, if you expect to need more than a few thousand cycles,
98 you should release the interpreter, else you shouldn't. When in doubt,
99 release.
100
101 For example, in a compression library, you might want to do this:
102
103 if (bytes_to_be_compressed > 2000) perlinterp_release ();
104 do_compress (...);
105 if (bytes_to_be_compressed > 2000) perlinterp_acquire ();
106
107 Make sure the if conditions are exactly the same and don't change, so you
108 always call acquire when you release, and vice versa.
109
110 When you don't have a handy indicator, you might still do something
111 useful. For example, if you do some file locking with C<fcntl> and you
112 expect the lock to be available immediately in most cases, you could try
113 with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when
114 the lock couldn't be set:
115
116 int res = fcntl (fd, F_SETLK, &flock);
117
118 if (res)
119 {
120 // error, assume lock is held by another process and do it the slow way
121 perlinterp_release ();
122 res = fcntl (fd, F_SETLKW, &flock);
123 perlinterp_acquire ();
124 }
125
126 =head1 THE HARD AND FAST RULES
127
128 As with everything, there are a number of rules to follow.
129
130 =over 4
131
132 =item I<Never> touch any perl data structures after calling C<perlinterp_release>.
133
134 Possibly the most important rule of them all, anything perl is
135 completely off-limits after C<perlinterp_release>, until you call
136 C<perlinterp_acquire>, after which you can access perl stuff again.
137
138 That includes anything in the perl interpreter that you didn't prove to be
139 safe, and didn't prove to be safe in older and future versions of perl:
140 global variables, local perl scalars, even if you are sure nobody accesses
141 them and you only try to "read" their value, and so on.
142
143 If you need to access perl things, do it before releasing the
144 interpreter with C<perlinterp_release>, or after acquiring it again with
145 C<perlinterp_acquire>.
146
147 =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs.
148
149 For each C<perlinterp_release> call there must be a C<perlinterp_acquire>
150 call. They don't have to be in the same function, and you can have
151 multiple calls to them, as long as every C<perlinterp_release> call is
152 followed by exactly one C<perlinterp_acquire> call.
153
154 For example, this would be fine:
155
156 perlinterp_release ();
157
158 if (!function_that_fails_with_0_return_value ())
159 {
160 perlinterp_acquire ();
161 croak ("error");
162 // croak doesn't return
163 }
164
165 perlinterp_acquire ();
166 // do other stuff
167
168 =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>.
169
170 That simply means that after calling C<perlinterp_release>, you must
171 call C<perlinterp_acquire> before calling C<perlinterp_release>
172 again. Likewise, after C<perlinterp_acquire>, you can call
173 C<perlinterp_release> but not another C<perlinterp_acquire>.
174
175 =item I<Always> call C<perlinterp_release> first.
176
177 Also simple: you I<must not> call C<perlinterp_acquire> without having
178 called C<perlinterp_release> before.
179
180 =item I<Never> underestimate threads.
181
182 While it's easy to add parallel execution ability to your XS module, it
183 doesn't mean it is safe. After you release the perl interpreter, it's
184 perfectly possible that it will call your XS function in another thread,
185 even while your original function still executes. In other words: your C
186 code must be thread safe, and if you use any library, that library must be
187 thread-safe, too.
188
189 Always assume that the code between C<perlinterp_release> and
190 C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same
191 time. If your code can't cope with that, you could consider using a mutex
192 to only allow one such execution, which is still better than blocking
193 everybody else from doing anything:
194
195 static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
196
197 perlinterp_release ();
198 pthread_mutex_lock (&my_mutex);
199 do_your_non_thread_safe_thing ();
200 pthread_mutex_unlock (&my_mutex);
201 perlinterp_acquire ();
202
203 This isn't as trivial as it looks though, as you need to find out which
204 threading system is in use (with L<Coro::Multicore>, it currently is
205 always pthreads).
206
207 =item I<Don't> get confused by having to release first.
208
209 In many real world scenarios, you acquire a resource, do something, then
210 release it again. Don't let this confuse you: here, you already own
211 the resource (the perl interpreter) so you have to I<release> first, and
212 I<acquire> it again later, not the other way around.
213
214 =back
215
216
217 =head1 DESIGN PRINCIPLES
218
219 This section discusses how the design goals were reached (you be the
220 judge), how it is implemented, and what overheads this implies.
221
222 =over 4
223
224 =item Simple to Use
225
226 All you have to do is identify the place in your existing code where you
227 stop touching perl stuff, do your actual work, and start touching perl
228 stuff again.
229
230 Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the
231 actual work code.
232
233 You have to include F<perlmulticore.h> and distribute it with your XS
234 code, but all these things border on the trivial.
235
236 =item Very Efficient
237
238 The definitions of C<perlinterp_release> and C<perlinterp_acquire> are very
239 short:
240
241 #define perlinterp_release() perl_multicore_api->pmapi_release ()
242 #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
243
244 Both are macros that read a pointer from memory (perl_multicore_api),
245 dereference a function pointer stored at that place, and call the
246 function, which takes no arguments and returns nothing.
247
248 The first call to C<perlinterp_release> will check for the presence
249 of any supporting module, and if none is loaded, will create a dummy
250 implementation where both C<pmapi_release> and C<pmapi_acquire> execute
251 this function:
252
253 static void perl_multicore_nop (void) { }
254
255 So in the case of no magical module being loaded, all calls except the
256 first are two memory accesses and a predictable function call of an empty
257 function.
258
259 Of course, the overhead is much higher when these functions actually
260 implement anything useful, but you always get what you pay for.
261
262 With L<Coro::Multicore>, every release/acquire involves two pthread
263 switches, two coro thread switches, a bunch of syscalls, and sometimes
264 interacting with the event loop.
265
266 A dedicated thread pool such as the one L<IO::AIO> uses could reduce
267 these overheads, and would also reduce the dependencies (L<AnyEvent> is a
268 smaller and more portable dependency than L<Coro>), but it would require a
269 lot more work on the side of the module author wanting to support it than
270 this solution.
271
272 =item Low Code and Data Size Overhead
273
274 On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one
275 pointer) of your data segment, to store the C<perl_multicore_api>
276 pointer. In addition it creates a C<16> octet perl string to store the
277 function pointers in, and stores it in a hash provided by perl for this
278 purpose.
279
280 This is pretty much the equivalent of executing this code:
281
282 $existing_hash{perl_multicore_api} = "1234567812345678";
283
284 And that's it, which is, as I think, indeed very little.
285
286 As for code size, on my amd64 system, every call to C<perlinterp_release>
287 or C<perlinterp_acquire> results in a variation of the following 9-10
288 octet sequence:
289
290 150> mov 0x200f23(%rip),%rax # <perl_multicore_api>
291 157> callq *0x8(%rax)
292
293 The biggest part is the initialisation code, which consists of 11 lines of
294 typical XS code. On my system, all the code in F<perlmulticore.h> compiles
295 to less than 160 octets of read-only data.
296
297 =item Broad Applicability
298
299 While there are alternative ways to achieve the goal of parallel execution
300 with threads that might be more efficient, this mechanism was chosen
301 because it is very simple to retrofit existing modules with it.
302
303 The design goals for this mechanism were to be simple to use, very
304 efficient when not needed, low code and data size overhead and broad
305 applicability.
306
307 =back
308
309 =head1 AUTHOR
310
311 Marc A. Lehmann <perlmulticore@schmorp.de>
312
313 =head1 LICENSE
314
315 The F<perlmulticore.h> is put into the public domain. Where this is legally
316 not possible, or at your option, it can be licensed under the Creative Commons
317 CC0 license: L<https://creativecommons.org/publicdomain/zero/1.0/>.
318
319 =cut
320 */
321
/*
 * Function table that perlinterp_release () and perlinterp_acquire ()
 * dispatch through. Both entries take no arguments and return nothing.
 */
struct perl_multicore_api
{
  void (*pmapi_release) (void); /* invoked by perlinterp_release () */
  void (*pmapi_acquire) (void); /* invoked by perlinterp_acquire () */
};
327
328 static void perl_multicore_init (void);
329
330 const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort };
331
332 static struct perl_multicore_api *perl_multicore_api
333 = (struct perl_multicore_api *)&perl_multicore_api_init;
334
335 #define perlinterp_release() perl_multicore_api->pmapi_release ()
336 #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
337
/*
 * Do-nothing implementation. perl_multicore_init installs it in both
 * table slots when it has to create the table itself.
 */
static void perl_multicore_nop (void) { /* intentionally empty */ }
342
343 static void
344 perl_multicore_init (void)
345 {
346 dTHX;
347
348 /* check for existing API struct in PL_modglobal */
349 SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1);
350
351 if (SvPOKp (*api_svp))
352 perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */
353 else
354 {
355 /* create a new one with a dummy nop implementation */
356 SV *api_sv = NEWSV (0, sizeof (*perl_multicore_api));
357 SvCUR_set (api_sv, sizeof (*perl_multicore_api));
358 SvPOK_only (api_sv);
359 perl_multicore_api = (struct perl_multicore_api *)SvPVX (api_sv);
360 perl_multicore_api->pmapi_release =
361 perl_multicore_api->pmapi_acquire = perl_multicore_nop;
362 *api_svp = api_sv;
363 }
364
365 /* call the real (or dummy) implementation now */
366 perlinterp_release ();
367 }
368
369 #endif