ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Compress-LZF/perlmulticore.h
Revision: 1.1
Committed: Sat Jun 27 19:53:44 2015 UTC (8 years, 10 months ago) by root
Content type: text/plain
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# Content
1 /*
2 * Author: Marc A. Lehmann <xsthreadpool@schmorp.de>
3 * License: public domain, or where this is not possible/at your option,
4 * CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
5 */
6
7 #ifndef PERL_MULTICORE_H
8 #define PERL_MULTICORE_H
9
10 /*
11 =head1 NAME
12
13 perlmulticore.h - release the perl interpreter for other uses while doing hard work
14
15 =head1 SYNOPSIS
16
17 #include "perlmulticore.h"
18
19 // in your XS function:
20
21 perlinterp_release ();
22 do_the_C_thing ();
23 perlinterp_acquire ();
24
25 =head1 DESCRIPTION
26
27 This header file implements a simple mechanism for XS modules to allow
28 re-use of the perl interpreter for other threads while doing some lengthy
29 operation, such as cryptography, SQL queries, disk I/O and so on.
30
31 The design goals for this mechanism were to be simple to use, very
32 efficient when not needed, low code and data size overhead and broad
33 applicability.
34
35 The newest version of this document can be found at
36 L<http://pod.tst.eu/http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
37
38 The newest version of the header file itself, which
39 includes this documentation, can be downloaded from
40 L<http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
41
42 =head1 HOW DO I USE THIS IN MY MODULES?
43
44 The usage is very simple - you include this header file in your XS module. Then, before you
45 do your lengthy operation, you release the perl interpreter:
46
47 perlinterp_release ();
48
49 And when you are done with your computation, you acquire it again:
50
51 perlinterp_acquire ();
52
53 And that's it. This doesn't load any modules and consists of only a few
54 machine instructions when no module to take advantage of it is loaded.
55
56 Here is a simple example, an C<flock> wrapper implemented in XS. Unlike
57 perl's built-in C<flock>, it allows other threads (for example, those
58 provided by L<Coro>) to execute, instead of blocking the whole perl
59 interpreter. For the sake of this example, it requires a file descriptor
60 instead of a handle.
61
62 #include "perlmulticore.h" // this header file
63
64 // and in the XS portion
65 int flock (int fd, int operation)
66 CODE:
67 perlinterp_release ();
68 RETVAL = flock (fd, operation);
69 perlinterp_acquire ();
70 OUTPUT:
71 RETVAL
72
73 Another example would be to modify L<DBD::mysql> to allow other
74 threads to execute while executing SQL queries. One way to do this
75 is to find all C<mysql_st_internal_execute> and similar calls (such as
76 C<mysql_st_internal_execute41>), and adorn them with release/acquire
77 calls:
78
79 {
80 perlinterp_release ();
81 imp_sth->row_num= mysql_st_internal_execute(sth, ...);
82 perlinterp_acquire ();
83 }
84
85 =head2 HOW ABOUT NOT-SO LONG WORK?
86
87 Sometimes you don't know how long your code will take - in a compression
88 library for example, compressing a few hundred kilobytes of data can take
89 a while, while 50 Bytes will compress so fast that even attempting to do
90 something else could be more costly than just doing it.
91
92 This is a very hard problem to solve. The best you can do at the moment is
93 to release the perl interpreter only when you think the work to be done
94 justifies the expense.
95
96 As a rule of thumb, if you expect to need more than a few thousand cycles,
97 you should release the interpreter, else you shouldn't. When in doubt,
98 release.
99
100 For example, in a compression library, you might want to do this:
101
102 if (bytes_to_be_compressed > 2000) perlinterp_release ();
103 do_compress (...);
104 if (bytes_to_be_compressed > 2000) perlinterp_acquire ();
105
106 Make sure the if conditions are exactly the same and don't change, so you
107 always call acquire when you release, and vice versa.
108
109 When you don't have a handy indicator, you might still do something
110 useful. For example, if you do some file locking with C<fcntl> and you
111 expect the lock to be available immediately in most cases, you could try
112 with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when
113 the lock couldn't be set:
114
115 int res = fcntl (fd, F_SETLK, &flock);
116
117 if (res)
118 {
119 // error, assume lock is held by another process and do it the slow way
120 perlinterp_release ();
121 res = fcntl (fd, F_SETLKW, &flock);
122 perlinterp_acquire ();
123 }
124
125 =head1 THE HARD AND FAST RULES
126
127 As with everything, there are a number of rules to follow.
128
129 =over 4
130
131 =item I<Never> touch any perl data structures after calling C<perlinterp_release>.
132
133 Possibly the most important rule of them all, anything perl is
134 completely off-limits after C<perlinterp_release>, until you call
135 C<perlinterp_acquire>, after which you can access perl stuff again.
136
137 That includes anything in the perl interpreter that you didn't prove to be
138 safe, and didn't prove to be safe in older and future versions of perl:
139 global variables, local perl scalars, even if you are sure nobody accesses
140 them and you only try to "read" their value, and so on.
141
142 If you need to access perl things, do it before releasing the
143 interpreter with C<perlinterp_release>, or after acquiring it again with
144 C<perlinterp_acquire>.
145
146 =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs.
147
148 For each C<perlinterp_release> call there must be a C<perlinterp_acquire>
149 call. They don't have to be in the same function, and you can have
150 multiple calls to them, as long as every C<perlinterp_release> call is
151 followed by exactly one C<perlinterp_acquire> call.
152
153 For example, this would be fine:
154
155 perlinterp_release ();
156
157 if (!function_that_fails_with_0_return_value ())
158 {
159 perlinterp_acquire ();
160 croak ("error");
161 // croak doesn't return
162 }
163
164 perlinterp_acquire ();
165 // do other stuff
166
167 =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>.
168
169 That simply means that after calling C<perlinterp_release>, you must
170 call C<perlinterp_acquire> before calling C<perlinterp_release>
171 again. Likewise, after C<perlinterp_acquire>, you can call
172 C<perlinterp_release> but not another C<perlinterp_acquire>.
173
174 =item I<Always> call C<perlinterp_release> first.
175
176 Also simple: you I<must not> call C<perlinterp_acquire> without having
177 called C<perlinterp_release> before.
178
179 =item I<Never> underestimate threads.
180
181 While it's easy to add parallel execution ability to your XS module, it
182 doesn't mean it is safe. After you release the perl interpreter, it's
183 perfectly possible that it will call your XS function in another thread,
184 even while your original function still executes. In other words: your C
185 code must be thread safe, and if you use any library, that library must be
186 thread-safe, too.
187
188 Always assume that the code between C<perlinterp_release> and
189 C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same
190 time. If your code can't cope with that, you could consider using a mutex
191 to only allow one such execution, which is still better than blocking
192 everybody else from doing anything:
193
194 static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
195
196 perlinterp_release ();
197 pthread_mutex_lock (&my_mutex);
198 do_your_non_thread_safe_thing ();
199 pthread_mutex_unlock (&my_mutex);
200 perlinterp_acquire ();
201
202 This isn't as trivial as it looks though, as you need to find out which
203 threading system is in use (with L<Coro::Multicore>, it currently is
204 always pthreads).
205
206 =item I<Don't> get confused by having to release first.
207
208 In many real world scenarios, you acquire a resource, do something, then
209 release it again. Don't let this confuse you: with this mechanism, you already own
210 the resource (the perl interpreter) so you have to I<release> first, and
211 I<acquire> it again later, not the other way around.
212
213 =back
214
215
216 =head1 DESIGN PRINCIPLES
217
218 This section discusses how the design goals were reached (you be the
219 judge), how it is implemented, and what overheads this implies.
220
221 =over 4
222
223 =item Simple to Use
224
225 All you have to do is identify the place in your existing code where you
226 stop touching perl stuff, do your actual work, and start touching perl
227 stuff again.
228
229 Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the
230 actual work code.
231
232 You have to include F<perlmulticore.h> and distribute it with your XS
233 code, but all these things border on the trivial.
234
235 =item Very Efficient
236
237 The definitions of C<perlinterp_release> and C<perlinterp_acquire> are very
238 short:
239
240 #define perlinterp_release() perl_multicore_api->pmapi_release ()
241 #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
242
243 Both are macros that read a pointer from memory (perl_multicore_api),
244 dereference a function pointer stored at that place, and call the
245 function, which takes no arguments and returns nothing.
246
247 The first call to C<perlinterp_release> will check for the presence
248 of any supporting module, and if none is loaded, will create a dummy
249 implementation where both C<pmapi_release> and C<pmapi_acquire> execute
250 this function:
251
252 static void perl_multicore_nop (void) { }
253
254 So in the case of no magical module being loaded, all calls except the
255 first are two memory accesses and a predictable function call of an empty
256 function.
257
258 Of course, the overhead is much higher when these functions actually
259 implement anything useful, but you always get what you pay for.
260
261 With L<Coro::Multicore>, every release/acquire involves two pthread
262 switches, two coro thread switches, a bunch of syscalls, and sometimes
263 interacting with the event loop.
264
265 A dedicated thread pool such as the one L<IO::AIO> uses could reduce
266 these overheads, and would also reduce the dependencies (L<AnyEvent> is a
267 smaller and more portable dependency than L<Coro>), but it would require a
268 lot more work on the side of the module author wanting to support it than
269 this solution.
270
271 =item Low Code and Data Size Overhead
272
273 On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one
274 pointer) of your data segment, to store the C<perl_multicore_api>
275 pointer. In addition it creates a C<16> octet perl string to store the
276 function pointers in, and stores it in a hash provided by perl for this
277 purpose.
278
279 This is pretty much the equivalent of executing this code:
280
281 $existing_hash{perl_multicore_api} = "1234567812345678";
282
283 And that's it, which is, as I think, indeed very little.
284
285 As for code size, on my amd64 system, every call to C<perlinterp_release>
286 or C<perlinterp_acquire> results in a variation of the following 9-10
287 octet sequence:
288
289 150> mov 0x200f23(%rip),%rax # <perl_multicore_api>
290 157> callq *0x8(%rax)
291
292 The biggest part is the initialisation code, which consists of 11 lines of
293 typical XS code. On my system, all the code in F<perlmulticore.h> compiles
294 to less than 160 octets of read-only data.
295
296 =item Broad Applicability
297
298 While there are alternative ways to achieve the goal of parallel execution
299 with threads that might be more efficient, this mechanism was chosen
300 because it is very simple to retrofit existing modules with it.
301
302 The design goals for this mechanism were to be simple to use, very
303 efficient when not needed, low code and data size overhead and broad
304 applicability.
305
306 =back
307
308 =head1 AUTHOR
309
310 Marc A. Lehmann <perlmulticore@schmorp.de>
311
312 =head1 LICENSE
313
314 The F<perlmulticore.h> header is put into the public domain. Where this is legally
315 not possible, or at your option, it can be licensed under the Creative Commons
316 CC0 license: L<https://creativecommons.org/publicdomain/zero/1.0/>.
317
318 =cut
319 */
320
/*
 * Function table that perlinterp_release () and perlinterp_acquire ()
 * dispatch through. Both entries take no arguments and return nothing.
 * perl_multicore_init () overlays this struct on the string buffer of an SV
 * kept in PL_modglobal, so its layout is what gets shared through that slot.
 */
struct perl_multicore_api
{
  void (*pmapi_release)(void); /* invoked by perlinterp_release () */
  void (*pmapi_acquire)(void); /* invoked by perlinterp_acquire () */
};
326
327 static void perl_multicore_init (void);
328
329 const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort };
330
331 static struct perl_multicore_api *perl_multicore_api
332 = (struct perl_multicore_api *)&perl_multicore_api_init;
333
334 #define perlinterp_release() perl_multicore_api->pmapi_release ()
335 #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
336
/*
 * Fallback installed as both pmapi_release and pmapi_acquire when
 * perl_multicore_init () finds no existing API struct: it deliberately
 * does nothing.
 */
static void
perl_multicore_nop (void)
{
}
341
342 static void
343 perl_multicore_init (void)
344 {
345 dTHX;
346
347 /* check for existing API struct in PL_modglobal */
348 SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1);
349
350 if (SvPOKp (*api_svp))
351 perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */
352 else
353 {
354 /* create a new one with a dummy nop implementation */
355 SV *api_sv = NEWSV (0, sizeof (*perl_multicore_api));
356 SvCUR_set (api_sv, sizeof (*perl_multicore_api));
357 SvPOK_only (api_sv);
358 perl_multicore_api = (struct perl_multicore_api *)SvPVX (api_sv);
359 perl_multicore_api->pmapi_release =
360 perl_multicore_api->pmapi_acquire = perl_multicore_nop;
361 *api_svp = api_sv;
362 }
363
364 /* call the real (or dummy) implementation now */
365 perlinterp_release ();
366 }
367
368 #endif