ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Coro-Multicore/perlmulticore.h
Revision: 1.1
Committed: Sat Jun 27 19:32:15 2015 UTC (8 years, 11 months ago) by root
Content type: text/plain
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# Content
1 /* TODO: if () acquire example */
2 /*
3 * Author: Marc A. Lehmann <xsthreadpool@schmorp.de>
4 * License: public domain, or where this is not possible/at your option,
5 * CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
6 */
7
8 #ifndef PERL_MULTICORE_H
9 #define PERL_MULTICORE_H
10
11 #if 0
12
13 =head1 NAME
14
15 perlmulticore.h - release the perl interpreter for other uses while doing hard work
16
17 =head1 SYNOPSIS
18
19 #include "perlmulticore.h"
20
21 // in your XS function:
22
23 perlinterp_release ();
24 do_the_C_thing ();
25 perlinterp_acquire ();
26
27 =head1 DESCRIPTION
28
29 This header file implements a simple mechanism for XS modules to allow
30 re-use of the perl interpreter for other threads while doing some lengthy
31 operation, such as cryptography, SQL queries, disk I/O and so on.
32
33 The design goals for this mechanism were to be simple to use, very
34 efficient when not needed, low code and data size overhead and broad
35 applicability.
36
37
38 =head1 HOW DO I USE THIS IN MY MODULES?
39
40 The usage is very simple - you include this header file in your XS module. Then, before you
41 do your lengthy operation, you release the perl interpreter:
42
43 perlinterp_release ();
44
45 And when you are done with your computation, you acquire it again:
46
47 perlinterp_acquire ();
48
49 And that's it. This doesn't load any modules and consists of only a few
50 machine instructions when no module to take advantage of it is loaded.
51
52 Here is a simple example, a C<flock> wrapper implemented in XS. Unlike
53 perl's built-in C<flock>, it allows other threads (for example, those
54 provided by L<Coro>) to execute, instead of blocking the whole perl
55 interpreter. For the sake of this example, it requires a file descriptor
56 instead of a handle.
57
58 #include "perlmulticore.h" /* this header file */
59
60 // and in the XS portion
61 int flock (int fd, int operation)
62 CODE:
63 perlinterp_release ();
64 RETVAL = flock (fd, operation);
65 perlinterp_acquire ();
66 OUTPUT:
67 RETVAL
68
69 Another example would be to modify L<DBD::mysql> to allow other
70 threads to execute while executing SQL queries. One way to do this
71 is to find all C<mysql_st_internal_execute> and similar calls (such as
72 C<mysql_st_internal_execute41>), and adorn them with release/acquire
73 calls:
74
75 {
76 perlinterp_release ();
77 imp_sth->row_num= mysql_st_internal_execute(sth, ...);
78 perlinterp_acquire ();
79 }
80
81 =head2 HOW ABOUT NOT-SO LONG WORK?
82
83 Sometimes you don't know how long your code will take - in a compression
84 library for example, compressing a few hundred kilobytes of data can take
85 a while, while 50 bytes will compress so fast that even attempting to do
86 something else could be more costly than just doing it.
87
88 This is a very hard problem to solve. The best you can do at the moment is
89 to release the perl interpreter only when you think the work to be done
90 justifies the expense.
91
92 As a rule of thumb, if you expect to need more than a few thousand cycles,
93 you should release the interpreter, else you shouldn't. When in doubt,
94 release.
95
96 For example, in a compression library, you might want to do this:
97
98 if (bytes_to_be_compressed > 2000) perlinterp_release ();
99 do_compress (...);
100 if (bytes_to_be_compressed > 2000) perlinterp_acquire ();
101
102 Make sure the if conditions are exactly the same and don't change, so you
103 always call acquire when you release, and vice versa.
104
105 When you don't have a handy indicator, you might still do something
106 useful. For example, if you do some file locking with C<fcntl> and you
107 expect the lock to be available immediately in most cases, you could try
108 with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when
109 the lock couldn't be set:
110
111 int res = fcntl (fd, F_SETLK, &flock);
112
113 if (res)
114 {
115 // error, assume lock is held by another process and do it the slow way
116 perlinterp_release ();
117 res = fcntl (fd, F_SETLKW, &flock);
118 perlinterp_acquire ();
119 }
120
121 =head1 THE HARD AND FAST RULES
122
123 As with everything, there are a number of rules to follow.
124
125 =over 4
126
127 =item I<Never> touch any perl data structures after calling C<perlinterp_release>.
128
129 Possibly the most important rule of them all, anything perl is
130 completely off-limits after C<perlinterp_release>, until you call
131 C<perlinterp_acquire>, after which you can access perl stuff again.
132
133 That includes anything in the perl interpreter that you didn't prove to be
134 safe, and didn't prove to be safe in older and future versions of perl:
135 global variables, local perl scalars, even if you are sure nobody accesses
136 them and you only try to "read" their value, and so on.
137
138 If you need to access perl things, do it before releasing the
139 interpreter with C<perlinterp_release>, or after acquiring it again with
140 C<perlinterp_acquire>.
141
142 =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs.
143
144 For each C<perlinterp_release> call there must be a C<perlinterp_acquire>
145 call. They don't have to be in the same function, and you can have
146 multiple calls to them, as long as every C<perlinterp_release> call is
147 followed by exactly one C<perlinterp_acquire> call.
148
149 For example, this would be fine:
150
151 perlinterp_release ();
152
153 if (!function_that_fails_with_0_return_value ())
154 {
155 perlinterp_acquire ();
156 croak ("error");
157 }
158
159 perlinterp_acquire ();
160 // do other stuff
161
162 =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>.
163
164 That simply means that after calling C<perlinterp_release>, you must
165 call C<perlinterp_acquire> before calling C<perlinterp_release>
166 again. Likewise, after C<perlinterp_acquire>, you can call
167 C<perlinterp_release> but not another C<perlinterp_acquire>.
168
169 =item I<Always> call C<perlinterp_release> first.
170
171 Also simple: you I<must not> call C<perlinterp_acquire> without having
172 called C<perlinterp_release> before.
173
174 =item I<Never> underestimate threads.
175
176 While it's easy to add parallel execution ability to your XS module, it
177 doesn't mean it is safe. After you release the perl interpreter, it's
178 perfectly possible that it will call your XS function in another thread,
179 even while your original function still executes. In other words: your C
180 code must be thread safe, and if you use any library, that library must be
181 thread-safe, too.
182
183 Always assume that the code between C<perlinterp_release> and
184 C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same
185 time. If your code can't cope with that, you could consider using a mutex
186 to only allow one such execution, which is still better than blocking
187 everybody else from doing anything:
188
189 static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
190
191 perlinterp_release ();
192 pthread_mutex_lock (&my_mutex);
193 do_your_non_thread_safe_thing ();
194 pthread_mutex_unlock (&my_mutex);
195 perlinterp_acquire ();
196
197 This isn't as trivial as it looks though, as you need to find out which
198 threading system is in use (with L<Coro::Multicore>, it currently is
199 always pthreads).
200
201 =item I<Don't> get confused by having to release first.
202
203 In many real world scenarios, you acquire a resource, do something, then
204 release it again. Don't let this confuse you, with this, you already own
205 the resource (the perl interpreter) so you have to I<release> first, and
206 I<acquire> it again later, not the other way around.
207
208 =back
209
210
211 =head1 DESIGN PRINCIPLES
212
213 This section discusses how the design goals were reached (you be the
214 judge), how it is implemented, and what overheads this implies.
215
216 =over 4
217
218 =item Simple to Use
219
220 All you have to do is identify the place in your existing code where you
221 stop touching perl stuff, do your actual work, and start touching perl
222 stuff again.
223
224 Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the
225 actual work code.
226
227 You have to include F<perlmulticore.h> and distribute it with your XS
228 code, but all these things border on the trivial.
229
230 =item Very Efficient
231
232 The definition for C<perlinterp_release> and C<perlinterp_acquire> is very
233 short:
234
235 #define perlinterp_release() perl_multicore_api->pmapi_release ()
236 #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
237
238 Both are macros that read a pointer from memory (perl_multicore_api),
239 dereference a function pointer stored at that place, and call the
240 function, which takes no arguments and returns nothing.
241
242 The first call to C<perlinterp_release> will check for the presence
243 of any supporting module, and if none is loaded, will create a dummy
244 implementation where both C<pmapi_release> and C<pmapi_acquire> execute
245 this function:
246
247 static void perl_multicore_nop (void) { }
248
249 So in the case of no magical module being loaded, all calls except the
250 first are two memory accesses and a predictable function call of an empty
251 function.
252
253 Of course, the overhead is much higher when these functions actually
254 implement anything useful, but you always get what you pay for.
255
256 =item Low Code and Data Size Overhead
257
258 On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one
259 pointer) of your data segment, to store the C<perl_multicore_api>
260 pointer. In addition it creates a C<16> octet perl string to store the
261 function pointers in, and stores it in a hash provided by perl for this
262 purpose.
263
264 This is pretty much the equivalent of executing this code:
265
266 $existing_hash{perl_multicore_api} = "1234567812345678";
267
268 And that's it, which is, as I think, indeed very little.
269
270 As for code size, on my amd64 system, every call to C<perlinterp_release>
271 or C<perlinterp_acquire> results in a variation of the following 9-10
272 octet sequence:
273
274 150> mov 0x200f23(%rip),%rax # <perl_multicore_api>
275 157> callq *0x8(%rax)
276
277 amd64 code sure is bloated.
278
279 The biggest part is the initialisation code, which consists of 11 lines of
280 typical XS code. On my system, all the code in F<perlmulticore.h> compiles
281 to less than 160 octets of read-only data.
282
283 =item Broad Applicability
284
285 While there are alternative ways to achieve the goal of parallel execution
286 with threads that might be more efficient, this mechanism was chosen
287 because it is very simple to retrofit existing modules with it.
288
289 The design goals for this mechanism were to be simple to use, very
290 efficient when not needed, low code and data size overhead and broad
291 applicability.
292
293 =back
294
295 =head1 AUTHOR
296
297 Marc A. Lehmann <perlmulticore@schmorp.de>
298
299 =head1 LICENSE
300
301 The F<perlmulticore.h> is put into the public domain. Where this is legally
302 not possible, or at your option, it can be licensed under the Creative Commons
303 CC0 license: L<https://creativecommons.org/publicdomain/zero/1.0/>.
304
305 =cut
306
307 #endif
308
/*
 * Table holding the two function pointers that the perlinterp_release ()
 * and perlinterp_acquire () macros call through.
 *
 * Member order matters: the positional initialiser of
 * perl_multicore_api_init relies on release coming first.
 */
struct perl_multicore_api
{
  void (*pmapi_release) (void); /* target of perlinterp_release () */
  void (*pmapi_acquire) (void); /* target of perlinterp_acquire () */
};
314
315 static void perl_multicore_init (void);
316
317 const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort };
318
319 static struct perl_multicore_api *perl_multicore_api
320 = (struct perl_multicore_api *)&perl_multicore_api_init;
321
322 #define perlinterp_release() perl_multicore_api->pmapi_release ()
323 #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
324
/*
 * Empty placeholder: perl_multicore_init stores it in both slots of the
 * dummy API struct it creates when no existing one is found.
 */
static void
perl_multicore_nop (void)
{
  /* intentionally empty */
}
329
330 static void
331 perl_multicore_init (void)
332 {
333 dTHX;
334
335 /* check for existing API struct in PL_modglobal */
336 SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1);
337
338 if (SvPOKp (*api_svp))
339 perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */
340 else
341 {
342 /* create a new one with a dummy nop implementation */
343 SV *api_sv = NEWSV (0, sizeof (*perl_multicore_api));
344 SvCUR_set (api_sv, sizeof (*perl_multicore_api));
345 SvPOK_only (api_sv);
346 perl_multicore_api = (struct perl_multicore_api *)SvPVX (api_sv);
347 perl_multicore_api->pmapi_release =
348 perl_multicore_api->pmapi_acquire = perl_multicore_nop;
349 *api_svp = api_sv;
350 }
351
352 /* call the real (or dummy) implementation now */
353 perlinterp_release ();
354 }
355
356 #endif