--- Compress-LZF/perlmulticore.h 2015/06/29 23:51:28 1.3 +++ Compress-LZF/perlmulticore.h 2019/03/03 11:33:55 1.4 @@ -2,6 +2,10 @@ * Author: Marc A. Lehmann * License: public domain, or where this is not possible/at your option, * CC0 (https://creativecommons.org/publicdomain/zero/1.0/) + * + * Full documentation can be found at http://perlmulticore.schmorp.de/ + * The newest version of this header can be downloaded from + * http://perlmulticore.schmorp.de/perlmulticore.h */ #ifndef PERL_MULTICORE_H @@ -11,11 +15,11 @@ =head1 NAME -perlmulticore.h - the Perl Multicore Specification and Implementation +perlmulticore.h - implements the Perl Multicore Specification =head1 SYNOPSIS - #include "perlmultiore.h" + #include "perlmulticore.h" // in your XS function: @@ -25,20 +29,17 @@ =head1 DESCRIPTION -This header file implements a simple mechanism for XS modules to allow -re-use of the perl interpreter for other threads while doing some lengthy -operation, such as cryptography, SQL queries, disk I/O and so on. - -The design goals for this mechanism were to be simple to use, very -efficient when not needed, low code and data size overhead and broad -applicability. - -The newest version of this document can be found at -L<http://perlmulticore.schmorp.de/>. - -The newest version of the header file itself, which -includes this documentation, can be downloaded from -L<http://perlmulticore.schmorp.de/perlmulticore.h>. +This documentation is the abridged version of the full documentation at +L<http://perlmulticore.schmorp.de/>. It's recommended to go there instead +of reading this document. + +This header file implements a very low overhead (both in code and runtime) +mechanism for XS modules to allow re-use of the perl interpreter for other +threads while doing some lengthy operation, such as cryptography, SQL +queries, disk I/O and so on. + +The newest version of the header file itself can be downloaded from +L<http://perlmulticore.schmorp.de/perlmulticore.h>. =head1 HOW DO I USE THIS IN MY MODULES? @@ -54,74 +55,8 @@ And that's it. 
This doesn't load any modules and consists of only a few machine instructions when no module to take advantage of it is loaded. -Here is a simple example, an C wrapper implemented in XS. Unlike -perl's built-in C, it allows other threads (for example, those -provided by L) to execute, instead of blocking the whole perl -interpreter. For the sake of this example, it requires a file descriptor -instead of a handle. - - #include "perlmulticore.h" // this header file - - // and in the XS portion - int flock (int fd, int operation) - CODE: - perlinterp_release (); - RETVAL = flock (fd, operation); - perlinterp_acquire (); - OUTPUT: - RETVAL - -Another example would be to modify L to allow other -threads to execute while executing SQL queries. One way to do this -is find all C and similar calls (such as -C), and adorn them with release/acquire -calls: - - { - perlinterp_release (); - imp_sth->row_num= mysql_st_internal_execute(sth, ...); - perlinterp_acquire (); - } - -=head2 HOW ABOUT NOT-SO LONG WORK? - -Sometimes you don't know how long your code will take - in a compression -library for example, compressing a few hundred Kilobyte of data can take -a while, while 50 Bytes will compress so fast that even attempting to do -something else could be more costly than just doing it. - -This is a very hard problem to solve. The best you can do at the moment is -to release the perl interpreter only when you think the work to be done -justifies the expense. - -As a rule of thumb, if you expect to need more than a few thousand cycles, -you should release the interpreter, else you shouldn't. When in doubt, -release. - -For example, in a compression library, you might want to do this: - - if (bytes_to_be_compressed > 2000) perlinterp_release (); - do_compress (...); - if (bytes_to_be_compressed > 2000) perlinterp_acquire (); - -Make sure the if conditions are exactly the same and don't change, so you -always call acquire when you release, and vice versa. 
- -When you don't have a handy indicator, you might still do something -useful. For example, if you do some file locking with C<fcntl> and you -expect the lock to be available immediately in most cases, you could try -with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when -the lock couldn't be set: - - int res = fcntl (fd, F_SETLK, &flock); - - if (res) - { - // error, assume lock is held by another process and do it the slow way - perlinterp_release (); - res = fcntl (fd, F_SETLKW, &flock); - perlinterp_acquire (); - } +More documentation and examples can be found at the perl multicore site at +L<http://perlmulticore.schmorp.de/>. =head1 THE HARD AND FAST RULES @@ -131,39 +66,21 @@ =item I<Never> touch any perl data structures after calling C<perlinterp_release>. -Possibly the most important rule of them all, anything perl is -completely off-limits after C<perlinterp_release>, until you call -C<perlinterp_acquire>, after which you can access perl stuff again. +Anything perl is completely off-limits after C<perlinterp_release>, until +you call C<perlinterp_acquire>, after which you can access perl stuff +again. That includes anything in the perl interpreter that you didn't prove to be safe, and didn't prove to be safe in older and future versions of perl: global variables, local perl scalars, even if you are sure nobody accesses -them and you only try to "read" their value, and so on. - -If you need to access perl things, do it before releasing the -interpreter with C<perlinterp_release>, or after acquiring it again with -C<perlinterp_acquire>. +them and you only try to "read" their value. =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs. For each C<perlinterp_release> call there must be a C<perlinterp_acquire> call. They don't have to be in the same function, and you can have multiple calls to them, as long as every C<perlinterp_release> call is -followed by exactly one C<perlinterp_acquire> call. - -For example., this would be fine: - - perlinterp_release (); - - if (!function_that_fails_with_0_return_value ()) - { - perlinterp_acquire (); - croak ("error"); - // croak doesn't return - } - - perlinterp_acquire (); - // do other stuff +followed by exactly one C<perlinterp_acquire> call at runtime. =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>. 
@@ -174,8 +91,8 @@ =item I<Always> call C<perlinterp_release> first. -Also simple: you I<cannot> call C<perlinterp_acquire> without having -called C<perlinterp_release> before. +You I<cannot> call C<perlinterp_acquire> without having called +C<perlinterp_release> before. =item I<Never> underestimate threads. @@ -188,117 +105,7 @@ Always assume that the code between C<perlinterp_release> and C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same -time. If your code can't cope with that, you could consider using a mutex -to only allow one such execution, which is still better than blocking -everybody else from doing anything: - - static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER; - - perlinterp_release (); - pthread_mutex_lock (&my_mutex); - do_your_non_thread_safe_thing (); - pthread_mutex_unlock (&my_mutex); - perlinterp_acquire (); - -=item I<Don't> get confused by having to release first. - -In many real world scenarios, you acquire a resource, do something, then -release it again. Don't let this confuse you, with this, you already own -the resource (the perl interpreter) so you have to I<release> first, and -I<acquire> it again later, not the other way around. - -=back - - -=head1 DESIGN PRINCIPLES - -This section discusses how the design goals were reached (you be the -judge), how it is implemented, and what overheads this implies. - -=over 4 - -=item Simple to Use - -All you have to do is identify the place in your existing code where you -stop touching perl stuff, do your actual work, and start touching perl -stuff again. - -Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the -actual work code. - -You have to include F<perlmulticore.h> and distribute it with your XS -code, but all these things border on the trivial. - -=item Very Efficient - -The definition for C<perlinterp_release> and C<perlinterp_acquire> is very -short: - - #define perlinterp_release() perl_multicore_api->pmapi_release () - #define perlinterp_acquire() perl_multicore_api->pmapi_acquire () - -Both are macros that read a pointer from memory (perl_multicore_api), -dereference a function pointer stored at that place, and call the -function, which takes no arguments and returns nothing. 
- -The first call to C will check for the presence -of any supporting module, and if none is loaded, will create a dummy -implementation where both C and C execute -this function: - - static void perl_multicore_nop (void) { } - -So in the case of no magical module being loaded, all calls except the -first are two memory accesses and a predictable function call of an empty -function. - -Of course, the overhead is much higher when these functions actually -implement anything useful, but you always get what you pay for. - -With L, every release/acquire involves two pthread -switches, two coro thread switches, a bunch of syscalls, and sometimes -interacting with the event loop. - -A dedicated thread pool such as the one L uses could reduce -these overheads, and would also reduce the dependencies (L is a -smaller and more portable dependency than L), but it would require a -lot more work on the side of the module author wanting to support it than -this solution. - -=item Low Code and Data Size Overhead - -On a 64 bit system, F uses exactly C<8> octets (one -pointer) of your data segment, to store the C -pointer. In addition it creates a C<16> octet perl string to store the -function pointers in, and stores it in a hash provided by perl for this -purpose. - -This is pretty much the equivalent of executing this code: - - $existing_hash{perl_multicore_api} = "123456781234567812345678"; - -And that's it, which is, as I think, indeed very little. - -As for code size, on my amd64 system, every call to C -or C results in a variation of the following 9-10 -octet sequence: - - 150> mov 0x200f23(%rip),%rax # - 157> callq *0x8(%rax) - -The biggest part if the initialisation code, which consists of 11 lines of -typical XS code. On my system, all the code in F compiles -to less than 160 octets of read-only data. 
- -=item Broad Applicability - -While there are alternative ways to achieve the goal of parallel execution -with threads that might be more efficient, this mechanism was chosen -because it is very simple to retrofit existing modules with it, and it - -The design goals for this mechanism were to be simple to use, very -efficient when not needed, low code and data size overhead and broad -applicability. +time. =back @@ -309,9 +116,6 @@ symbol C to C<1> (e.g. by specifying F<-DPERL_MULTICORE_DISABLE> as compiler argument). -This will leave no traces of the API in the compiled code, suitable -"empty" C and C definitions will be provided. - This could be added to perl's C when configuring perl on platforms that do not support threading at all for example. @@ -333,7 +137,7 @@ */ #define PERL_MULTICORE_MAJOR 1 /* bumped on incompatible changes */ -#define PERL_MULTICORE_MINOR 0 /* bumped on every change */ +#define PERL_MULTICORE_MINOR 1 /* bumped on every change */ #if PERL_MULTICORE_DISABLE @@ -342,6 +146,8 @@ #else +START_EXTERN_C + /* this struct is shared between all modules, and currently */ /* contain only the two function pointers for release/acquire */ struct perl_multicore_api @@ -352,7 +158,8 @@ static void perl_multicore_init (void); -const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort }; +static const struct perl_multicore_api perl_multicore_api_init + = { perl_multicore_init, 0 }; static struct perl_multicore_api *perl_multicore_api = (struct perl_multicore_api *)&perl_multicore_api_init; @@ -366,6 +173,8 @@ { } +static const char perl_multicore_api_key[] = "perl_multicore_api"; + /* this is the initial implementation of "release" - it initialises */ /* the api and then calls the real release function */ static void @@ -374,14 +183,19 @@ dTHX; /* check for existing API struct in PL_modglobal */ - SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1); + SV **api_svp = hv_fetch 
(PL_modglobal, perl_multicore_api_key, + sizeof (perl_multicore_api_key) - 1, 1); if (SvPOKp (*api_svp)) perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */ else { /* create a new one with a dummy nop implementation */ + #ifdef NEWSV SV *api_sv = NEWSV (0, sizeof (*perl_multicore_api)); + #else + SV *api_sv = newSV ( sizeof (*perl_multicore_api)); + #endif SvCUR_set (api_sv, sizeof (*perl_multicore_api)); SvPOK_only (api_sv); perl_multicore_api = (struct perl_multicore_api *)SvPVX (api_sv); @@ -394,6 +208,9 @@ perlinterp_release (); } +END_EXTERN_C + #endif #endif +