--- Compress-LZF/perlmulticore.h	2015/06/29 23:51:28	1.3
+++ Compress-LZF/perlmulticore.h	2019/03/03 11:33:55	1.4
@@ -2,6 +2,10 @@
  * Author: Marc A. Lehmann <xsthreadpool@schmorp.de>
  * License: public domain, or where this is not possible/at your option,
  *          CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
+ *
+ * Full documentation can be found at http://perlmulticore.schmorp.de/
+ * The newest version of this header can be downloaded from
+ * http://perlmulticore.schmorp.de/perlmulticore.h
  */
 
 #ifndef PERL_MULTICORE_H
@@ -11,11 +15,11 @@
 
 =head1 NAME
 
-perlmulticore.h - the Perl Multicore Specification and Implementation
+perlmulticore.h - implements the Perl Multicore Specification
 
 =head1 SYNOPSIS
 
-  #include "perlmultiore.h"
+  #include "perlmulticore.h"
 
   // in your XS function:
 
@@ -25,20 +29,17 @@
 
 =head1 DESCRIPTION
 
-This header file implements a simple mechanism for XS modules to allow
-re-use of the perl interpreter for other threads while doing some lengthy
-operation, such as cryptography, SQL queries, disk I/O and so on.
-
-The design goals for this mechanism were to be simple to use, very
-efficient when not needed, low code and data size overhead and broad
-applicability.
-
-The newest version of this document can be found at
-L<http://pod.tst.eu/http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
-
-The newest version of the header file itself, which
-includes this documentation, can be downloaded from
-L<http://cvs.schmorp.de/Coro-Multicore/perlmulticore.h>.
+This documentation is the abridged version of the full documention at
+L<http://perlmulticore.schmorp.de/>. It's recommended to go there instead
+of reading this document.
+
+This header file implements a very low overhead (both in code and runtime)
+mechanism for XS modules to allow re-use of the perl interpreter for other
+threads while doing some lengthy operation, such as cryptography, SQL
+queries, disk I/O and so on.
+
+The newest version of the header file itself, can be downloaded from
+L<http://perlmulticore.schmorp.de/perlmulticore.h>.
 
 =head1 HOW DO I USE THIS IN MY MODULES?
 
@@ -54,74 +55,8 @@
 And that's it. This doesn't load any modules and consists of only a few
 machine instructions when no module to take advantage of it is loaded.
 
-Here is a simple example, an C<flock> wrapper implemented in XS. Unlike
-perl's built-in C<flock>, it allows other threads (for example, those
-provided by L<Coro>) to execute, instead of blocking the whole perl
-interpreter. For the sake of this example, it requires a file descriptor
-instead of a handle.
-
-   #include "perlmulticore.h" // this header file
-
-   // and in the XS portion
-   int flock (int fd, int operation)
-           CODE:
-           perlinterp_release ();
-           RETVAL = flock (fd, operation);
-           perlinterp_acquire ();
-           OUTPUT:
-           RETVAL
-
-Another example would be to modify L<DBD::mysql> to allow other
-threads to execute while executing SQL queries. One way to do this
-is find all C<mysql_st_internal_execute> and similar calls (such as
-C<mysql_st_internal_execute41>), and adorn them with release/acquire
-calls:
-
-   {
-     perlinterp_release ();
-     imp_sth->row_num= mysql_st_internal_execute(sth, ...);
-     perlinterp_acquire ();
-   }
-
-=head2 HOW ABOUT NOT-SO LONG WORK?
-
-Sometimes you don't know how long your code will take - in a compression
-library for example, compressing a few hundred Kilobyte of data can take
-a while, while 50 Bytes will compress so fast that even attempting to do
-something else could be more costly than just doing it.
-
-This is a very hard problem to solve. The best you can do at the moment is
-to release the perl interpreter only when you think the work to be done
-justifies the expense.
-
-As a rule of thumb, if you expect to need more than a few thousand cycles,
-you should release the interpreter, else you shouldn't. When in doubt,
-release.
-
-For example, in a compression library, you might want to do this:
-
-   if (bytes_to_be_compressed > 2000) perlinterp_release ();
-   do_compress (...);
-   if (bytes_to_be_compressed > 2000) perlinterp_acquire ();
-
-Make sure the if conditions are exactly the same and don't change, so you
-always call acquire when you release, and vice versa.
-
-When you don't have a handy indicator, you might still do something
-useful. For example, if you do some file locking with C<fcntl> and you
-expect the lock to be available immediately in most cases, you could try
-with C<F_SETLK> (which doesn't wait), and only release/wait/acquire when
-the lock couldn't be set:
-
-   int res = fcntl (fd, F_SETLK, &flock);
-
-   if (res)
-     {
-       // error, assume lock is held by another process and do it the slow way
-       perlinterp_release ();
-       res = fcntl (fd, F_SETLKW, &flock);
-       perlinterp_acquire ();
-     }
+More documentation and examples can be found at the perl multicore site at
+L<http://perlmulticore.schmorp.de>.
 
 =head1 THE HARD AND FAST RULES
 
@@ -131,39 +66,21 @@
 
 =item I<Never> touch any perl data structures after calling C<perlinterp_release>.
 
-Possibly the most important rule of them all, anything perl is
-completely off-limits after C<perlinterp_release>, until you call
-C<perlinterp_acquire>, after which you can access perl stuff again.
+Anything perl is completely off-limits after C<perlinterp_release>, until
+you call C<perlinterp_acquire>, after which you can access perl stuff
+again.
 
 That includes anything in the perl interpreter that you didn't prove to be
 safe, and didn't prove to be safe in older and future versions of perl:
 global variables, local perl scalars, even if you are sure nobody accesses
-them and you only try to "read" their value, and so on.
-
-If you need to access perl things, do it before releasing the
-interpreter with C<perlinterp_release>, or after acquiring it again with
-C<perlinterp_acquire>.
+them and you only try to "read" their value.
 
 =item I<Always> call C<perlinterp_release> and C<perlinterp_acquire> in pairs.
 
 For each C<perlinterp_release> call there must be a C<perlinterp_acquire>
 call. They don't have to be in the same function, and you can have
 multiple calls to them, as long as every C<perlinterp_release> call is
-followed by exactly one C<perlinterp_acquire> call.
-
-For example., this would be fine:
-
-   perlinterp_release ();
-
-   if (!function_that_fails_with_0_return_value ())
-     {
-       perlinterp_acquire ();
-       croak ("error");
-       // croak doesn't return
-     }
-
-   perlinterp_acquire ();
-   // do other stuff
+followed by exactly one C<perlinterp_acquire> call at runtime.
 
 =item I<Never> nest calls to C<perlinterp_release> and C<perlinterp_acquire>.
 
@@ -174,8 +91,8 @@
 
 =item I<Always> call C<perlinterp_release> first.
 
-Also simple: you I<must not> call C<perlinterp_acquire> without having
-called C<perlinterp_release> before.
+You I<must not> call C<perlinterp_acquire> without having called
+C<perlinterp_release> before.
 
 =item I<Never> underestimate threads.
 
@@ -188,117 +105,7 @@
 
 Always assume that the code between C<perlinterp_release> and
 C<perlinterp_acquire> is executed in parallel on multiple CPUs at the same
-time. If your code can't cope with that, you could consider using a mutex
-to only allow one such execution, which is still better than blocking
-everybody else from doing anything:
-
-   static pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-   perlinterp_release ();
-   pthread_mutex_lock (&my_mutex);
-   do_your_non_thread_safe_thing ();
-   pthread_mutex_unlock (&my_mutex);
-   perlinterp_acquire ();
-
-=item I<Don't> get confused by having to release first.
-
-In many real world scenarios, you acquire a resource, do something, then
-release it again. Don't let this confuse you, with this, you already own
-the resource (the perl interpreter) so you have to I<release> first, and
-I<acquire> it again later, not the other way around.
-
-=back
-
-
-=head1 DESIGN PRINCIPLES
-
-This section discusses how the design goals were reached (you be the
-judge), how it is implemented, and what overheads this implies.
-
-=over 4
-
-=item Simple to Use
-
-All you have to do is identify the place in your existing code where you
-stop touching perl stuff, do your actual work, and start touching perl
-stuff again.
-
-Then slap C<perlinterp_release ()> and C<perlinterp_acquire ()> around the
-actual work code.
-
-You have to include F<perlmulticore.h> and distribute it with your XS
-code, but all these things border on the trivial.
-
-=item Very Efficient
-
-The definition for C<perlinterp_release> and C<perlinterp_release> is very
-short:
-
-   #define perlinterp_release() perl_multicore_api->pmapi_release ()
-   #define perlinterp_acquire() perl_multicore_api->pmapi_acquire ()
-
-Both are macros that read a pointer from memory (perl_multicore_api),
-dereference a function pointer stored at that place, and call the
-function, which takes no arguments and returns nothing.
-
-The first call to C<perlinterp_release> will check for the presence
-of any supporting module, and if none is loaded, will create a dummy
-implementation where both C<pmapi_release> and C<pmapi_acquire> execute
-this function:
-
-  static void perl_multicore_nop (void) { }
-
-So in the case of no magical module being loaded, all calls except the
-first are two memory accesses and a predictable function call of an empty
-function.
-
-Of course, the overhead is much higher when these functions actually
-implement anything useful, but you always get what you pay for.
-
-With L<Coro::Multicore>, every release/acquire involves two pthread
-switches, two coro thread switches, a bunch of syscalls, and sometimes
-interacting with the event loop.
-
-A dedicated thread pool such as the one L<IO::AIO> uses could reduce
-these overheads, and would also reduce the dependencies (L<AnyEvent> is a
-smaller and more portable dependency than L<Coro>), but it would require a
-lot more work on the side of the module author wanting to support it than
-this solution.
-
-=item Low Code and Data Size Overhead
-
-On a 64 bit system, F<perlmulticore.h> uses exactly C<8> octets (one
-pointer) of your data segment, to store the C<perl_multicore_api>
-pointer. In addition it creates a C<16> octet perl string to store the
-function pointers in, and stores it in a hash provided by perl for this
-purpose.
-
-This is pretty much the equivalent of executing this code:
-
-   $existing_hash{perl_multicore_api} = "123456781234567812345678";
-
-And that's it, which is, as I think, indeed very little.
-
-As for code size, on my amd64 system, every call to C<perlinterp_release>
-or C<perlinterp_acquire> results in a variation of the following 9-10
-octet sequence:
-
-   150>   mov    0x200f23(%rip),%rax  # <perl_multicore_api>
-   157>   callq  *0x8(%rax)
-
-The biggest part if the initialisation code, which consists of 11 lines of
-typical XS code. On my system, all the code in F<perlmulticore.h> compiles
-to less than 160 octets of read-only data.
-
-=item Broad Applicability
-
-While there are alternative ways to achieve the goal of parallel execution
-with threads that might be more efficient, this mechanism was chosen
-because it is very simple to retrofit existing modules with it, and it
-
-The design goals for this mechanism were to be simple to use, very
-efficient when not needed, low code and data size overhead and broad
-applicability.
+time.
 
 =back
 
@@ -309,9 +116,6 @@
 symbol C<PERL_MULTICORE_DISABLE> to C<1> (e.g. by specifying
 F<-DPERL_MULTICORE_DISABLE> as compiler argument).
 
-This will leave no traces of the API in the compiled code, suitable
-"empty" C<perl_release> and C<perl_acquire> definitions will be provided.
-
 This could be added to perl's C<CPPFLAGS> when configuring perl on
 platforms that do not support threading at all for example.
 
@@ -333,7 +137,7 @@
 */
 
 #define PERL_MULTICORE_MAJOR 1 /* bumped on incompatible changes */
-#define PERL_MULTICORE_MINOR 0 /* bumped on every change */
+#define PERL_MULTICORE_MINOR 1 /* bumped on every change */
 
 #if PERL_MULTICORE_DISABLE
 
@@ -342,6 +146,8 @@
 
 #else
 
+START_EXTERN_C
+
 /* this struct is shared between all modules, and currently */
 /* contain only the two function pointers for release/acquire */
 struct perl_multicore_api
@@ -352,7 +158,8 @@
 
 static void perl_multicore_init (void);
 
-const struct perl_multicore_api perl_multicore_api_init = { perl_multicore_init, abort };
+static const struct perl_multicore_api perl_multicore_api_init
+   = { perl_multicore_init, 0 };
 
 static struct perl_multicore_api *perl_multicore_api
    = (struct perl_multicore_api *)&perl_multicore_api_init;
@@ -366,6 +173,8 @@
 {
 }
 
+static const char perl_multicore_api_key[] = "perl_multicore_api";
+
 /* this is the initial implementation of "release" - it initialises */
 /* the api and then calls the real release function */
 static void
@@ -374,14 +183,19 @@
   dTHX;
 
   /* check for existing API struct in PL_modglobal */
-  SV **api_svp = hv_fetch (PL_modglobal, "perl_multicore_api", sizeof ("perl_multicore_api") - 1, 1);
+  SV **api_svp = hv_fetch (PL_modglobal, perl_multicore_api_key,
+                           sizeof (perl_multicore_api_key) - 1, 1);
 
   if (SvPOKp (*api_svp))
     perl_multicore_api = (struct perl_multicore_api *)SvPVX (*api_svp); /* we have one, use the existing one */
   else
     {
       /* create a new one with a dummy nop implementation */
+      #ifdef NEWSV
       SV *api_sv = NEWSV (0, sizeof (*perl_multicore_api));
+      #else
+      SV *api_sv = newSV (   sizeof (*perl_multicore_api));
+      #endif
       SvCUR_set (api_sv, sizeof (*perl_multicore_api));
       SvPOK_only (api_sv);
       perl_multicore_api = (struct perl_multicore_api *)SvPVX (api_sv);
@@ -394,6 +208,9 @@
   perlinterp_release ();
 }
 
+END_EXTERN_C
+
 #endif
 
 #endif
+