--- libecb/ecb.pod	2011/05/27 00:18:14	1.22
+++ libecb/ecb.pod	2011/08/25 16:06:08	1.38
@@ -17,8 +17,9 @@
 it provides a number of other lowlevel C utilities, such as endianness
 detection, byte swapping or bit rotations.
 
-Or in other words, things that should be built-in into any standard C
-system, but aren't.
+Or in other words, things that should be built into any standard C system,
+but aren't, implemented as efficiently as possible with GCC, and still
+correct with other compilers.
 
 More might come.
 
@@ -58,11 +59,11 @@
 =head2 GCC ATTRIBUTES
 
 A major part of libecb deals with GCC attributes. These are additional
-attributes that you cna assign to functions, variables and sometimes even
+attributes that you can assign to functions, variables and sometimes even
 types - much like C<const> or C<volatile> in C.
 
 While GCC allows declarations to show up in many surprising places,
-but not in many expeted places, the safest way is to put attribute
+but not in many expected places, the safest way is to put attribute
 declarations before the whole declaration:
 
    ecb_const int mysqrt (int a);
@@ -102,6 +103,21 @@
     #endif
   }
 
+=item ecb_inline
+
+This is not actually an attribute, but you use it like one. It expands
+either to C<static inline> or to just C<static>, if inline isn't
+supported. It should be used to declare functions that should be inlined,
+for code size or speed reasons.
+
+Example: inline this function, it will surely reduce code size.
+
+   ecb_inline int
+   negmul (int a, int b)
+   {
+     return - (a * b);
+   }
+
 =item ecb_noinline
 
 Prevent a function from being inlined - it might be optimised away, but
@@ -185,7 +201,7 @@
 functions), this knowledge can be used in other ways, for example, the
 function will be optimised for size, as opposed to speed, and codepaths
 leading to calls to those functions can automatically be marked as if
-C<ecb_unlikely> had been used to reach them.
+C<ecb_expect_false> had been used to reach them.
 
 Good examples for such functions would be error reporting functions, or
 functions only called in exceptional or rare cases.
@@ -257,12 +273,12 @@
 the C<expr> evaluates to C<value> a lot, which can be used for static
 branch optimisations.
 
-Usually, you want to use the more intuitive C<ecb_likely> and
-C<ecb_unlikely> functions instead.
+Usually, you want to use the more intuitive C<ecb_expect_true> and
+C<ecb_expect_false> functions instead.
 
-=item bool ecb_likely (cond)
+=item bool ecb_expect_true (cond)
 
-=item bool ecb_unlikely (cond)
+=item bool ecb_expect_false (cond)
 
 These two functions expect a expression that is true or false and return
 C<1> or C<0>, respectively, so when used in the condition of an C<if> or
@@ -270,18 +286,18 @@
 
   /* these two do the same thing */
   if (some_condition) ...;
-  if (ecb_likely (some_condition)) ...;
+  if (ecb_expect_true (some_condition)) ...;
 
-However, by using C<ecb_likely>, you tell the compiler that the condition
-is likely to be true (and for C<ecb_unlikely>, that it is unlikely to be
-true).
+However, by using C<ecb_expect_true>, you tell the compiler that the
+condition is likely to be true (and for C<ecb_expect_false>, that it is
+unlikely to be true).
 
 For example, when you check for a null pointer and expect this to be a
-rare, exceptional, case, then use C<ecb_unlikely>:
+rare, exceptional, case, then use C<ecb_expect_false>:
 
   void my_free (void *ptr)
   {
-    if (ecb_unlikely (ptr == 0))
+    if (ecb_expect_false (ptr == 0))
       return;
   }
 
@@ -289,6 +305,12 @@
 tell the compiler what the hot path through a function is can increase
 performance considerably.
 
+You might know these functions under the names C<likely> and C<unlikely>
+- while these are common aliases, we find that the expect name is easier
+to understand when quickly skimming code. If you wish, you can use
+C<ecb_likely> instead of C<ecb_expect_true> and C<ecb_unlikely> instead of
+C<ecb_expect_false> - these are simply aliases.
+
 A very good example is in a function that reserves more space for some
 memory block (for example, inside an implementation of a string stream) -
 each time something is added, you have to check for a buffer overrun, but
@@ -298,7 +320,7 @@
   ecb_inline void
   reserve (int size)
   {
-    if (ecb_unlikely (current + size > end))
+    if (ecb_expect_false (current + size > end))
       real_reserve_method (size); /* presumably noinline */
   }
 
@@ -311,13 +333,13 @@
 conditions that might improve code generation, but which are impossible to
 deduce form the code itself.
 
-For example, the example reservation function from the C<ecb_unlikely>
+For example, the example reservation function from the C<ecb_expect_false>
 description could be written thus (only C<ecb_assume> was added):
 
   ecb_inline void
   reserve (int size)
   {
-    if (ecb_unlikely (current + size > end))
+    if (ecb_expect_false (current + size > end))
       real_reserve_method (size); /* presumably noinline */
 
     ecb_assume (current + size <= end);
@@ -376,7 +398,7 @@
 
 =back
 
-=head2 BIT FIDDLING / BITSTUFFS
+=head2 BIT FIDDLING / BIT WIZARDRY
 
 =over 4
 
@@ -388,20 +410,49 @@
 (most-significant byte first) or little endian (least-significant byte
 first) respectively.
 
+On systems that are neither, their return values are unspecified.
+
 =item int ecb_ctz32 (uint32_t x)
 
+=item int ecb_ctz64 (uint64_t x)
+
 Returns the index of the least significant bit set in C<x> (or
-equivalently the number of bits set to 0 before the least significant
-bit set), starting from 0. If C<x> is 0 the result is undefined. A
-common use case is to compute the integer binary logarithm, i.e.,
-floor(log2(n)). For example:
+equivalently the number of bits set to 0 before the least significant bit
+set), starting from 0. If C<x> is 0 the result is undefined.
+
+For smaller types than C<uint32_t> you can safely use C<ecb_ctz32>.
+
+For example:
 
   ecb_ctz32 (3) = 0
   ecb_ctz32 (6) = 1
 
+=item int ecb_ld32 (uint32_t x)
+
+=item int ecb_ld64 (uint64_t x)
+
+Returns the index of the most significant bit set in C<x>, i.e. one less
+than the number of digits the number requires in binary (so that C<< 2**ld
+<= x < 2**(ld+1) >>). If C<x> is 0 the result is undefined. A common use case
+is to compute the integer binary logarithm, i.e. C<floor (log2 (x))>, for
+example to see how many bits a certain number requires to be encoded.
+
+This function is similar to the "count leading zero bits" function, except
+that that one returns how many zero bits are "in front" of the number (in
+the given data type), while C<ecb_ld> returns the index of the highest bit
+set in the number itself.
+
+For smaller types than C<uint32_t> you can safely use C<ecb_ld32>.
+
 =item int ecb_popcount32 (uint32_t x)
 
-Returns the number of bits set to 1 in C<x>. For example:
+=item int ecb_popcount64 (uint64_t x)
+
+Returns the number of bits set to 1 in C<x>.
+
+For smaller types than C<uint32_t> you can safely use C<ecb_popcount32>.
+
+For example:
 
   ecb_popcount32 (7) = 3
   ecb_popcount32 (255) = 8
@@ -410,18 +461,35 @@
 
 =item uint32_t ecb_bswap32 (uint32_t x)
 
-These two functions return the value of the 16-bit (32-bit) value C<x>
-after reversing the order of bytes (0x11223344 becomes 0x44332211).
+=item uint64_t ecb_bswap64 (uint64_t x)
 
-=item uint32_t ecb_rotr32 (uint32_t x, unsigned int count)
+These functions return the value of the 16-bit (32-bit, 64-bit) value
+C<x> after reversing the order of bytes (0x11223344 becomes 0x44332211 in
+C<ecb_bswap32>).
+
+=item uint8_t  ecb_rotl8  (uint8_t  x, unsigned int count)
+
+=item uint16_t ecb_rotl16 (uint16_t x, unsigned int count)
 
 =item uint32_t ecb_rotl32 (uint32_t x, unsigned int count)
 
-These two functions return the value of C<x> after rotating all the bits
-by C<count> positions to the right or left respectively.
+=item uint64_t ecb_rotl64 (uint64_t x, unsigned int count)
+
+=item uint8_t  ecb_rotr8  (uint8_t  x, unsigned int count)
+
+=item uint16_t ecb_rotr16 (uint16_t x, unsigned int count)
+
+=item uint32_t ecb_rotr32 (uint32_t x, unsigned int count)
+
+=item uint64_t ecb_rotr64 (uint64_t x, unsigned int count)
+
+These two families of functions return the value of C<x> after rotating
+all the bits by C<count> positions to the right (C<ecb_rotr>) or left
+(C<ecb_rotl>).
 
 Current GCC versions understand these functions and usually compile them
-to "optimal" code (e.g. a single C<roll> on x86).
+to "optimal" code (e.g. a single C<rol> or a combination of C<shld> on
+x86).
 
 =back
 
@@ -431,14 +499,38 @@
 
 =item x = ecb_mod (m, n)
 
-Returns the positive remainder of the modulo operation between C<m> and
-C<n>. Unlike the C modulo operator C<%>, this function ensures that the
-return value is always positive - ISO C guarantees very little when
-negative numbers are used with C<%>.
+Returns C<m> modulo C<n>, which is the same as the non-negative remainder
+of the division operation between C<m> and C<n>, using floored
+division. Unlike the C remainder operator C<%>, this function ensures that
+the return value is never negative and that the two numbers I<m> and
+I<m' = m + i * n> result in the same value modulo I<n> - in other words,
+C<ecb_mod> implements the mathematical modulo operation, which is missing
+in the language.
 
-C<n> must be strictly positive (i.e. C<< >1 >>), while C<m> must be
+C<n> must be strictly positive (i.e. C<< >= 1 >>), while C<m> must be
 negatable, that is, both C<m> and C<-m> must be representable in its
-type.
+type (this typically excludes the minimum signed integer value, the same
+limitation as for C</> and C<%> in C).
+
+Current GCC versions compile this into an efficient branchless sequence on
+almost all CPUs.
+
+For example, when you want to rotate forward through the members of an
+array for increasing C<m> (which might be negative), then you should use
+C<ecb_mod>, as the C<%> operator might give either negative results, or
+change direction for negative values:
+
+   for (m = -100; m <= 100; ++m)
+     int elem = myarray [ecb_mod (m, ecb_array_length (myarray))];
+
+=item x = ecb_div_rd (val, div)
+
+=item x = ecb_div_ru (val, div)
+
+Returns C<val> divided by C<div> rounded down or up, respectively.
+C<val> and C<div> must have integer types and C<div> must be strictly
+positive. Note that these functions are implemented with macros in C
+and with function templates in C++.
 
 =back
 
@@ -446,7 +538,7 @@
 
 =over 4
 
-=item element_count = ecb_array_length (name) [MACRO]
+=item element_count = ecb_array_length (name)
 
 Returns the number of elements in the array C<name>. For example: