--- libecb/ecb.pod	2011/05/26 20:05:25	1.4
+++ libecb/ecb.pod	2011/05/26 23:32:41	1.16
@@ -1,13 +1,56 @@
-=head1 LIBECB
+=head1 LIBECB - e-C-Builtins
 
-You suck, we don't(tm)
+=head2 ABOUT LIBECB
+
+Libecb is currently a simple header file that doesn't require any
+configuration to use or include in your project.
+
+It's part of the e-suite of libraries, other members of which include
+libev and libeio.
+
+Its homepage can be found here:
+
+    http://software.schmorp.de/pkg/libecb
+
+It mainly provides a number of wrappers around GCC built-ins, together
+with replacement functions for other compilers. In addition to this,
+it provides a number of other low-level C utilities, such as endianness
+detection, byte swapping or bit rotations.
+
+More might come.
 
 =head2 ABOUT THE HEADER
 
-- how to include it
-- it includes inttypes.h
-- no .a
-- whats a bool
+At the moment, all you have to do is copy F<ecb.h> somewhere where your
+compiler can find it and include it:
+
+   #include <ecb.h>
+
+The header should work fine for both C and C++ compilation, and gives you
+all of F<inttypes.h> in addition to the ECB symbols.
+
+There are currently no object files to link to - future versions might
+come with an (optional) object code library to link against, to reduce
+code size or gain access to additional features.
+
+It also currently includes everything from F<inttypes.h>.
+
+=head2 ABOUT THIS MANUAL / CONVENTIONS
+
+This manual mainly describes each (public) function available after
+including the F<ecb.h> header. The header might define other symbols than
+these, but these are not part of the public API, and not supported in any
+way.
+
+When the manual mentions a "function" then this could be defined either as
+an inline function, a macro, or an external symbol.
+
+When functions use a concrete standard type, such as C<int> or
+C<uint32_t>, then the corresponding function works only with that type. If
+only a generic name is used (C<expr>, C<cond>, C<value> and so on), then
+the corresponding function relies on C to implement the correct types, and
+is usually implemented as a macro. Specifically, a "bool" in this manual
+refers to any kind of boolean value, not a specific type.
 
 =head2 GCC ATTRIBUTES
 
@@ -17,8 +60,13 @@
 
 =item ecb_attribute ((attrs...))
 
-A simple wrapper that expands to C<__attribute__((attrs))> on GCC, and
-to nothing on other compilers, so the effect is that only GCC sees these.
+A simple wrapper that expands to C<__attribute__((attrs))> on GCC, and to
+nothing on other compilers, so the effect is that only GCC sees these.
+
+Example: use the C<deprecated> attribute on a function.
+
+  ecb_attribute((__deprecated__)) void
+  do_not_use_me_anymore (void);
 
 =item ecb_unused
 
@@ -26,20 +74,20 @@
 warning by GCC when it detects it as unused. This is useful when you e.g.
 declare a variable but do not always use it:
 
-   {
-     int var ecb_unused;
+  {
+    int var ecb_unused;
 
-     #ifdef SOMECONDITION
-        var = ...;
-        return var;
-     #else
-        return 0;
-     #endif
-   }
+    #ifdef SOMECONDITION
+       var = ...;
+       return var;
+    #else
+       return 0;
+    #endif
+  }
 
 =item ecb_noinline
 
-Prevent a function from being inlined - it might be optimsied away, but
+Prevent a function from being inlined - it might be optimised away, but
 not inlined into other functions. This is useful if you know your function
 is rarely called and large enough for inlining not to be helpful.
 
@@ -68,12 +116,12 @@
 
 For example, when you have a C<rndm16> function that returns a 16 bit
 random number, and you have a function that maps this to a range from
-0..n-1, then you could use this inline fucntion in a header file:
+0..n-1, then you could use this inline function in a header file:
 
   ecb_inline uint32_t
   rndm (uint32_t n)
   {
-    return n * (uint32_t)rndm16 ()) >> 16;
+    return (n * (uint32_t)rndm16 ()) >> 16;
   }
 
 However, for powers of two, you could use a normal mask, but that is only
@@ -86,20 +134,131 @@
   {
     return is_constant (n) && !(n & (n - 1))
       ? rndm16 () & (num - 1)
-      : (uint32_t)rndm16 ()) >> 16;
+      : (n * (uint32_t)rndm16 ()) >> 16;
+  }
+
+=item bool ecb_expect (expr, value)
+
+Evaluates C<expr> and returns it. In addition, it tells the compiler that
+the C<expr> evaluates to C<value> a lot, which can be used for static
+branch optimisations.
+
+Usually, you want to use the more intuitive C<ecb_likely> and
+C<ecb_unlikely> functions instead.
+
+=item bool ecb_likely (cond)
+
+=item bool ecb_unlikely (cond)
+
+These two functions expect an expression that is true or false and return
+C<1> or C<0>, respectively, so when used in the condition of an C<if> or
+other conditional statement, they will not change the program:
+
+  /* these two do the same thing */
+  if (some_condition) ...;
+  if (ecb_likely (some_condition)) ...;
+
+However, by using C<ecb_likely>, you tell the compiler that the condition
+is likely to be true (and for C<ecb_unlikely>, that it is unlikely to be
+true).
+
+For example, when you check for a null pointer and expect this to be a
+rare, exceptional, case, then use C<ecb_unlikely>:
+
+  void my_free (void *ptr)
+  {
+    if (ecb_unlikely (ptr == 0))
+      return;
+  }
+
+Consistent use of these functions to mark exceptional cases or to
+tell the compiler what the hot path through a function is can increase
+performance considerably.
+
+A very good example is in a function that reserves more space for some
+memory block (for example, inside an implementation of a string stream) -
+each time something is added, you have to check for a buffer overrun, but
+you expect that most checks will turn out to be false:
+
+  /* make sure we have "size" extra room in our buffer */
+  ecb_inline void
+  reserve (int size)
+  {
+    if (ecb_unlikely (current + size > end))
+      real_reserve_method (size); /* presumably noinline */
   }
 
-=item bool ecb_expect(expr,value)
+=item bool ecb_assume (cond)
 
-=item bool ecb_unlikely(bool)
+Try to tell the compiler that some condition is true, even if it's not
+obvious.
+
+This can be used to teach the compiler about invariants or other
+conditions that might improve code generation, but which are impossible to
+deduce from the code itself.
+
+For example, the example reservation function from the C<ecb_unlikely>
+description could be written thus (only C<ecb_assume> was added):
+
+  ecb_inline void
+  reserve (int size)
+  {
+    if (ecb_unlikely (current + size > end))
+      real_reserve_method (size); /* presumably noinline */
+
+    ecb_assume (current + size <= end);
+  }
 
-=item bool ecb_likely(bool)
+If you then call this function twice, like this:
 
-=item bool ecb_assume(cond)
+  reserve (10);
+  reserve (1);
 
-=item bool ecb_unreachable()
+Then the compiler I<might> be able to optimise out the second call
+completely, as it knows that C<< current + 1 > end >> is false and the
+call will never be executed.
+
+=item bool ecb_unreachable ()
+
+This function does nothing itself, except tell the compiler that it will
+never be executed. Apart from suppressing a warning in some cases, this
+function can be used to implement C<ecb_assume> or similar functions.
+
+=item bool ecb_prefetch (addr, rw, locality)
+
+Tells the compiler to try to prefetch memory at the given C<addr>ess
+for either reading (C<rw> = 0) or writing (C<rw> = 1). A C<locality> of
+C<0> means that there will only be one access later, C<3> means that
+the data will likely be accessed very often, and values in between mean
+something... in between. The memory pointed to by the address does not
+need to be accessible (it could be a null pointer for example), but C<rw>
+and C<locality> must be compile-time constants.
+
+An obvious way to use this is to prefetch some data far away, in a big
+array you loop over. This prefetches memory some 128 array elements later,
+in the hope that it will be ready when the CPU arrives at that location.
+
+  int sum = 0;
+
+  for (i = 0; i < N; ++i)
+    {
+      sum += arr [i];
+      ecb_prefetch (arr + i + 128, 0, 0);
+    }
+
+It's hard to predict how far to prefetch, and most CPUs that can prefetch
+are often good enough to predict this kind of behaviour themselves. It
+gets more interesting with linked lists, especially when you do some fair
+processing on each list element:
+
+  for (node *n = start; n; n = n->next)
+    {
+      ecb_prefetch (n->next, 0, 0);
+      ... do medium amount of work with *n
+    }
 
-=item bool ecb_prefetch(addr,rw,locality)
+After processing the node, (part of) the next node might already be in
+cache.
 
 =back
 
@@ -111,25 +270,57 @@
 
 =item bool ecb_little_endian ()
 
+These two functions return true if the byte order is big endian
+(most-significant byte first) or little endian (least-significant byte
+first) respectively.
+
 =item int ecb_ctz32 (uint32_t x)
 
+Returns the index of the least significant bit set in C<x> (or
+equivalently the number of bits set to 0 before the least significant
+bit set), starting from 0. If C<x> is 0 the result is undefined. When
+C<x> is a power of two, the result is its binary logarithm, i.e.
+log2(x). For example:
+
+  ecb_ctz32 (3) = 0
+  ecb_ctz32 (6) = 1
+
 =item int ecb_popcount32 (uint32_t x)
 
-=item uint32_t ecb_bswap32 (uint32_t x)
+Returns the number of bits set to 1 in C<x>. For example:
+
+  ecb_popcount32 (7) = 3
+  ecb_popcount32 (255) = 8
 
 =item uint32_t ecb_bswap16 (uint32_t x)
 
+=item uint32_t ecb_bswap32 (uint32_t x)
+
+These two functions return the value of the 16-bit (32-bit) variable
+C<x> after reversing the order of bytes.
+
 =item uint32_t ecb_rotr32 (uint32_t x, unsigned int count)
 
 =item uint32_t ecb_rotl32 (uint32_t x, unsigned int count)
 
+These two functions return the value of C<x> after rotating all the bits
+by C<count> positions to the right or left respectively.
+
 =back
 
 =head2 ARITHMETIC
 
 =over 4
 
-=item x = ecb_mod (m, n) [MACRO]
+=item x = ecb_mod (m, n)
+
+Returns the non-negative remainder of the modulo operation between C<m>
+and C<n>. Unlike the C modulo operator C<%>, this function ensures that
+the return value is always non-negative.
+
+C<n> must be strictly positive (i.e. C<< >= 1 >>), while C<m> must be
+negatable, that is, both C<m> and C<-m> must be representable in its
+type.
 
 =back
 
@@ -137,7 +328,15 @@
 
 =over 4
 
-=item ecb_array_length (name) [MACRO]
+=item element_count = ecb_array_length (name) [MACRO]
+
+Returns the number of elements in the array C<name>. For example:
+
+  int primes[] = { 2, 3, 5, 7, 11 };
+  int sum = 0;
+
+  for (i = 0; i < ecb_array_length (primes); i++)
+    sum += primes [i];
 
 =back