--- libecb/ecb.pod 2011/05/26 20:06:43 1.6 +++ libecb/ecb.pod 2011/05/26 20:49:40 1.7 @@ -8,6 +8,8 @@ - it includes inttypes.h - no .a - whats a bool +- function means macro or function +- macro means untyped =head2 GCC ATTRIBUTES @@ -89,17 +91,128 @@ : (n * (uint32_t)rndm16 ()) >> 16; } -=item bool ecb_expect(expr,value) +=item bool ecb_expect (expr, value) -=item bool ecb_unlikely(bool) +Evaluates C<expr> and returns it. In addition, it tells the compiler that +the C<expr> evaluates to C<value> a lot, which can be used for static +branch optimisations. -=item bool ecb_likely(bool) +Usually, you want to use the more intuitive C<ecb_likely> and +C<ecb_unlikely> functions instead. -=item bool ecb_assume(cond) +=item bool ecb_likely (bool) -=item bool ecb_unreachable() +=item bool ecb_unlikely (bool) -=item bool ecb_prefetch(addr,rw,locality) +These two functions expect an expression that is true or false and return +C<1> or C<0>, respectively, so when used in the condition of an C<if> or +other conditional statement, it will not change the program: + + /* these two do the same thing */ + if (some_condition) ...; + if (ecb_likely (some_condition)) ...; + +However, by using C<ecb_likely>, you tell the compiler that the condition +is likely to be true (and for C<ecb_unlikely>, that it is unlikely to be +true). + +For example, when you check for a null pointer and expect this to be a rare, +exceptional, case, then use C<ecb_unlikely>: + + void my_free (void *ptr) + { + if (ecb_unlikely (ptr == 0)) + return; + } + +Consistent use of these functions to mark away exceptional cases or to +tell the compiler what the hot path through a function is can increase +performance considerably. 
+ +A very good example is in a function that reserves more space for some +memory block (for example, inside an implementation of a string stream) - +each time something is added, you have to check for a buffer overrun, but +you expect that most checks will turn out to be false: + + /* make sure we have "size" extra room in our buffer */ + ecb_inline void + reserve (int size) + { + if (ecb_unlikely (current + size > end)) + real_reserve_method (size); /* presumably noinline */ + } + +=item bool ecb_assume (cond) + +Try to tell the compiler that some condition is true, even if it's not +obvious. + +This can be used to teach the compiler about invariants or other +conditions that might improve code generation, but which are impossible to +deduce from the code itself. + +For example, the example reservation function from the C<ecb_unlikely> +description could be written thus (only C<ecb_assume> was added): + + ecb_inline void + reserve (int size) + { + if (ecb_unlikely (current + size > end)) + real_reserve_method (size); /* presumably noinline */ + + ecb_assume (current + size <= end); + } + +If you then call this function twice, like this: + + reserve (10); + reserve (1); + +Then the compiler I<might> be able to optimise out the second call +completely, as it knows that C<< current + 1 > end >> is false and the +call will never be executed. + +=item bool ecb_unreachable () + +This function does nothing itself, except tell the compiler that it will +never be executed. Apart from suppressing a warning in some cases, this +function can be used to implement C<ecb_assume> or similar functions. + +=item bool ecb_prefetch (addr, rw, locality) + +Tells the compiler to try to prefetch memory at the given C<addr>ess +for either reading (C<rw> = 0) or writing (C<rw> = 1). A C<locality> of +C<0> means that there will only be one access later, C<3> means that +the data will likely be accessed very often, and values in between mean +something... in between. 
The memory pointed to by the address does not +need to be accessible (it could be a null pointer for example), but C<rw> +and C<locality> must be compile-time constants. + +An obvious way to use this is to prefetch some data far away, in a big +array you loop over. This prefetches memory some 128 array elements later, +in the hope that it will be ready when the CPU arrives at that location. + + int sum = 0; + + for (i = 0; i < N; ++i) + { + sum += arr [i]; + ecb_prefetch (arr + i + 128, 0, 0); + } + +It's hard to predict how far to prefetch, and most CPUs that can prefetch +are often good enough to predict this kind of behaviour themselves. It +gets more interesting with linked lists, especially when you do some fair +processing on each list element: + + for (node *n = start; n; n = n->next) + { + ecb_prefetch (n->next, 0, 0); + ... do medium amount of work with *n + } + +After processing the node, (part of) the next node might already be in +cache. =back