--- deliantra/server/include/util.h	2008/12/30 07:24:16	1.83
+++ deliantra/server/include/util.h	2009/05/05 04:51:56	1.88
@@ -30,10 +30,12 @@
 # define is_constant(c)             __builtin_constant_p (c)
 # define expect(expr,value)         __builtin_expect ((expr),(value))
 # define prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality)
+# define noinline                   __attribute__((__noinline__))
 #else
 # define is_constant(c)             0
 # define expect(expr,value)         (expr)
 # define prefetch(addr,rw,locality)
+# define noinline
 #endif
 
 #if __GNUC__ < 4 || (__GNUC__ == 4 || __GNUC_MINOR__ < 4)
@@ -43,8 +45,8 @@
 // put into ifs if you are very sure that the expression
 // is mostly true or mosty false. note that these return
 // booleans, not the expression.
-#define expect_false(expr) expect ((expr) != 0, 0)
-#define expect_true(expr)  expect ((expr) != 0, 1)
+#define expect_false(expr) expect ((expr) ? 1 : 0, 0) // hints that expr is usually false; ?: only needs expr to convert to bool, result is 0 or 1
+#define expect_true(expr)  expect ((expr) ? 1 : 0, 1) // hints that expr is usually true; ?: only needs expr to convert to bool, result is 0 or 1
 
 #include <pthread.h>
 
@@ -118,13 +120,25 @@
 template<typename T>
 static inline T sign0 (T v) { return v ? sign (v) : 0; }
 
+// div* only work correctly for div > 0
 // div, with correct rounding (< 0.5 downwards, >=0.5 upwards)
-template<typename T> static inline T div    (T val, T div) { return (val + div / 2) / div; }
+template<typename T> static inline T div    (T val, T div)
+{ // val < 0: round the magnitude with bias (div - 1) / 2, then negate, so exact halves still round up (assumes integer T)
+  return expect_false (val < 0) ? - ((-val + (div - 1) / 2) / div) : (val + div / 2) / div;
+}
 // div, round-up
-template<typename T> static inline T div_ru (T val, T div) { return (val + div - 1) / div; }
+template<typename T> static inline T div_ru (T val, T div)
+{ // val < 0: truncate the magnitude, then negate, which rounds towards +infinity (assumes integer T)
+  return expect_false (val < 0) ? - ((-val                ) / div) : (val + div - 1) / div;
+}
 // div, round-down
-template<typename T> static inline T div_rd (T val, T div) { return (val          ) / div; }
+template<typename T> static inline T div_rd (T val, T div)
+{ // val < 0: round the magnitude up, then negate, which rounds towards -infinity (assumes integer T)
+  return expect_false (val < 0) ? - ((-val + (div - 1)    ) / div) : (val          ) / div;
+}
 
+// lerp* only work correctly for min_in < max_in
+// Linear intERPolate, scales val from min_in..max_in to min_out..max_out
 template<typename T>
 static inline T
 lerp (T val, T min_in, T max_in, T min_out, T max_out)
@@ -409,6 +423,7 @@
 // http://www.jstatsoft.org/v08/i14/paper
 // this one is about 40% faster than the tausworthe one above (i.e. not much),
 // despite the inlining, and has the issue of only creating 2**32-1 numbers.
+// see also http://www.iro.umontreal.ca/~lecuyer/myftp/papers/xorshift.pdf
 struct xorshift_random_generator
 {
   uint32_t x, y;
@@ -542,7 +557,8 @@
 {
   std::size_t operator ()(const char *s) const
   {
-    unsigned long hash = 0;
+#if 0
+    uint32_t hash = 0;
 
     /* use the one-at-a-time hash function, which supposedly is
      * better than the djb2-like one used by perl5.005, but
@@ -559,6 +575,17 @@
     hash += hash <<  3;
     hash ^= hash >> 11;
     hash += hash << 15;
+#else
+    // use FNV-1a hash (http://isthe.com/chongo/tech/comp/fnv/)
+    // it is about twice as fast as the one-at-a-time one,
+    // with good distribution.
+    // FNV-1a is faster on many cpus because the multiplication
+    // runs concurrently with the looping logic.
+    uint32_t hash = 2166136261;
+    
+    while (*s)
+      hash = (hash ^ *s++) * 16777619;
+#endif
 
     return hash;
   }
@@ -663,13 +690,14 @@
 };
 
 // basically does what strncpy should do, but appends "..." to strings exceeding length
-void assign (char *dst, const char *src, int maxlen);
+// returns the number of bytes actually used (including \0)
+int assign (char *dst, const char *src, int maxsize);
 
 // type-safe version of assign
 template<int N>
-inline void assign (char (&dst)[N], const char *src)
+inline int assign (char (&dst)[N], const char *src)
 {
-  assign ((char *)&dst, src, N);
+  return assign ((char *)&dst, src, N); // N, the array size deduced from dst, is passed as maxsize; the callee's result is returned unchanged
 }
 
 typedef double tstamp;