--- deliantra/server/include/util.h 2008/12/19 22:47:29 1.79 +++ deliantra/server/include/util.h 2009/01/03 01:04:19 1.86 @@ -30,10 +30,12 @@ # define is_constant(c) __builtin_constant_p (c) # define expect(expr,value) __builtin_expect ((expr),(value)) # define prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality) +# define noinline __attribute__((__noinline__)) #else # define is_constant(c) 0 # define expect(expr,value) (expr) # define prefetch(addr,rw,locality) +# define noinline #endif #if __GNUC__ < 4 || (__GNUC__ == 4 || __GNUC_MINOR__ < 4) @@ -43,8 +45,8 @@ // put into ifs if you are very sure that the expression // is mostly true or mosty false. note that these return // booleans, not the expression. -#define expect_false(expr) expect ((expr) != 0, 0) -#define expect_true(expr) expect ((expr) != 0, 1) +#define expect_false(expr) expect ((expr) ? 1 : 0, 0) +#define expect_true(expr) expect ((expr) ? 1 : 0, 1) #include @@ -74,7 +76,7 @@ // use C0X decltype for auto declarations until ISO C++ sanctifies them (if ever) #define auto(var,expr) decltype(expr) var = (expr) -// very ugly macro that basicaly declares and initialises a variable +// very ugly macro that basically declares and initialises a variable // that is in scope for the next statement only // works only for stuff that can be assigned 0 and converts to false // (note: works great for pointers) @@ -98,9 +100,9 @@ template static inline T max (T a, U b) { return (U)a > b ? (U)a : b; } template static inline T clamp (T v, U a, V b) { return v < (T)a ? (T)a : v >(T)b ? (T)b : v; } -template static inline void min_it (T &v, T m) { v = min (v, m); } -template static inline void max_it (T &v, T m) { v = max (v, m); } -template static inline void clamp_it (T &v, T a, T b) { v = clamp (v, a, b); } +template static inline void min_it (T &v, U m) { v = min (v, (T)m); } +template static inline void max_it (T &v, U m) { v = max (v, (T)m); } +template static inline void clamp_it (T &v, U a, V b) { v = clamp (v, (T)a, (T)b); } template static inline void swap (T& a, U& b) { T t=a; a=(T)b; b=(U)t; } @@ -391,7 +393,6 @@ // http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps struct tausworthe_random_generator { - // generator uint32_t state [4]; void operator =(const tausworthe_random_generator &src) @@ -404,13 +405,47 @@ void seed (uint32_t seed); uint32_t next (); +}; + +// Xorshift RNGs, George Marsaglia +// http://www.jstatsoft.org/v08/i14/paper +// this one is about 40% faster than the tausworthe one above (i.e. not much), +// despite the inlining, and has the issue of only creating 2**32-1 numbers. +// see also http://www.iro.umontreal.ca/~lecuyer/myftp/papers/xorshift.pdf +struct xorshift_random_generator +{ + uint32_t x, y; + + void operator =(const xorshift_random_generator &src) + { + x = src.x; + y = src.y; + } + + void seed (uint32_t seed) + { + x = seed; + y = seed * 69069U; + } + uint32_t next () + { + uint32_t t = x ^ (x << 10); + x = y; + y = y ^ (y >> 13) ^ t ^ (t >> 10); + return y; + } +}; + +template +struct random_number_generator : generator +{ // uniform distribution, 0 .. max (0, num - 1) uint32_t operator ()(uint32_t num) { - return is_constant (num) - ? (next () * (uint64_t)num) >> 32U - : get_range (num); + return !is_constant (num) ? get_range (num) // non-constant + : num & (num - 1) ? (this->next () * (uint64_t)num) >> 32U // constant, non-power-of-two + : this->next () & (num - 1); // constant, power-of-two } // return a number within (min .. max) @@ -431,7 +466,7 @@ int get_range (int r_min, int r_max); }; -typedef tausworthe_random_generator rand_gen; +typedef random_number_generator rand_gen; extern rand_gen rndm, rmg_rndm; @@ -510,7 +545,8 @@ { std::size_t operator ()(const char *s) const { - unsigned long hash = 0; +#if 0 + uint32_t hash = 0; /* use the one-at-a-time hash function, which supposedly is * better than the djb2-like one used by perl5.005, but @@ -527,6 +563,17 @@ hash += hash << 3; hash ^= hash >> 11; hash += hash << 15; +#else + // use FNV-1a hash (http://isthe.com/chongo/tech/comp/fnv/) + // it is about twice as fast as the one-at-a-time one, + // with good distribution. + // FNV-1a is faster on many cpus because the multiplication + // runs concurrent with the looping logic. + uint32_t hash = 2166136261; + + while (*s) + hash = (hash ^ *s++) * 16777619; +#endif return hash; }