--- deliantra/server/include/util.h 2008/12/30 07:24:16 1.83
+++ deliantra/server/include/util.h 2009/05/05 04:51:56 1.88
@@ -30,10 +30,12 @@
 # define is_constant(c) __builtin_constant_p (c)
 # define expect(expr,value) __builtin_expect ((expr),(value))
 # define prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality)
+# define noinline __attribute__((__noinline__))
 #else
 # define is_constant(c) 0
 # define expect(expr,value) (expr)
 # define prefetch(addr,rw,locality)
+# define noinline
 #endif

 #if __GNUC__ < 4 || (__GNUC__ == 4 || __GNUC_MINOR__ < 4)
@@ -43,8 +45,8 @@
 // put into ifs if you are very sure that the expression
 // is mostly true or mosty false. note that these return
 // booleans, not the expression.
-#define expect_false(expr) expect ((expr) != 0, 0)
-#define expect_true(expr) expect ((expr) != 0, 1)
+#define expect_false(expr) expect ((expr) ? 1 : 0, 0)
+#define expect_true(expr) expect ((expr) ? 1 : 0, 1)

 #include

@@ -118,13 +120,25 @@
 template<typename T>
 static inline T sign0 (T v) { return v ? sign (v) : 0; }

+// div* only work correctly for div > 0
 // div, with correct rounding (< 0.5 downwards, >=0.5 upwards)
-template<typename T> static inline T div (T val, T div) { return (val + div / 2) / div; }
+template<typename T> static inline T div (T val, T div)
+{
+  return expect_false (val < 0) ? - ((-val + (div - 1) / 2) / div) : (val + div / 2) / div;
+}
 // div, round-up
-template<typename T> static inline T div_ru (T val, T div) { return (val + div - 1) / div; }
+template<typename T> static inline T div_ru (T val, T div)
+{
+  return expect_false (val < 0) ? - ((-val ) / div) : (val + div - 1) / div;
+}
 // div, round-down
-template<typename T> static inline T div_rd (T val, T div) { return (val ) / div; }
+template<typename T> static inline T div_rd (T val, T div)
+{
+  return expect_false (val < 0) ? - ((-val + (div - 1) ) / div) : (val ) / div;
+}

+// lerp* only work correctly for min_in < max_in
+// Linear intERPolate, scales val from min_in..max_in to min_out..max_out
 template<typename T>
 static inline T
 lerp (T val, T min_in, T max_in, T min_out, T max_out)
@@ -409,6 +423,7 @@
 // http://www.jstatsoft.org/v08/i14/paper
 // this one is about 40% faster than the tausworthe one above (i.e. not much),
 // despite the inlining, and has the issue of only creating 2**32-1 numbers.
+// see also http://www.iro.umontreal.ca/~lecuyer/myftp/papers/xorshift.pdf
 struct xorshift_random_generator
 {
   uint32_t x, y;
@@ -542,7 +557,8 @@
 {
   std::size_t operator ()(const char *s) const
   {
-    unsigned long hash = 0;
+#if 0
+    uint32_t hash = 0;

     /* use the one-at-a-time hash function, which supposedly is
      * better than the djb2-like one used by perl5.005, but
@@ -559,6 +575,17 @@
     hash += hash << 3;
     hash ^= hash >> 11;
     hash += hash << 15;
+#else
+    // use FNV-1a hash (http://isthe.com/chongo/tech/comp/fnv/)
+    // it is about twice as fast as the one-at-a-time one,
+    // with good distribution.
+    // FNV-1a is faster on many cpus because the multiplication
+    // runs concurrent with the looping logic.
+    uint32_t hash = 2166136261;
+
+    while (*s)
+      hash = (hash ^ *s++) * 16777619;
+#endif

     return hash;
   }
@@ -663,13 +690,14 @@
 };

 // basically does what strncpy should do, but appends "..." to strings exceeding length
-void assign (char *dst, const char *src, int maxlen);
+// returns the number of bytes actually used (including \0)
+int assign (char *dst, const char *src, int maxsize);

 // type-safe version of assign
 template<int N>
-inline void assign (char (&dst)[N], const char *src)
+inline int assign (char (&dst)[N], const char *src)
 {
-  assign ((char *)&dst, src, N);
+  return assign ((char *)&dst, src, N);
 }

 typedef double tstamp;