ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/deliantra/server/include/util.h
Revision: 1.94
Committed: Sun Nov 8 16:13:45 2009 UTC (14 years, 6 months ago) by root
Content type: text/plain
Branch: MAIN
Changes since 1.93: +1 -1 lines
Log Message:
optimisations, cache volume/nrof in mapspace

File Contents

# Content
1 /*
2 * This file is part of Deliantra, the Roguelike Realtime MMORPG.
3 *
4 * Copyright (©) 2005,2006,2007,2008 Marc Alexander Lehmann / Robin Redeker / the Deliantra team
5 *
6 * Deliantra is free software: you can redistribute it and/or modify it under
7 * the terms of the Affero GNU General Public License as published by the
8 * Free Software Foundation, either version 3 of the License, or (at your
9 * option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the Affero GNU General Public License
17 * and the GNU General Public License along with this program. If not, see
18 * <http://www.gnu.org/licenses/>.
19 *
20 * The authors can be reached via e-mail to <support@deliantra.net>
21 */
22
23 #ifndef UTIL_H__
24 #define UTIL_H__
25
26 #include <compiler.h>
27
28 #define DEBUG_POISON 0x00 // poison memory before freeing it if != 0
29 #define DEBUG_SALLOC 0 // add a debug wrapper around all sallocs
30 #define PREFER_MALLOC 0 // use malloc and not the slice allocator
31
32 #include <pthread.h>
33
34 #include <cstddef>
35 #include <cmath>
36 #include <new>
37 #include <vector>
38
39 #include <glib.h>
40
41 #include <shstr.h>
42 #include <traits.h>
43
44 #if DEBUG_SALLOC
45 # define g_slice_alloc0(s) debug_slice_alloc0(s)
46 # define g_slice_alloc(s) debug_slice_alloc(s)
47 # define g_slice_free1(s,p) debug_slice_free1(s,p)
48 void *g_slice_alloc (unsigned long size);
49 void *g_slice_alloc0 (unsigned long size);
50 void g_slice_free1 (unsigned long size, void *ptr);
51 #elif PREFER_MALLOC
52 # define g_slice_alloc0(s) calloc (1, (s))
53 # define g_slice_alloc(s) malloc ((s))
54 # define g_slice_free1(s,p) free ((p))
55 #endif
56
57 // use C0X decltype for auto declarations until ISO C++ sanctifies them (if ever)
58 #define auto(var,expr) decltype(expr) var = (expr)
59
60 // very ugly macro that basically declares and initialises a variable
61 // that is in scope for the next statement only
62 // works only for stuff that can be assigned 0 and converts to false
63 // (note: works great for pointers)
64 // most ugly macro I ever wrote
65 #define statementvar(type, name, value) if (type name = 0) { } else if (((name) = (value)), 1)
66
67 // in range including end
68 #define IN_RANGE_INC(val,beg,end) \
69 ((unsigned int)(val) - (unsigned int)(beg) <= (unsigned int)(end) - (unsigned int)(beg))
70
71 // in range excluding end
72 #define IN_RANGE_EXC(val,beg,end) \
73 ((unsigned int)(val) - (unsigned int)(beg) < (unsigned int)(end) - (unsigned int)(beg))
74
75 void cleanup (const char *cause, bool make_core = false);
76 void fork_abort (const char *msg);
77
// Mixed-type min/max/clamp helpers.
//
// min and max compare in the type of the second argument (U) and return the
// type of the first (T). The rationale for using (U) and not (T) is to reduce
// signed/unsigned issues, as a is often a constant while b is the variable.
// Converting the result back to T is still a bug, though.
template<typename T, typename U>
static inline T
min (T a, U b)
{
  const U lhs = (U)a;

  if (lhs < b)
    return lhs;

  return b;
}

template<typename T, typename U>
static inline T
max (T a, U b)
{
  const U lhs = (U)a;

  if (lhs > b)
    return lhs;

  return b;
}

// limits v to the range a..b, both bounds are converted to T first
template<typename T, typename U, typename V>
static inline T
clamp (T v, U a, V b)
{
  if (v < (T)a)
    return (T)a;

  if (v > (T)b)
    return (T)b;

  return v;
}

// in-place variants: the result is stored back into v
template<typename T, typename U>
static inline void
min_it (T &v, U m)
{
  v = min (v, (T)m);
}

template<typename T, typename U>
static inline void
max_it (T &v, U m)
{
  v = max (v, (T)m);
}

template<typename T, typename U, typename V>
static inline void
clamp_it (T &v, U a, V b)
{
  v = clamp (v, (T)a, (T)b);
}

// exchanges a and b, converting between the two types as needed
template<typename T, typename U>
static inline void
swap (T &a, U &b)
{
  T saved = a;

  a = (T)b;
  b = (U)saved;
}

// three-argument forms, built on top of the two-argument ones
template<typename T, typename U, typename V>
static inline T
min (T a, U b, V c)
{
  return min (a, min (b, c));
}

template<typename T, typename U, typename V>
static inline T
max (T a, U b, V c)
{
  return max (a, max (b, c));
}
92
// sign returns -1 for negative values and +1 for everything else
// (including zero)
template<typename T>
static inline T
sign (T v)
{
  if (v < 0)
    return -1;

  return +1;
}
96 // relies on 2c representation
97 template<>
98 inline sint8 sign (sint8 v) { return 1 - (sint8 (uint8 (v) >> 7) * 2); }
99
// sign0 returns -1, 0 or +1: like sign, but zero maps to zero
template<typename T>
static inline T
sign0 (T v)
{
  if (!v)
    return 0;

  return sign (v);
}
103
// div* only work correctly for div > 0
// negative values are treated as the unlikely case

// div, with correct rounding (< 0.5 downwards, >=0.5 upwards)
template<typename T>
static inline T
div (T val, T div)
{
  if (expect_false (val < 0))
    return - ((-val + (div - 1) / 2) / div);

  return (val + div / 2) / div;
}

// div, round-up
template<typename T>
static inline T
div_ru (T val, T div)
{
  if (expect_false (val < 0))
    return - ((-val) / div);

  return (val + div - 1) / div;
}

// div, round-down
template<typename T>
static inline T
div_rd (T val, T div)
{
  if (expect_false (val < 0))
    return - ((-val + (div - 1)) / div);

  return val / div;
}
120
121 // lerp* only work correctly for min_in < max_in
122 // Linear intERPolate, scales val from min_in..max_in to min_out..max_out
123 template<typename T>
124 static inline T
125 lerp (T val, T min_in, T max_in, T min_out, T max_out)
126 {
127 return min_out + div <T> ((val - min_in) * (max_out - min_out), max_in - min_in);
128 }
129
130 // lerp, round-down
131 template<typename T>
132 static inline T
133 lerp_rd (T val, T min_in, T max_in, T min_out, T max_out)
134 {
135 return min_out + div_rd<T> ((val - min_in) * (max_out - min_out), max_in - min_in);
136 }
137
138 // lerp, round-up
139 template<typename T>
140 static inline T
141 lerp_ru (T val, T min_in, T max_in, T min_out, T max_out)
142 {
143 return min_out + div_ru<T> ((val - min_in) * (max_out - min_out), max_in - min_in);
144 }
145
146 // lots of stuff taken from FXT
147
/* Rotate right. This is used in various places for checksumming */
//TODO: that sucks, use a better checksum algo
//
// Rotates the 32 bit value c right by count bits (default: 1).
// count is reduced modulo 32, so counts of 0, 32 or more are well defined
// instead of shifting by the full word width, which is undefined behaviour.
static inline uint32_t
rotate_right (uint32_t c, uint32_t count = 1)
{
  count &= 31;

  // the second mask keeps the complementary shift at 0 when count is 0
  return (c >> count) | (c << ((32 - count) & 31));
}
155
// Rotates the 32 bit value c left by count bits (default: 1).
// count is reduced modulo 32, so counts of 0, 32 or more are well defined
// instead of shifting by the full word width, which is undefined behaviour.
static inline uint32_t
rotate_left (uint32_t c, uint32_t count = 1)
{
  count &= 31;

  // the second mask keeps the complementary shift at 0 when count is 0
  return (c << count) | (c >> ((32 - count) & 31));
}
161
// Return abs(a-b), branch-free.
// Both a and b must not have the most significant bit set.
//
// The computation is done entirely in 32 bit unsigned arithmetic. The
// previous version stored the difference in a long, which on platforms
// with a 64 bit long was zero-extended, so the sign test never fired
// and a > b returned the wrapped difference.
static inline uint32_t
upos_abs_diff (uint32_t a, uint32_t b)
{
  const uint32_t diff = b - a;            // wraps around when a > b
  const uint32_t mask = 0u - (diff >> 31); // all ones if the difference is negative, else 0

  // conditional two's complement negation: (diff ^ ~0) + 1 == -diff
  return (diff ^ mask) - mask;
}
172
// Branch-free minimum of a and b.
// Both a and b must not have the most significant bit set
static inline uint32_t
upos_min (uint32_t a, uint32_t b)
{
  const int32_t delta  = b - a;                  // negative exactly when b < a
  const int32_t adjust = delta & (delta >> 31);  // delta if negative, else 0

  return a + adjust;
}
181
// Branch-free maximum of a and b.
// Both a and b must not have the most significant bit set
static inline uint32_t
upos_max (uint32_t a, uint32_t b)
{
  const int32_t delta  = b - a;                  // negative exactly when b < a
  const int32_t adjust = delta & (delta >> 31);  // delta if negative, else 0

  return b - adjust;
}
190
// Integer square root: returns floor (sqrt (n)), or 0 for n <= 0.
//
// Uses the hardware floating point square root, which is much faster than
// crossfire's original algorithm on modern cpus. The computation is done
// in double precision: a float has only 24 bits of mantissa, so large
// arguments were rounded up before taking the root (67108863 yielded
// 8192 instead of 8191). A double represents every int exactly.
inline int
isqrt (int n)
{
  // negative input would produce NaN, and converting NaN to int is undefined
  if (n <= 0)
    return 0;

  return (int)sqrt ((double)n);
}
198
// this is kind of like the ^^ operator, if it would exist, without sequence point.
// more handy than it looks like, because the arguments are implicitly
// converted to bool (the equivalent of !!) before they are compared
inline bool
logical_xor (bool a, bool b)
{
  return a ? !b : b;
}
206
// logical implication (a => b): false only when a is true and b is false
inline bool
logical_implies (bool a, bool b)
{
  return !a || b;
}
212
// Integer approximation of the euclidean distance sqrtf (dx*dx+dy*dy).
// this is only twice as fast as the naive sqrtf version.
// The enabled variant has a max. error of 9 in the range -100..+100,
// the disabled fixed-point variant has a max. error of 6 in that range.
inline int
idistance (int dx, int dy)
{
  const unsigned int ax = abs (dx);
  const unsigned int ay = abs (dy);

#if 0
  return ax > ay
         ? (ax * 61685 + ay * 26870) >> 16
         : (ay * 61685 + ax * 26870) >> 16;
#else
  // sum of both sides minus 5/8 of the shorter one
  const unsigned int shorter = ax < ay ? ax : ay;

  return ax + ay - shorter * 5 / 8;
#endif
}
233
/*
 * absdir(int): maps any integer onto the range 1..8, the "absolute"
 * direction of a number. This takes care of "overflow" in previous
 * calculations of a direction, e.g. 0 becomes 8 and 9 becomes 1.
 */
inline int
absdir (int d)
{
  const int zero_based = (d - 1) & 7; // 0..7

  return zero_based + 1;
}
244
245 extern ssize_t slice_alloc; // statistics
246
247 void *salloc_ (int n) throw (std::bad_alloc);
248 void *salloc_ (int n, void *src) throw (std::bad_alloc);
249
250 // strictly the same as g_slice_alloc, but never returns 0
251 template<typename T>
252 inline T *salloc (int n = 1) throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T)); }
253
254 // also copies src into the new area, like "memdup"
255 // if src is 0, clears the memory
256 template<typename T>
257 inline T *salloc (int n, T *src) throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), (void *)src); }
258
259 // clears the memory
260 template<typename T>
261 inline T *salloc0(int n = 1) throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), 0); }
262
263 // for symmetry
264 template<typename T>
265 inline void sfree (T *ptr, int n = 1) throw ()
266 {
267 if (expect_true (ptr))
268 {
269 slice_alloc -= n * sizeof (T);
270 if (DEBUG_POISON) memset (ptr, DEBUG_POISON, n * sizeof (T));
271 g_slice_free1 (n * sizeof (T), (void *)ptr);
272 assert (slice_alloc >= 0);//D
273 }
274 }
275
276 // nulls the pointer
277 template<typename T>
278 inline void sfree0 (T *&ptr, int n = 1) throw ()
279 {
280 sfree<T> (ptr, n);
281 ptr = 0;
282 }
283
284 // makes dynamically allocated objects zero-initialised
285 struct zero_initialised
286 {
287 void *operator new (size_t s, void *p)
288 {
289 memset (p, 0, s);
290 return p;
291 }
292
293 void *operator new (size_t s)
294 {
295 return salloc0<char> (s);
296 }
297
298 void *operator new[] (size_t s)
299 {
300 return salloc0<char> (s);
301 }
302
303 void operator delete (void *p, size_t s)
304 {
305 sfree ((char *)p, s);
306 }
307
308 void operator delete[] (void *p, size_t s)
309 {
310 sfree ((char *)p, s);
311 }
312 };
313
314 // makes dynamically allocated objects zero-initialised
315 struct slice_allocated
316 {
317 void *operator new (size_t s, void *p)
318 {
319 return p;
320 }
321
322 void *operator new (size_t s)
323 {
324 return salloc<char> (s);
325 }
326
327 void *operator new[] (size_t s)
328 {
329 return salloc<char> (s);
330 }
331
332 void operator delete (void *p, size_t s)
333 {
334 sfree ((char *)p, s);
335 }
336
337 void operator delete[] (void *p, size_t s)
338 {
339 sfree ((char *)p, s);
340 }
341 };
342
343 // a STL-compatible allocator that uses g_slice
344 // boy, this is verbose
345 template<typename Tp>
346 struct slice_allocator
347 {
348 typedef size_t size_type;
349 typedef ptrdiff_t difference_type;
350 typedef Tp *pointer;
351 typedef const Tp *const_pointer;
352 typedef Tp &reference;
353 typedef const Tp &const_reference;
354 typedef Tp value_type;
355
356 template <class U>
357 struct rebind
358 {
359 typedef slice_allocator<U> other;
360 };
361
362 slice_allocator () throw () { }
363 slice_allocator (const slice_allocator &) throw () { }
364 template<typename Tp2>
365 slice_allocator (const slice_allocator<Tp2> &) throw () { }
366
367 ~slice_allocator () { }
368
369 pointer address (reference x) const { return &x; }
370 const_pointer address (const_reference x) const { return &x; }
371
372 pointer allocate (size_type n, const_pointer = 0)
373 {
374 return salloc<Tp> (n);
375 }
376
377 void deallocate (pointer p, size_type n)
378 {
379 sfree<Tp> (p, n);
380 }
381
382 size_type max_size () const throw ()
383 {
384 return size_t (-1) / sizeof (Tp);
385 }
386
387 void construct (pointer p, const Tp &val)
388 {
389 ::new (p) Tp (val);
390 }
391
392 void destroy (pointer p)
393 {
394 p->~Tp ();
395 }
396 };
397
// P. L'Ecuyer, “Maximally Equidistributed Combined Tausworthe Generators”, Mathematics of Computation, 65, 213 (1996), 203–213.
// http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
// http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
//
// Holds four 32 bit state words; seed and next are defined elsewhere.
struct tausworthe_random_generator
{
  uint32_t state [4];

  // copies the complete generator state from src
  void operator =(const tausworthe_random_generator &src)
  {
    for (int i = 0; i < 4; ++i)
      state [i] = src.state [i];
  }

  void seed (uint32_t seed);
  uint32_t next ();
};
416
// Xorshift RNGs, George Marsaglia
// http://www.jstatsoft.org/v08/i14/paper
// this one is about 40% faster than the tausworthe one above (i.e. not much),
// despite the inlining, and has the issue of only creating 2**32-1 numbers.
// see also http://www.iro.umontreal.ca/~lecuyer/myftp/papers/xorshift.pdf
struct xorshift_random_generator
{
  uint32_t x, y;

  // copies the complete generator state from src
  void operator =(const xorshift_random_generator &src)
  {
    x = src.x;
    y = src.y;
  }

  // derives both state words from a single 32 bit seed
  void seed (uint32_t seed)
  {
    x = seed;
    y = seed * 69069U;
  }

  // advances the state and returns the new y word
  uint32_t next ()
  {
    const uint32_t mixed = x ^ (x << 10);
    const uint32_t prev  = y;

    x = prev;
    y = prev ^ (prev >> 13) ^ mixed ^ (mixed >> 10);

    return y;
  }
};
446
447 template<class generator>
448 struct random_number_generator : generator
449 {
450 // uniform distribution, 0 .. max (0, num - 1)
451 uint32_t operator ()(uint32_t num)
452 {
453 return !is_constant (num) ? get_range (num) // non-constant
454 : num & (num - 1) ? (this->next () * (uint64_t)num) >> 32U // constant, non-power-of-two
455 : this->next () & (num - 1); // constant, power-of-two
456 }
457
458 // return a number within (min .. max)
459 int operator () (int r_min, int r_max)
460 {
461 return is_constant (r_min) && is_constant (r_max) && r_min <= r_max
462 ? r_min + operator ()(r_max - r_min + 1)
463 : get_range (r_min, r_max);
464 }
465
466 double operator ()()
467 {
468 return this->next () / (double)0xFFFFFFFFU;
469 }
470
471 protected:
472 uint32_t get_range (uint32_t r_max);
473 int get_range (int r_min, int r_max);
474 };
475
476 typedef random_number_generator<tausworthe_random_generator> rand_gen;
477
478 extern rand_gen rndm, rmg_rndm;
479
480 INTERFACE_CLASS (attachable)
481 struct refcnt_base
482 {
483 typedef int refcnt_t;
484 mutable refcnt_t ACC (RW, refcnt);
485
486 MTH void refcnt_inc () const { ++refcnt; }
487 MTH void refcnt_dec () const { --refcnt; }
488
489 refcnt_base () : refcnt (0) { }
490 };
491
492 // to avoid branches with more advanced compilers
493 extern refcnt_base::refcnt_t refcnt_dummy;
494
495 template<class T>
496 struct refptr
497 {
498 // p if not null
499 refcnt_base::refcnt_t *refcnt_ref () { return p ? &p->refcnt : &refcnt_dummy; }
500
501 void refcnt_dec ()
502 {
503 if (!is_constant (p))
504 --*refcnt_ref ();
505 else if (p)
506 --p->refcnt;
507 }
508
509 void refcnt_inc ()
510 {
511 if (!is_constant (p))
512 ++*refcnt_ref ();
513 else if (p)
514 ++p->refcnt;
515 }
516
517 T *p;
518
519 refptr () : p(0) { }
520 refptr (const refptr<T> &p) : p(p.p) { refcnt_inc (); }
521 refptr (T *p) : p(p) { refcnt_inc (); }
522 ~refptr () { refcnt_dec (); }
523
524 const refptr<T> &operator =(T *o)
525 {
526 // if decrementing ever destroys we need to reverse the order here
527 refcnt_dec ();
528 p = o;
529 refcnt_inc ();
530 return *this;
531 }
532
533 const refptr<T> &operator =(const refptr<T> &o)
534 {
535 *this = o.p;
536 return *this;
537 }
538
539 T &operator * () const { return *p; }
540 T *operator ->() const { return p; }
541
542 operator T *() const { return p; }
543 };
544
545 typedef refptr<maptile> maptile_ptr;
546 typedef refptr<object> object_ptr;
547 typedef refptr<archetype> arch_ptr;
548 typedef refptr<client> client_ptr;
549 typedef refptr<player> player_ptr;
550
// Hash functor for zero-terminated C strings.
struct str_hash
{
  std::size_t operator ()(const char *s) const
  {
    // use FNV-1a hash (http://isthe.com/chongo/tech/comp/fnv/)
    // it is about twice as fast as the one-at-a-time hash used
    // previously, with good distribution.
    // FNV-1a is faster on many cpus because the multiplication
    // runs concurrent with the looping logic.
    uint32_t hash = 2166136261U; // offset basis

    for (; *s; ++s)
      hash = (hash ^ *s) * 16777619; // xor in the next char, multiply by the prime

    return hash;
  }
};
588
// Equality functor for zero-terminated C strings: compares the
// characters, not the pointers.
struct str_equal
{
  bool operator ()(const char *a, const char *b) const
  {
    return strcmp (a, b) == 0;
  }
};
596
597 // Mostly the same as std::vector, but insert/erase can reorder
598 // the elements, making append(=insert)/remove O(1) instead of O(n).
599 //
600 // NOTE: only some forms of erase are available
601 template<class T>
602 struct unordered_vector : std::vector<T, slice_allocator<T> >
603 {
604 typedef typename unordered_vector::iterator iterator;
605
606 void erase (unsigned int pos)
607 {
608 if (pos < this->size () - 1)
609 (*this)[pos] = (*this)[this->size () - 1];
610
611 this->pop_back ();
612 }
613
614 void erase (iterator i)
615 {
616 erase ((unsigned int )(i - this->begin ()));
617 }
618 };
619
620 // This container blends advantages of linked lists
621 // (efficiency) with vectors (random access)
622 // by using an unordered vector and storing the vector
623 // index inside the object.
624 //
625 // + memory-efficient on most 64 bit archs
626 // + O(1) insert/remove
627 // + free unique (but varying) id for inserted objects
628 // + cache-friendly iteration
629 // - only works for pointers to structs
630 //
631 // NOTE: only some forms of erase/insert are available
632 typedef int object_vector_index;
633
634 template<class T, object_vector_index T::*indexmember>
635 struct object_vector : std::vector<T *, slice_allocator<T *> >
636 {
637 typedef typename object_vector::iterator iterator;
638
639 bool contains (const T *obj) const
640 {
641 return obj->*indexmember;
642 }
643
644 iterator find (const T *obj)
645 {
646 return obj->*indexmember
647 ? this->begin () + obj->*indexmember - 1
648 : this->end ();
649 }
650
651 void push_back (T *obj)
652 {
653 std::vector<T *, slice_allocator<T *> >::push_back (obj);
654 obj->*indexmember = this->size ();
655 }
656
657 void insert (T *obj)
658 {
659 push_back (obj);
660 }
661
662 void insert (T &obj)
663 {
664 insert (&obj);
665 }
666
667 void erase (T *obj)
668 {
669 unsigned int pos = obj->*indexmember;
670 obj->*indexmember = 0;
671
672 if (pos < this->size ())
673 {
674 (*this)[pos - 1] = (*this)[this->size () - 1];
675 (*this)[pos - 1]->*indexmember = pos;
676 }
677
678 this->pop_back ();
679 }
680
681 void erase (T &obj)
682 {
683 erase (&obj);
684 }
685 };
686
687 // basically does what strncpy should do, but appends "..." to strings exceeding length
688 // returns the number of bytes actually used (including \0)
689 int assign (char *dst, const char *src, int maxsize);
690
691 // type-safe version of assign
692 template<int N>
693 inline int assign (char (&dst)[N], const char *src)
694 {
695 return assign ((char *)&dst, src, N);
696 }
697
698 typedef double tstamp;
699
700 // return current time as timestamp
701 tstamp now ();
702
703 int similar_direction (int a, int b);
704
705 // like v?sprintf, but returns a "static" buffer
706 char *vformat (const char *format, va_list ap);
707 char *format (const char *format, ...) attribute ((format (printf, 1, 2)));
708
709 // safety-check player input which will become object->msg
710 bool msg_is_safe (const char *msg);
711
712 /////////////////////////////////////////////////////////////////////////////
713 // threads, very very thin wrappers around pthreads
714
715 struct thread
716 {
717 pthread_t id;
718
719 void start (void *(*start_routine)(void *), void *arg = 0);
720
721 void cancel ()
722 {
723 pthread_cancel (id);
724 }
725
726 void *join ()
727 {
728 void *ret;
729
730 if (pthread_join (id, &ret))
731 cleanup ("pthread_join failed", 1);
732
733 return ret;
734 }
735 };
736
737 // note that mutexes are not classes
738 typedef pthread_mutex_t smutex;
739
740 #if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)
741 #define SMUTEX_INITIALISER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
742 #else
743 #define SMUTEX_INITIALISER PTHREAD_MUTEX_INITIALIZER
744 #endif
745
746 #define SMUTEX(name) smutex name = SMUTEX_INITIALISER
747 #define SMUTEX_LOCK(name) pthread_mutex_lock (&(name))
748 #define SMUTEX_UNLOCK(name) pthread_mutex_unlock (&(name))
749
750 typedef pthread_cond_t scond;
751
752 #define SCOND(name) scond name = PTHREAD_COND_INITIALIZER
753 #define SCOND_SIGNAL(name) pthread_cond_signal (&(name))
754 #define SCOND_BROADCAST(name) pthread_cond_broadcast (&(name))
755 #define SCOND_WAIT(name,mutex) pthread_cond_wait (&(name), &(mutex))
756
757 #endif
758