ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/deliantra/server/include/util.h
Revision: 1.54
Committed: Mon Aug 6 10:54:12 2007 UTC (16 years, 9 months ago) by root
Content type: text/plain
Branch: MAIN
Changes since 1.53: +41 -8 lines
Log Message:
rather uncertain optimisation.. trying to take advantage of cmov instructions
if refptr.

a = b before:

     510:       48 8b 16                mov    (%rsi),%rdx
     513:       48 85 d2                test   %rdx,%rdx
     516:       75 0c                   jne    524 <xyzzy(refptr<object>, refptr<object>)+0x14>
     518:       48 8b 07                mov    (%rdi),%rax
     51b:       48 85 c0                test   %rax,%rax
     51e:       75 09                   jne    529 <xyzzy(refptr<object>, refptr<object>)+0x19>
     520:       48 89 17                mov    %rdx,(%rdi)
     523:       c3                      retq
     524:       ff 42 08                incl   0x8(%rdx)
     527:       eb ef                   jmp    518 <xyzzy(refptr<object>, refptr<object>)+0x8>
     529:       ff 48 08                decl   0x8(%rax)
     52c:       0f 1f 40 00             nopl   0x0(%rax)
     530:       eb ee                   jmp    520 <xyzzy(refptr<object>, refptr<object>)+0x10>

a = b after:

     141:       4c 8b 0f                mov    (%rdi),%r9
     144:       ba 00 00 00 00          mov    $0x0,%edx
     149:       4d 8d 41 08             lea    0x8(%r9),%r8
     14d:       4d 85 c9                test   %r9,%r9
     150:       4c 0f 44 c2             cmove  %rdx,%r8
     154:       41 ff 08                decl   (%r8)
     157:       48 8b 06                mov    (%rsi),%rax
     15a:       48 8d 48 08             lea    0x8(%rax),%rcx
     15e:       48 85 c0                test   %rax,%rax
     161:       48 89 07                mov    %rax,(%rdi)
     164:       48 0f 45 d1             cmovne %rcx,%rdx
     168:       ff 02                   incl   (%rdx)
     16a:       c3                      retq

note no jumps but larger codeside (1.7kb net increase).

File Contents

# User Rev Content
1 root 1.46 /*
2 root 1.51 * This file is part of Crossfire TRT, the Roguelike Realtime MORPG.
3 root 1.46 *
4     * Copyright (©) 2005,2006,2007 Marc Alexander Lehmann / Robin Redeker / the Crossfire TRT team
5     *
6 root 1.51 * Crossfire TRT is free software: you can redistribute it and/or modify
7     * it under the terms of the GNU General Public License as published by
8     * the Free Software Foundation, either version 3 of the License, or
9     * (at your option) any later version.
10 root 1.46 *
11 root 1.51 * This program is distributed in the hope that it will be useful,
12     * but WITHOUT ANY WARRANTY; without even the implied warranty of
13     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     * GNU General Public License for more details.
15 root 1.46 *
16 root 1.51 * You should have received a copy of the GNU General Public License
17     * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 root 1.46 *
19     * The authors can be reached via e-mail to <crossfire@schmorp.de>
20     */
21    
22 root 1.1 #ifndef UTIL_H__
23     #define UTIL_H__
24    
25 root 1.36 //#define PREFER_MALLOC
26    
27 root 1.2 #if __GNUC__ >= 3
28 root 1.45 # define is_constant(c) __builtin_constant_p (c)
29     # define expect(expr,value) __builtin_expect ((expr),(value))
30     # define prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality)
31 root 1.2 #else
32 root 1.45 # define is_constant(c) 0
33     # define expect(expr,value) (expr)
34     # define prefetch(addr,rw,locality)
35 root 1.2 #endif
36    
37 root 1.47 #if __GNUC__ < 4 || (__GNUC__ == 4 || __GNUC_MINOR__ < 4)
38     # define decltype(x) typeof(x)
39     #endif
40    
41 root 1.45 // put into ifs if you are very sure that the expression
42     // is mostly true or mosty false. note that these return
43     // booleans, not the expression.
44     #define expect_false(expr) expect ((expr) != 0, 0)
45     #define expect_true(expr) expect ((expr) != 0, 1)
46    
47 root 1.11 #include <cstddef>
48 root 1.28 #include <cmath>
49 root 1.25 #include <new>
50     #include <vector>
51 root 1.11
52     #include <glib.h>
53    
54 root 1.25 #include <shstr.h>
55     #include <traits.h>
56    
57 root 1.49 // use C0X decltype for auto declarations until ISO C++ sanctifies them (if ever)
58 root 1.47 #define auto(var,expr) decltype(expr) var = (expr)
59 root 1.14
60 root 1.26 // very ugly macro that basicaly declares and initialises a variable
61     // that is in scope for the next statement only
62     // works only for stuff that can be assigned 0 and converts to false
63     // (note: works great for pointers)
64     // most ugly macro I ever wrote
65 root 1.48 #define statementvar(type, name, value) if (type name = 0) { } else if (((name) = (value)), 1)
66 root 1.26
67 root 1.27 // in range including end
68     #define IN_RANGE_INC(val,beg,end) \
69     ((unsigned int)(val) - (unsigned int)(beg) <= (unsigned int)(end) - (unsigned int)(beg))
70    
71     // in range excluding end
72     #define IN_RANGE_EXC(val,beg,end) \
73     ((unsigned int)(val) - (unsigned int)(beg) < (unsigned int)(end) - (unsigned int)(beg))
74    
75 root 1.31 void fork_abort (const char *msg);
76    
77 root 1.35 // rationale for using (U) not (T) is to reduce signed/unsigned issues,
78     // as a is often a constant while b is the variable. it is still a bug, though.
79     template<typename T, typename U> static inline T min (T a, U b) { return (U)a < b ? (U)a : b; }
80     template<typename T, typename U> static inline T max (T a, U b) { return (U)a > b ? (U)a : b; }
81     template<typename T, typename U, typename V> static inline T clamp (T v, U a, V b) { return v < (T)a ? (T)a : v >(T)b ? (T)b : v; }
82 root 1.32
83     template<typename T, typename U> static inline void swap (T& a, U& b) { T t=a; a=(T)b; b=(U)t; }
84    
85 root 1.44 template<typename T>
86     static inline T
87     lerp (T val, T min_in, T max_in, T min_out, T max_out)
88     {
89     return (val - min_in) * (max_out - min_out) / (max_in - min_in) + min_out;
90     }
91    
92 root 1.37 // lots of stuff taken from FXT
93    
94     /* Rotate right. This is used in various places for checksumming */
95 root 1.38 //TODO: that sucks, use a better checksum algo
96 root 1.37 static inline uint32_t
97 root 1.38 rotate_right (uint32_t c, uint32_t count = 1)
98 root 1.37 {
99 root 1.38 return (c << (32 - count)) | (c >> count);
100     }
101    
102     static inline uint32_t
103     rotate_left (uint32_t c, uint32_t count = 1)
104     {
105     return (c >> (32 - count)) | (c << count);
106 root 1.37 }
107    
108     // Return abs(a-b)
109     // Both a and b must not have the most significant bit set
110     static inline uint32_t
111     upos_abs_diff (uint32_t a, uint32_t b)
112     {
113     long d1 = b - a;
114     long d2 = (d1 & (d1 >> 31)) << 1;
115    
116     return d1 - d2; // == (b - d) - (a + d);
117     }
118    
119     // Both a and b must not have the most significant bit set
120     static inline uint32_t
121     upos_min (uint32_t a, uint32_t b)
122     {
123     int32_t d = b - a;
124     d &= d >> 31;
125     return a + d;
126     }
127    
128     // Both a and b must not have the most significant bit set
129     static inline uint32_t
130     upos_max (uint32_t a, uint32_t b)
131     {
132     int32_t d = b - a;
133     d &= d >> 31;
134     return b - d;
135     }
136    
137 root 1.28 // this is much faster than crossfires original algorithm
138     // on modern cpus
139     inline int
140     isqrt (int n)
141     {
142     return (int)sqrtf ((float)n);
143     }
144    
145     // this is only twice as fast as naive sqrtf (dx*dy+dy*dy)
146     #if 0
147     // and has a max. error of 6 in the range -100..+100.
148     #else
149     // and has a max. error of 9 in the range -100..+100.
150     #endif
151     inline int
152     idistance (int dx, int dy)
153     {
154     unsigned int dx_ = abs (dx);
155     unsigned int dy_ = abs (dy);
156    
157     #if 0
158     return dx_ > dy_
159     ? (dx_ * 61685 + dy_ * 26870) >> 16
160     : (dy_ * 61685 + dx_ * 26870) >> 16;
161     #else
162 root 1.30 return dx_ + dy_ - min (dx_, dy_) * 5 / 8;
163 root 1.28 #endif
164     }
165    
166 root 1.29 /*
167     * absdir(int): Returns a number between 1 and 8, which represent
168     * the "absolute" direction of a number (it actually takes care of
169     * "overflow" in previous calculations of a direction).
170     */
171     inline int
172     absdir (int d)
173     {
174     return ((d - 1) & 7) + 1;
175     }
176 root 1.28
177 root 1.1 // makes dynamically allocated objects zero-initialised
178     struct zero_initialised
179     {
180 root 1.11 void *operator new (size_t s, void *p)
181     {
182     memset (p, 0, s);
183     return p;
184     }
185    
186     void *operator new (size_t s)
187     {
188     return g_slice_alloc0 (s);
189     }
190    
191     void *operator new[] (size_t s)
192     {
193     return g_slice_alloc0 (s);
194     }
195    
196     void operator delete (void *p, size_t s)
197     {
198     g_slice_free1 (s, p);
199     }
200    
201     void operator delete[] (void *p, size_t s)
202     {
203     g_slice_free1 (s, p);
204     }
205     };
206    
207 root 1.20 void *salloc_ (int n) throw (std::bad_alloc);
208     void *salloc_ (int n, void *src) throw (std::bad_alloc);
209    
210 root 1.12 // strictly the same as g_slice_alloc, but never returns 0
211 root 1.20 template<typename T>
212     inline T *salloc (int n = 1) throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T)); }
213    
214 root 1.17 // also copies src into the new area, like "memdup"
215 root 1.18 // if src is 0, clears the memory
216     template<typename T>
217 root 1.20 inline T *salloc (int n, T *src) throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), (void *)src); }
218 root 1.18
219 root 1.21 // clears the memory
220     template<typename T>
221     inline T *salloc0(int n = 1) throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), 0); }
222    
223 root 1.12 // for symmetry
224 root 1.18 template<typename T>
225 root 1.20 inline void sfree (T *ptr, int n = 1) throw ()
226 root 1.12 {
227 root 1.36 #ifdef PREFER_MALLOC
228     free (ptr);
229     #else
230 root 1.20 g_slice_free1 (n * sizeof (T), (void *)ptr);
231 root 1.36 #endif
232 root 1.12 }
233 root 1.11
234     // a STL-compatible allocator that uses g_slice
235     // boy, this is verbose
236     template<typename Tp>
237     struct slice_allocator
238     {
239     typedef size_t size_type;
240     typedef ptrdiff_t difference_type;
241     typedef Tp *pointer;
242     typedef const Tp *const_pointer;
243     typedef Tp &reference;
244     typedef const Tp &const_reference;
245     typedef Tp value_type;
246    
247     template <class U>
248     struct rebind
249     {
250     typedef slice_allocator<U> other;
251     };
252    
253     slice_allocator () throw () { }
254     slice_allocator (const slice_allocator &o) throw () { }
255     template<typename Tp2>
256     slice_allocator (const slice_allocator<Tp2> &) throw () { }
257    
258     ~slice_allocator () { }
259    
260     pointer address (reference x) const { return &x; }
261     const_pointer address (const_reference x) const { return &x; }
262    
263     pointer allocate (size_type n, const_pointer = 0)
264     {
265 root 1.18 return salloc<Tp> (n);
266 root 1.11 }
267    
268     void deallocate (pointer p, size_type n)
269     {
270 root 1.19 sfree<Tp> (p, n);
271 root 1.11 }
272    
273     size_type max_size ()const throw ()
274     {
275     return size_t (-1) / sizeof (Tp);
276     }
277    
278     void construct (pointer p, const Tp &val)
279     {
280     ::new (p) Tp (val);
281     }
282    
283     void destroy (pointer p)
284     {
285     p->~Tp ();
286     }
287 root 1.1 };
288    
289 root 1.32 // P. L'Ecuyer, “Maximally Equidistributed Combined Tausworthe Generators”, Mathematics of Computation, 65, 213 (1996), 203–213.
290     // http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
291     // http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
292     struct tausworthe_random_generator
293     {
294 root 1.34 // generator
295 root 1.32 uint32_t state [4];
296    
297 root 1.34 void operator =(const tausworthe_random_generator &src)
298     {
299     state [0] = src.state [0];
300     state [1] = src.state [1];
301     state [2] = src.state [2];
302     state [3] = src.state [3];
303     }
304    
305     void seed (uint32_t seed);
306 root 1.32 uint32_t next ();
307    
308 root 1.34 // uniform distribution
309 root 1.42 uint32_t operator ()(uint32_t num)
310 root 1.32 {
311 root 1.42 return is_constant (num)
312     ? (next () * (uint64_t)num) >> 32U
313     : get_range (num);
314 root 1.32 }
315    
316     // return a number within (min .. max)
317     int operator () (int r_min, int r_max)
318     {
319 root 1.42 return is_constant (r_min) && is_constant (r_max) && r_min <= r_max
320     ? r_min + operator ()(r_max - r_min + 1)
321 root 1.34 : get_range (r_min, r_max);
322 root 1.32 }
323    
324     double operator ()()
325     {
326 root 1.34 return this->next () / (double)0xFFFFFFFFU;
327 root 1.32 }
328 root 1.34
329     protected:
330     uint32_t get_range (uint32_t r_max);
331     int get_range (int r_min, int r_max);
332 root 1.32 };
333    
334     typedef tausworthe_random_generator rand_gen;
335    
336     extern rand_gen rndm;
337    
338 root 1.54 INTERFACE_CLASS (attachable)
339     struct refcnt_base
340     {
341     typedef int refcnt_t;
342     mutable refcnt_t ACC (RW, refcnt);
343    
344     MTH void refcnt_inc () const { ++refcnt; }
345     MTH void refcnt_dec () const { --refcnt; }
346    
347     refcnt_base () : refcnt (0) { }
348     };
349    
350     extern refcnt_base::refcnt_t refcnt_dummy;
351    
352 root 1.7 template<class T>
353     struct refptr
354     {
355 root 1.54 // p if not null
356     refcnt_base::refcnt_t *refcnt_ref () { return p ? &p->refcnt : &refcnt_dummy; }
357    
358     void refcnt_dec ()
359     {
360     if (!is_constant (p))
361     --*refcnt_ref ();
362     else if (p)
363     --p->refcnt;
364     }
365    
366     void refcnt_inc ()
367     {
368     if (!is_constant (p))
369     ++*refcnt_ref ();
370     else if (p)
371     ++p->refcnt;
372     }
373    
374 root 1.7 T *p;
375    
376     refptr () : p(0) { }
377 root 1.54 refptr (const refptr<T> &p) : p(p.p) { refcnt_inc (); }
378     refptr (T *p) : p(p) { refcnt_inc (); }
379     ~refptr () { refcnt_dec (); }
380 root 1.7
381     const refptr<T> &operator =(T *o)
382     {
383 root 1.54 // if decrementing ever destroys we need to reverse the order here
384     refcnt_dec ();
385 root 1.7 p = o;
386 root 1.54 refcnt_inc ();
387 root 1.7 return *this;
388     }
389    
390 root 1.54 const refptr<T> &operator =(const refptr<T> &o)
391 root 1.7 {
392     *this = o.p;
393     return *this;
394     }
395    
396     T &operator * () const { return *p; }
397 root 1.54 T *operator ->() const { return p; }
398 root 1.7
399     operator T *() const { return p; }
400     };
401    
402 root 1.24 typedef refptr<maptile> maptile_ptr;
403 root 1.22 typedef refptr<object> object_ptr;
404     typedef refptr<archetype> arch_ptr;
405 root 1.24 typedef refptr<client> client_ptr;
406     typedef refptr<player> player_ptr;
407 root 1.22
408 root 1.4 struct str_hash
409     {
410     std::size_t operator ()(const char *s) const
411     {
412     unsigned long hash = 0;
413    
414     /* use the one-at-a-time hash function, which supposedly is
415     * better than the djb2-like one used by perl5.005, but
416     * certainly is better then the bug used here before.
417     * see http://burtleburtle.net/bob/hash/doobs.html
418     */
419     while (*s)
420     {
421     hash += *s++;
422     hash += hash << 10;
423     hash ^= hash >> 6;
424     }
425    
426     hash += hash << 3;
427     hash ^= hash >> 11;
428     hash += hash << 15;
429    
430     return hash;
431     }
432     };
433    
434     struct str_equal
435     {
436     bool operator ()(const char *a, const char *b) const
437     {
438     return !strcmp (a, b);
439     }
440     };
441    
442 root 1.49 // Mostly the same as std::vector, but insert/erase can reorder
443 root 1.52 // the elements, making append(=insert)/remove O(1) instead of O(n).
444 root 1.49 //
445 root 1.52 // NOTE: only some forms of erase are available
446 root 1.26 template<class T>
447     struct unordered_vector : std::vector<T, slice_allocator<T> >
448 root 1.6 {
449 root 1.11 typedef typename unordered_vector::iterator iterator;
450 root 1.6
451     void erase (unsigned int pos)
452     {
453     if (pos < this->size () - 1)
454     (*this)[pos] = (*this)[this->size () - 1];
455    
456     this->pop_back ();
457     }
458    
459     void erase (iterator i)
460     {
461     erase ((unsigned int )(i - this->begin ()));
462     }
463     };
464    
465 root 1.49 // This container blends advantages of linked lists
466     // (efficiency) with vectors (random access) by
467     // by using an unordered vector and storing the vector
468     // index inside the object.
469     //
470     // + memory-efficient on most 64 bit archs
471     // + O(1) insert/remove
472     // + free unique (but varying) id for inserted objects
473     // + cache-friendly iteration
474     // - only works for pointers to structs
475     //
476     // NOTE: only some forms of erase/insert are available
477 root 1.50 typedef int object_vector_index;
478    
479     template<class T, object_vector_index T::*indexmember>
480 root 1.26 struct object_vector : std::vector<T *, slice_allocator<T *> >
481     {
482 root 1.48 typedef typename object_vector::iterator iterator;
483    
484     bool contains (const T *obj) const
485     {
486 root 1.50 return obj->*indexmember;
487 root 1.48 }
488    
489     iterator find (const T *obj)
490     {
491 root 1.50 return obj->*indexmember
492     ? this->begin () + obj->*indexmember - 1
493 root 1.48 : this->end ();
494     }
495    
496 root 1.53 void push_back (T *obj)
497     {
498     std::vector<T *, slice_allocator<T *> >::push_back (obj);
499     obj->*indexmember = this->size ();
500     }
501    
502 root 1.26 void insert (T *obj)
503     {
504     push_back (obj);
505     }
506    
507     void insert (T &obj)
508     {
509     insert (&obj);
510     }
511    
512     void erase (T *obj)
513     {
514 root 1.50 unsigned int pos = obj->*indexmember;
515     obj->*indexmember = 0;
516 root 1.26
517     if (pos < this->size ())
518     {
519     (*this)[pos - 1] = (*this)[this->size () - 1];
520 root 1.50 (*this)[pos - 1]->*indexmember = pos;
521 root 1.26 }
522    
523     this->pop_back ();
524     }
525    
526     void erase (T &obj)
527     {
528 root 1.50 erase (&obj);
529 root 1.26 }
530     };
531    
532 root 1.10 // basically does what strncpy should do, but appends "..." to strings exceeding length
533     void assign (char *dst, const char *src, int maxlen);
534    
535     // type-safe version of assign
536 root 1.9 template<int N>
537     inline void assign (char (&dst)[N], const char *src)
538     {
539 root 1.10 assign ((char *)&dst, src, N);
540 root 1.9 }
541    
542 root 1.17 typedef double tstamp;
543    
544     // return current time as timestampe
545     tstamp now ();
546    
547 root 1.25 int similar_direction (int a, int b);
548    
549 root 1.43 // like printf, but returns a std::string
550     const std::string format (const char *format, ...);
551    
552 root 1.1 #endif
553