ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libecb/ecb.h
(Generate patch)

Comparing libecb/ecb.h (file contents):
Revision 1.157 by root, Fri Feb 20 17:17:26 2015 UTC vs.
Revision 1.179 by root, Sat Dec 28 08:01:05 2019 UTC

40 40
41#ifndef ECB_H 41#ifndef ECB_H
42#define ECB_H 42#define ECB_H
43 43
44/* 16 bits major, 16 bits minor */ 44/* 16 bits major, 16 bits minor */
45#define ECB_VERSION 0x00010004 45#define ECB_VERSION 0x00010007
46 46
47#ifdef _WIN32 47#ifdef _WIN32
48 typedef signed char int8_t; 48 typedef signed char int8_t;
49 typedef unsigned char uint8_t; 49 typedef unsigned char uint8_t;
50 typedef signed short int16_t; 50 typedef signed short int16_t;
67 typedef uint32_t uintptr_t; 67 typedef uint32_t uintptr_t;
68 typedef int32_t intptr_t; 68 typedef int32_t intptr_t;
69 #endif 69 #endif
70#else 70#else
71 #include <inttypes.h> 71 #include <inttypes.h>
72 #if UINTMAX_MAX > 0xffffffffU 72 #if (defined INTPTR_MAX ? INTPTR_MAX : ULONG_MAX) > 0xffffffffU
73 #define ECB_PTRSIZE 8 73 #define ECB_PTRSIZE 8
74 #else 74 #else
75 #define ECB_PTRSIZE 4 75 #define ECB_PTRSIZE 4
76 #endif 76 #endif
77#endif 77#endif
78 78
79#define ECB_GCC_AMD64 (__amd64 || __amd64__ || __x86_64 || __x86_64__)
80#define ECB_MSVC_AMD64 (_M_AMD64 || _M_X64)
81
82#ifndef ECB_OPTIMIZE_SIZE
83 #if __OPTIMIZE_SIZE__
84 #define ECB_OPTIMIZE_SIZE 1
85 #else
86 #define ECB_OPTIMIZE_SIZE 0
87 #endif
88#endif
89
79/* work around x32 idiocy by defining proper macros */ 90/* work around x32 idiocy by defining proper macros */
80#if __amd64 || __x86_64 || _M_AMD64 || _M_X64 91#if ECB_GCC_AMD64 || ECB_MSVC_AMD64
81 #if _ILP32 92 #if _ILP32
82 #define ECB_AMD64_X32 1 93 #define ECB_AMD64_X32 1
83 #else 94 #else
84 #define ECB_AMD64 1 95 #define ECB_AMD64 1
85 #endif 96 #endif
112 #define ECB_CLANG_EXTENSION(x) 0 123 #define ECB_CLANG_EXTENSION(x) 0
113#endif 124#endif
114 125
115#define ECB_CPP (__cplusplus+0) 126#define ECB_CPP (__cplusplus+0)
116#define ECB_CPP11 (__cplusplus >= 201103L) 127#define ECB_CPP11 (__cplusplus >= 201103L)
128#define ECB_CPP14 (__cplusplus >= 201402L)
129#define ECB_CPP17 (__cplusplus >= 201703L)
117 130
118#if ECB_CPP 131#if ECB_CPP
119 #define ECB_C 0 132 #define ECB_C 0
120 #define ECB_STDC_VERSION 0 133 #define ECB_STDC_VERSION 0
121#else 134#else
123 #define ECB_STDC_VERSION __STDC_VERSION__ 136 #define ECB_STDC_VERSION __STDC_VERSION__
124#endif 137#endif
125 138
126#define ECB_C99 (ECB_STDC_VERSION >= 199901L) 139#define ECB_C99 (ECB_STDC_VERSION >= 199901L)
127#define ECB_C11 (ECB_STDC_VERSION >= 201112L) 140#define ECB_C11 (ECB_STDC_VERSION >= 201112L)
141#define ECB_C17 (ECB_STDC_VERSION >= 201710L)
128 142
129#if ECB_CPP 143#if ECB_CPP
130 #define ECB_EXTERN_C extern "C" 144 #define ECB_EXTERN_C extern "C"
131 #define ECB_EXTERN_C_BEG ECB_EXTERN_C { 145 #define ECB_EXTERN_C_BEG ECB_EXTERN_C {
132 #define ECB_EXTERN_C_END } 146 #define ECB_EXTERN_C_END }
147 161
148#if ECB_NO_SMP 162#if ECB_NO_SMP
149 #define ECB_MEMORY_FENCE do { } while (0) 163 #define ECB_MEMORY_FENCE do { } while (0)
150#endif 164#endif
151 165
166/* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/compiler_ref/compiler_builtins.html */
167#if __xlC__ && ECB_CPP
168 #include <builtins.h>
169#endif
170
171#if 1400 <= _MSC_VER
172 #include <intrin.h> /* fence functions _ReadBarrier, also bit search functions _BitScanReverse */
173#endif
174
152#ifndef ECB_MEMORY_FENCE 175#ifndef ECB_MEMORY_FENCE
153 #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110 176 #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
177 #define ECB_MEMORY_FENCE_RELAXED __asm__ __volatile__ ("" : : : "memory")
154 #if __i386 || __i386__ 178 #if __i386 || __i386__
155 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("lock; orb $0, -1(%%esp)" : : : "memory") 179 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("lock; orb $0, -1(%%esp)" : : : "memory")
156 #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory") 180 #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory")
157 #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("") 181 #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("" : : : "memory")
158 #elif __amd64 || __amd64__ || __x86_64 || __x86_64__ 182 #elif ECB_GCC_AMD64
159 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mfence" : : : "memory") 183 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mfence" : : : "memory")
160 #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory") 184 #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory")
161 #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("") 185 #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("" : : : "memory")
162 #elif __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ 186 #elif __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__
163 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("sync" : : : "memory") 187 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("sync" : : : "memory")
188 #elif defined __ARM_ARCH_2__ \
189 || defined __ARM_ARCH_3__ || defined __ARM_ARCH_3M__ \
190 || defined __ARM_ARCH_4__ || defined __ARM_ARCH_4T__ \
191 || defined __ARM_ARCH_5__ || defined __ARM_ARCH_5E__ \
192 || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__ \
193 || defined __ARM_ARCH_5TEJ__
194 /* should not need any, unless running old code on newer cpu - arm doesn't support that */
164 #elif defined __ARM_ARCH_6__ || defined __ARM_ARCH_6J__ \ 195 #elif defined __ARM_ARCH_6__ || defined __ARM_ARCH_6J__ \
165 || defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6ZK__ 196 || defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6ZK__ \
197 || defined __ARM_ARCH_6T2__
166 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mcr p15,0,%0,c7,c10,5" : : "r" (0) : "memory") 198 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mcr p15,0,%0,c7,c10,5" : : "r" (0) : "memory")
167 #elif defined __ARM_ARCH_7__ || defined __ARM_ARCH_7A__ \ 199 #elif defined __ARM_ARCH_7__ || defined __ARM_ARCH_7A__ \
168 || defined __ARM_ARCH_7M__ || defined __ARM_ARCH_7R__ 200 || defined __ARM_ARCH_7R__ || defined __ARM_ARCH_7M__
169 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("dmb" : : : "memory") 201 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("dmb" : : : "memory")
170 #elif __aarch64__ 202 #elif __aarch64__
171 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("dmb ish" : : : "memory") 203 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("dmb ish" : : : "memory")
172 #elif (__sparc || __sparc__) && !__sparcv8 204 #elif (__sparc || __sparc__) && !(__sparc_v8__ || defined __sparcv8)
173 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad | #StoreStore | #StoreLoad" : : : "memory") 205 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad | #StoreStore | #StoreLoad" : : : "memory")
174 #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad" : : : "memory") 206 #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad" : : : "memory")
175 #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("membar #LoadStore | #StoreStore") 207 #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("membar #LoadStore | #StoreStore")
176 #elif defined __s390__ || defined __s390x__ 208 #elif defined __s390__ || defined __s390x__
177 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("bcr 15,0" : : : "memory") 209 #define ECB_MEMORY_FENCE __asm__ __volatile__ ("bcr 15,0" : : : "memory")
200 #if ECB_GCC_VERSION(4,7) 232 #if ECB_GCC_VERSION(4,7)
201 /* see comment below (stdatomic.h) about the C11 memory model. */ 233 /* see comment below (stdatomic.h) about the C11 memory model. */
202 #define ECB_MEMORY_FENCE __atomic_thread_fence (__ATOMIC_SEQ_CST) 234 #define ECB_MEMORY_FENCE __atomic_thread_fence (__ATOMIC_SEQ_CST)
203 #define ECB_MEMORY_FENCE_ACQUIRE __atomic_thread_fence (__ATOMIC_ACQUIRE) 235 #define ECB_MEMORY_FENCE_ACQUIRE __atomic_thread_fence (__ATOMIC_ACQUIRE)
204 #define ECB_MEMORY_FENCE_RELEASE __atomic_thread_fence (__ATOMIC_RELEASE) 236 #define ECB_MEMORY_FENCE_RELEASE __atomic_thread_fence (__ATOMIC_RELEASE)
237 #define ECB_MEMORY_FENCE_RELAXED __atomic_thread_fence (__ATOMIC_RELAXED)
205 238
206 #elif ECB_CLANG_EXTENSION(c_atomic) 239 #elif ECB_CLANG_EXTENSION(c_atomic)
207 /* see comment below (stdatomic.h) about the C11 memory model. */ 240 /* see comment below (stdatomic.h) about the C11 memory model. */
208 #define ECB_MEMORY_FENCE __c11_atomic_thread_fence (__ATOMIC_SEQ_CST) 241 #define ECB_MEMORY_FENCE __c11_atomic_thread_fence (__ATOMIC_SEQ_CST)
209 #define ECB_MEMORY_FENCE_ACQUIRE __c11_atomic_thread_fence (__ATOMIC_ACQUIRE) 242 #define ECB_MEMORY_FENCE_ACQUIRE __c11_atomic_thread_fence (__ATOMIC_ACQUIRE)
210 #define ECB_MEMORY_FENCE_RELEASE __c11_atomic_thread_fence (__ATOMIC_RELEASE) 243 #define ECB_MEMORY_FENCE_RELEASE __c11_atomic_thread_fence (__ATOMIC_RELEASE)
244 #define ECB_MEMORY_FENCE_RELAXED __c11_atomic_thread_fence (__ATOMIC_RELAXED)
211 245
212 #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__ 246 #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__
213 #define ECB_MEMORY_FENCE __sync_synchronize () 247 #define ECB_MEMORY_FENCE __sync_synchronize ()
214 #elif _MSC_VER >= 1500 /* VC++ 2008 */ 248 #elif _MSC_VER >= 1500 /* VC++ 2008 */
215 /* apparently, microsoft broke all the memory barrier stuff in Visual Studio 2008... */ 249 /* apparently, microsoft broke all the memory barrier stuff in Visual Studio 2008... */
225 #elif defined _WIN32 259 #elif defined _WIN32
226 #include <WinNT.h> 260 #include <WinNT.h>
227 #define ECB_MEMORY_FENCE MemoryBarrier () /* actually just xchg on x86... scary */ 261 #define ECB_MEMORY_FENCE MemoryBarrier () /* actually just xchg on x86... scary */
228 #elif __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110 262 #elif __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
229 #include <mbarrier.h> 263 #include <mbarrier.h>
230 #define ECB_MEMORY_FENCE __machine_rw_barrier () 264 #define ECB_MEMORY_FENCE __machine_rw_barrier ()
231 #define ECB_MEMORY_FENCE_ACQUIRE __machine_r_barrier () 265 #define ECB_MEMORY_FENCE_ACQUIRE __machine_acq_barrier ()
232 #define ECB_MEMORY_FENCE_RELEASE __machine_w_barrier () 266 #define ECB_MEMORY_FENCE_RELEASE __machine_rel_barrier ()
267 #define ECB_MEMORY_FENCE_RELAXED __compiler_barrier ()
233 #elif __xlC__ 268 #elif __xlC__
234 #define ECB_MEMORY_FENCE __sync () 269 #define ECB_MEMORY_FENCE __sync ()
235 #endif 270 #endif
236#endif 271#endif
237 272
238#ifndef ECB_MEMORY_FENCE 273#ifndef ECB_MEMORY_FENCE
239 #if ECB_C11 && !defined __STDC_NO_ATOMICS__ 274 #if ECB_C11 && !defined __STDC_NO_ATOMICS__
240 /* we assume that these memory fences work on all variables/all memory accesses, */ 275 /* we assume that these memory fences work on all variables/all memory accesses, */
241 /* not just C11 atomics and atomic accesses */ 276 /* not just C11 atomics and atomic accesses */
242 #include <stdatomic.h> 277 #include <stdatomic.h>
243 /* Unfortunately, neither gcc 4.7 nor clang 3.1 generate any instructions for */
244 /* any fence other than seq_cst, which isn't very efficient for us. */
245 /* Why that is, we don't know - either the C11 memory model is quite useless */
246 /* for most usages, or gcc and clang have a bug */
247 /* I *currently* lean towards the latter, and inefficiently implement */
248 /* all three of ecb's fences as a seq_cst fence */
249 /* Update, gcc-4.8 generates mfence for all c++ fences, but nothing */
250 /* for all __atomic_thread_fence's except seq_cst */
251 #define ECB_MEMORY_FENCE atomic_thread_fence (memory_order_seq_cst) 278 #define ECB_MEMORY_FENCE atomic_thread_fence (memory_order_seq_cst)
279 #define ECB_MEMORY_FENCE_ACQUIRE atomic_thread_fence (memory_order_acquire)
280 #define ECB_MEMORY_FENCE_RELEASE atomic_thread_fence (memory_order_release)
252 #endif 281 #endif
253#endif 282#endif
254 283
255#ifndef ECB_MEMORY_FENCE 284#ifndef ECB_MEMORY_FENCE
256 #if !ECB_AVOID_PTHREADS 285 #if !ECB_AVOID_PTHREADS
276 305
277#if !defined ECB_MEMORY_FENCE_RELEASE && defined ECB_MEMORY_FENCE 306#if !defined ECB_MEMORY_FENCE_RELEASE && defined ECB_MEMORY_FENCE
278 #define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE 307 #define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE
279#endif 308#endif
280 309
310#if !defined ECB_MEMORY_FENCE_RELAXED && defined ECB_MEMORY_FENCE
311 #define ECB_MEMORY_FENCE_RELAXED ECB_MEMORY_FENCE /* very heavy-handed */
312#endif
313
281/*****************************************************************************/ 314/*****************************************************************************/
282 315
283#if ECB_CPP 316#if ECB_CPP
284 #define ecb_inline static inline 317 #define ecb_inline static inline
285#elif ECB_GCC_VERSION(2,5) 318#elif ECB_GCC_VERSION(2,5)
349 #define ecb_deprecated __declspec (deprecated) 382 #define ecb_deprecated __declspec (deprecated)
350#else 383#else
351 #define ecb_deprecated ecb_attribute ((__deprecated__)) 384 #define ecb_deprecated ecb_attribute ((__deprecated__))
352#endif 385#endif
353 386
354#if __MSC_VER >= 1500 387#if _MSC_VER >= 1500
355 #define ecb_deprecated_message(msg) __declspec (deprecated (msg)) 388 #define ecb_deprecated_message(msg) __declspec (deprecated (msg))
356#elif ECB_GCC_VERSION(4,5) 389#elif ECB_GCC_VERSION(4,5)
357 #define ecb_deprecated_message(msg) ecb_attribute ((__deprecated__ (msg)) 390 #define ecb_deprecated_message(msg) ecb_attribute ((__deprecated__ (msg))
358#else 391#else
359 #define ecb_deprecated_message(msg) ecb_deprecated 392 #define ecb_deprecated_message(msg) ecb_deprecated
368#define ecb_unused ecb_attribute ((__unused__)) 401#define ecb_unused ecb_attribute ((__unused__))
369#define ecb_const ecb_attribute ((__const__)) 402#define ecb_const ecb_attribute ((__const__))
370#define ecb_pure ecb_attribute ((__pure__)) 403#define ecb_pure ecb_attribute ((__pure__))
371 404
372#if ECB_C11 || __IBMC_NORETURN 405#if ECB_C11 || __IBMC_NORETURN
373 /* http://pic.dhe.ibm.com/infocenter/compbg/v121v141/topic/com.ibm.xlcpp121.bg.doc/language_ref/noreturn.html */ 406 /* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/language_ref/noreturn.html */
374 #define ecb_noreturn _Noreturn 407 #define ecb_noreturn _Noreturn
375#elif ECB_CPP11 408#elif ECB_CPP11
376 #define ecb_noreturn [[noreturn]] 409 #define ecb_noreturn [[noreturn]]
377#elif _MSC_VER >= 1200 410#elif _MSC_VER >= 1200
378 /* http://msdn.microsoft.com/en-us/library/k6ktzx3s.aspx */ 411 /* http://msdn.microsoft.com/en-us/library/k6ktzx3s.aspx */
415#else 448#else
416 ecb_function_ ecb_const int ecb_ctz32 (uint32_t x); 449 ecb_function_ ecb_const int ecb_ctz32 (uint32_t x);
417 ecb_function_ ecb_const int 450 ecb_function_ ecb_const int
418 ecb_ctz32 (uint32_t x) 451 ecb_ctz32 (uint32_t x)
419 { 452 {
453#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
454 unsigned long r;
455 _BitScanForward (&r, x);
456 return (int)r;
457#else
420 int r = 0; 458 int r = 0;
421 459
422 x &= ~x + 1; /* this isolates the lowest bit */ 460 x &= ~x + 1; /* this isolates the lowest bit */
423 461
424#if ECB_branchless_on_i386 462#if ECB_branchless_on_i386
434 if (x & 0xff00ff00) r += 8; 472 if (x & 0xff00ff00) r += 8;
435 if (x & 0xffff0000) r += 16; 473 if (x & 0xffff0000) r += 16;
436#endif 474#endif
437 475
438 return r; 476 return r;
477#endif
439 } 478 }
440 479
441 ecb_function_ ecb_const int ecb_ctz64 (uint64_t x); 480 ecb_function_ ecb_const int ecb_ctz64 (uint64_t x);
442 ecb_function_ ecb_const int 481 ecb_function_ ecb_const int
443 ecb_ctz64 (uint64_t x) 482 ecb_ctz64 (uint64_t x)
444 { 483 {
484#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
485 unsigned long r;
486 _BitScanForward64 (&r, x);
487 return (int)r;
488#else
445 int shift = x & 0xffffffffU ? 0 : 32; 489 int shift = x & 0xffffffff ? 0 : 32;
446 return ecb_ctz32 (x >> shift) + shift; 490 return ecb_ctz32 (x >> shift) + shift;
491#endif
447 } 492 }
448 493
449 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); 494 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x);
450 ecb_function_ ecb_const int 495 ecb_function_ ecb_const int
451 ecb_popcount32 (uint32_t x) 496 ecb_popcount32 (uint32_t x)
459 } 504 }
460 505
461 ecb_function_ ecb_const int ecb_ld32 (uint32_t x); 506 ecb_function_ ecb_const int ecb_ld32 (uint32_t x);
462 ecb_function_ ecb_const int ecb_ld32 (uint32_t x) 507 ecb_function_ ecb_const int ecb_ld32 (uint32_t x)
463 { 508 {
509#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
510 unsigned long r;
511 _BitScanReverse (&r, x);
512 return (int)r;
513#else
464 int r = 0; 514 int r = 0;
465 515
466 if (x >> 16) { x >>= 16; r += 16; } 516 if (x >> 16) { x >>= 16; r += 16; }
467 if (x >> 8) { x >>= 8; r += 8; } 517 if (x >> 8) { x >>= 8; r += 8; }
468 if (x >> 4) { x >>= 4; r += 4; } 518 if (x >> 4) { x >>= 4; r += 4; }
469 if (x >> 2) { x >>= 2; r += 2; } 519 if (x >> 2) { x >>= 2; r += 2; }
470 if (x >> 1) { r += 1; } 520 if (x >> 1) { r += 1; }
471 521
472 return r; 522 return r;
523#endif
473 } 524 }
474 525
475 ecb_function_ ecb_const int ecb_ld64 (uint64_t x); 526 ecb_function_ ecb_const int ecb_ld64 (uint64_t x);
476 ecb_function_ ecb_const int ecb_ld64 (uint64_t x) 527 ecb_function_ ecb_const int ecb_ld64 (uint64_t x)
477 { 528 {
529#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
530 unsigned long r;
531 _BitScanReverse64 (&r, x);
532 return (int)r;
533#else
478 int r = 0; 534 int r = 0;
479 535
480 if (x >> 32) { x >>= 32; r += 32; } 536 if (x >> 32) { x >>= 32; r += 32; }
481 537
482 return r + ecb_ld32 (x); 538 return r + ecb_ld32 (x);
539#endif
483 } 540 }
484#endif 541#endif
485 542
486ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x); 543ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x);
487ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x) { return !(x & (x - 1)); } 544ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x) { return !(x & (x - 1)); }
544ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (32 - count)) | (x >> count); } 601ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (32 - count)) | (x >> count); }
545ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (64 - count)) | (x << count); } 602ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (64 - count)) | (x << count); }
546ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (64 - count)) | (x >> count); } 603ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (64 - count)) | (x >> count); }
547 604
548#if ECB_GCC_VERSION(4,3) || (ECB_CLANG_BUILTIN(__builtin_bswap32) && ECB_CLANG_BUILTIN(__builtin_bswap64)) 605#if ECB_GCC_VERSION(4,3) || (ECB_CLANG_BUILTIN(__builtin_bswap32) && ECB_CLANG_BUILTIN(__builtin_bswap64))
606 #if ECB_GCC_VERSION(4,8) || ECB_CLANG_BUILTIN(__builtin_bswap16)
607 #define ecb_bswap16(x) __builtin_bswap16 (x)
608 #else
549 #define ecb_bswap16(x) (__builtin_bswap32 (x) >> 16) 609 #define ecb_bswap16(x) (__builtin_bswap32 (x) >> 16)
610 #endif
550 #define ecb_bswap32(x) __builtin_bswap32 (x) 611 #define ecb_bswap32(x) __builtin_bswap32 (x)
551 #define ecb_bswap64(x) __builtin_bswap64 (x) 612 #define ecb_bswap64(x) __builtin_bswap64 (x)
613#elif _MSC_VER
614 #include <stdlib.h>
615 #define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x)))
616 #define ecb_bswap32(x) ((uint32_t)_byteswap_ulong ((uint32_t)(x)))
617 #define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x)))
552#else 618#else
553 ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x); 619 ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x);
554 ecb_function_ ecb_const uint16_t 620 ecb_function_ ecb_const uint16_t
555 ecb_bswap16 (uint16_t x) 621 ecb_bswap16 (uint16_t x)
556 { 622 {
581#endif 647#endif
582 648
583/* try to tell the compiler that some condition is definitely true */ 649/* try to tell the compiler that some condition is definitely true */
584#define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 650#define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0
585 651
586ecb_inline ecb_const unsigned char ecb_byteorder_helper (void); 652ecb_inline ecb_const uint32_t ecb_byteorder_helper (void);
587ecb_inline ecb_const unsigned char 653ecb_inline ecb_const uint32_t
588ecb_byteorder_helper (void) 654ecb_byteorder_helper (void)
589{ 655{
590 /* the union code still generates code under pressure in gcc, */ 656 /* the union code still generates code under pressure in gcc, */
591 /* but less than using pointers, and always seems to */ 657 /* but less than using pointers, and always seems to */
592 /* successfully return a constant. */ 658 /* successfully return a constant. */
593 /* the reason why we have this horrible preprocessor mess */ 659 /* the reason why we have this horrible preprocessor mess */
594 /* is to avoid it in all cases, at least on common architectures */ 660 /* is to avoid it in all cases, at least on common architectures */
595 /* or when using a recent enough gcc version (>= 4.6) */ 661 /* or when using a recent enough gcc version (>= 4.6) */
596#if ((__i386 || __i386__) && !__VOS__) || _M_X86 || __amd64 || __amd64__ || _M_X64
597 return 0x44;
598#elif __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 662#if (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
663 || ((__i386 || __i386__ || _M_IX86 || ECB_GCC_AMD64 || ECB_MSVC_AMD64) && !__VOS__)
664 #define ECB_LITTLE_ENDIAN 1
599 return 0x44; 665 return 0x44332211;
600#elif __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 666#elif (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
667 || ((__AARCH64EB__ || __MIPSEB__ || __ARMEB__) && !__VOS__)
668 #define ECB_BIG_ENDIAN 1
601 return 0x11; 669 return 0x11223344;
602#else 670#else
603 union 671 union
604 { 672 {
673 uint8_t c[4];
605 uint32_t i; 674 uint32_t u;
606 uint8_t c;
607 } u = { 0x11223344 }; 675 } u = { 0x11, 0x22, 0x33, 0x44 };
608 return u.c; 676 return u.u;
609#endif 677#endif
610} 678}
611 679
612ecb_inline ecb_const ecb_bool ecb_big_endian (void); 680ecb_inline ecb_const ecb_bool ecb_big_endian (void);
613ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11; } 681ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; }
614ecb_inline ecb_const ecb_bool ecb_little_endian (void); 682ecb_inline ecb_const ecb_bool ecb_little_endian (void);
615ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44; } 683ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; }
616 684
617#if ECB_GCC_VERSION(3,0) || ECB_C99 685#if ECB_GCC_VERSION(3,0) || ECB_C99
618 #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0)) 686 #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0))
619#else 687#else
620 #define ecb_mod(m,n) ((m) < 0 ? ((n) - 1 - ((-1 - (m)) % (n))) : ((m) % (n))) 688 #define ecb_mod(m,n) ((m) < 0 ? ((n) - 1 - ((-1 - (m)) % (n))) : ((m) % (n)))
645 } 713 }
646#else 714#else
647 #define ecb_array_length(name) (sizeof (name) / sizeof (name [0])) 715 #define ecb_array_length(name) (sizeof (name) / sizeof (name [0]))
648#endif 716#endif
649 717
718ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x);
719ecb_function_ ecb_const uint32_t
720ecb_binary16_to_binary32 (uint32_t x)
721{
722 unsigned int s = (x & 0x8000) << (31 - 15);
723 int e = (x >> 10) & 0x001f;
724 unsigned int m = x & 0x03ff;
725
726 if (ecb_expect_false (e == 31))
727 /* infinity or NaN */
728 e = 255 - (127 - 15);
729 else if (ecb_expect_false (!e))
730 {
731 if (ecb_expect_true (!m))
732 /* zero, handled by code below by forcing e to 0 */
733 e = 0 - (127 - 15);
734 else
735 {
736 /* subnormal, renormalise */
737 unsigned int s = 10 - ecb_ld32 (m);
738
739 m = (m << s) & 0x3ff; /* mask implicit bit */
740 e -= s - 1;
741 }
742 }
743
744 /* e and m now are normalised, or zero, (or inf or nan) */
745 e += 127 - 15;
746
747 return s | (e << 23) | (m << (23 - 10));
748}
749
750ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x);
751ecb_function_ ecb_const uint16_t
752ecb_binary32_to_binary16 (uint32_t x)
753{
754 unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */
755 unsigned int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */
756 unsigned int m = x & 0x007fffff;
757
758 x &= 0x7fffffff;
759
760 /* if it's within range of binary16 normals, use fast path */
761 if (ecb_expect_true (0x38800000 <= x && x <= 0x477fefff))
762 {
763 /* mantissa round-to-even */
764 m += 0x00000fff + ((m >> (23 - 10)) & 1);
765
766 /* handle overflow */
767 if (ecb_expect_false (m >= 0x00800000))
768 {
769 m >>= 1;
770 e += 1;
771 }
772
773 return s | (e << 10) | (m >> (23 - 10));
774 }
775
776 /* handle large numbers and infinity */
777 if (ecb_expect_true (0x477fefff < x && x <= 0x7f800000))
778 return s | 0x7c00;
779
780 /* handle zero, subnormals and small numbers */
781 if (ecb_expect_true (x < 0x38800000))
782 {
783 /* zero */
784 if (ecb_expect_true (!x))
785 return s;
786
787 /* handle subnormals */
788
789 /* too small, will be zero */
790 if (e < (14 - 24)) /* might not be sharp, but is good enough */
791 return s;
792
793 m |= 0x00800000; /* make implicit bit explicit */
794
795 /* very tricky - we need to round to the nearest e (+10) bit value */
796 {
797 unsigned int bits = 14 - e;
798 unsigned int half = (1 << (bits - 1)) - 1;
799 unsigned int even = (m >> bits) & 1;
800
801 /* if this overflows, we will end up with a normalised number */
802 m = (m + half + even) >> bits;
803 }
804
805 return s | m;
806 }
807
808 /* handle NaNs, preserve leftmost nan bits, but make sure we don't turn them into infinities */
809 m >>= 13;
810
811 return s | 0x7c00 | m | !m;
812}
813
650/*******************************************************************************/ 814/*******************************************************************************/
651/* floating point stuff, can be disabled by defining ECB_NO_LIBM */ 815/* floating point stuff, can be disabled by defining ECB_NO_LIBM */
652 816
653/* basically, everything uses "ieee pure-endian" floating point numbers */ 817/* basically, everything uses "ieee pure-endian" floating point numbers */
654/* the only noteworthy exception is ancient armle, which uses order 43218765 */ 818/* the only noteworthy exception is ancient armle, which uses order 43218765 */
655#if 0 \ 819#if 0 \
656 || __i386 || __i386__ \ 820 || __i386 || __i386__ \
657 || __amd64 || __amd64__ || __x86_64 || __x86_64__ \ 821 || ECB_GCC_AMD64 \
658 || __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ \ 822 || __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ \
659 || defined __s390__ || defined __s390x__ \ 823 || defined __s390__ || defined __s390x__ \
660 || defined __mips__ \ 824 || defined __mips__ \
661 || defined __alpha__ \ 825 || defined __alpha__ \
662 || defined __hppa__ \ 826 || defined __hppa__ \
663 || defined __ia64__ \ 827 || defined __ia64__ \
664 || defined __m68k__ \ 828 || defined __m68k__ \
665 || defined __m88k__ \ 829 || defined __m88k__ \
666 || defined __sh__ \ 830 || defined __sh__ \
667 || defined _M_IX86 || defined _M_AMD64 || defined _M_IA64 \ 831 || defined _M_IX86 || defined ECB_MSVC_AMD64 || defined _M_IA64 \
668 || (defined __arm__ && (defined __ARM_EABI__ || defined __EABI__ || defined __VFP_FP__ || defined _WIN32_WCE || defined __ANDROID__)) \ 832 || (defined __arm__ && (defined __ARM_EABI__ || defined __EABI__ || defined __VFP_FP__ || defined _WIN32_WCE || defined __ANDROID__)) \
669 || defined __aarch64__ 833 || defined __aarch64__
670 #define ECB_STDFP 1 834 #define ECB_STDFP 1
671 #include <string.h> /* for memcpy */ 835 #include <string.h> /* for memcpy */
672#else 836#else
690 #define ECB_NAN ECB_INFINITY 854 #define ECB_NAN ECB_INFINITY
691 #endif 855 #endif
692 856
693 #if ECB_C99 || _XOPEN_VERSION >= 600 || _POSIX_VERSION >= 200112L 857 #if ECB_C99 || _XOPEN_VERSION >= 600 || _POSIX_VERSION >= 200112L
694 #define ecb_ldexpf(x,e) ldexpf ((x), (e)) 858 #define ecb_ldexpf(x,e) ldexpf ((x), (e))
859 #define ecb_frexpf(x,e) frexpf ((x), (e))
695 #else 860 #else
696 #define ecb_ldexpf(x,e) (float) ldexp ((x), (e)) 861 #define ecb_ldexpf(x,e) (float) ldexp ((double) (x), (e))
862 #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e))
697 #endif 863 #endif
698
699 /* converts an ieee half/binary16 to a float */
700 ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x);
701 ecb_function_ ecb_const float
702 ecb_binary16_to_float (uint16_t x)
703 {
704 int e = (x >> 10) & 0x1f;
705 int m = x & 0x3ff;
706 float r;
707
708 if (!e ) r = ecb_ldexpf (m , -24);
709 else if (e != 31) r = ecb_ldexpf (m + 0x400, e - 25);
710 else if (m ) r = ECB_NAN;
711 else r = ECB_INFINITY;
712
713 return x & 0x8000 ? -r : r;
714 }
715 864
716 /* convert a float to ieee single/binary32 */ 865 /* convert a float to ieee single/binary32 */
717 ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x); 866 ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x);
718 ecb_function_ ecb_const uint32_t 867 ecb_function_ ecb_const uint32_t
719 ecb_float_to_binary32 (float x) 868 ecb_float_to_binary32 (float x)
730 if (x == 0e0f ) return 0x00000000U; 879 if (x == 0e0f ) return 0x00000000U;
731 if (x > +3.40282346638528860e+38f) return 0x7f800000U; 880 if (x > +3.40282346638528860e+38f) return 0x7f800000U;
732 if (x < -3.40282346638528860e+38f) return 0xff800000U; 881 if (x < -3.40282346638528860e+38f) return 0xff800000U;
733 if (x != x ) return 0x7fbfffffU; 882 if (x != x ) return 0x7fbfffffU;
734 883
735 m = frexpf (x, &e) * 0x1000000U; 884 m = ecb_frexpf (x, &e) * 0x1000000U;
736 885
737 r = m & 0x80000000U; 886 r = m & 0x80000000U;
738 887
739 if (r) 888 if (r)
740 m = -m; 889 m = -m;
851 #endif 1000 #endif
852 1001
853 return r; 1002 return r;
854 } 1003 }
855 1004
856#endif 1005 /* convert a float to ieee half/binary16 */
1006 ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x);
1007 ecb_function_ ecb_const uint16_t
1008 ecb_float_to_binary16 (float x)
1009 {
1010 return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x));
1011 }
857 1012
858#endif 1013 /* convert an ieee half/binary16 to float */
1014 ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x);
1015 ecb_function_ ecb_const float
1016 ecb_binary16_to_float (uint16_t x)
1017 {
1018 return ecb_binary32_to_float (ecb_binary16_to_binary32 (x));
1019 }
859 1020
1021#endif
1022
1023#endif
1024

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines