1 | /* |
1 | /* |
2 | * libecb - http://software.schmorp.de/pkg/libecb |
2 | * libecb - http://software.schmorp.de/pkg/libecb |
3 | * |
3 | * |
4 | * Copyright (©) 2009-2015,2018-2020 Marc Alexander Lehmann <libecb@schmorp.de> |
4 | * Copyright (©) 2009-2015,2018-2021 Marc Alexander Lehmann <libecb@schmorp.de> |
5 | * Copyright (©) 2011 Emanuele Giaquinta |
5 | * Copyright (©) 2011 Emanuele Giaquinta |
6 | * All rights reserved. |
6 | * All rights reserved. |
7 | * |
7 | * |
8 | * Redistribution and use in source and binary forms, with or without modifica- |
8 | * Redistribution and use in source and binary forms, with or without modifica- |
9 | * tion, are permitted provided that the following conditions are met: |
9 | * tion, are permitted provided that the following conditions are met: |
… | |
… | |
40 | |
40 | |
41 | #ifndef ECB_H |
41 | #ifndef ECB_H |
42 | #define ECB_H |
42 | #define ECB_H |
43 | |
43 | |
44 | /* 16 bits major, 16 bits minor */ |
44 | /* 16 bits major, 16 bits minor */ |
45 | #define ECB_VERSION 0x00010008 |
45 | #define ECB_VERSION 0x00010009 |
46 | |
46 | |
47 | #include <string.h> /* for memcpy */ |
47 | #include <string.h> /* for memcpy */ |
48 | |
48 | |
49 | #ifdef _WIN32 |
49 | #if defined (_WIN32) && !defined (__MINGW32__) |
50 | typedef signed char int8_t; |
50 | typedef signed char int8_t; |
51 | typedef unsigned char uint8_t; |
51 | typedef unsigned char uint8_t; |
52 | typedef signed char int_fast8_t; |
52 | typedef signed char int_fast8_t; |
53 | typedef unsigned char uint_fast8_t; |
53 | typedef unsigned char uint_fast8_t; |
54 | typedef signed short int16_t; |
54 | typedef signed short int16_t; |
… | |
… | |
102 | #if _ILP32 |
102 | #if _ILP32 |
103 | #define ECB_AMD64_X32 1 |
103 | #define ECB_AMD64_X32 1 |
104 | #else |
104 | #else |
105 | #define ECB_AMD64 1 |
105 | #define ECB_AMD64 1 |
106 | #endif |
106 | #endif |
|
|
107 | #endif |
|
|
108 | |
|
|
109 | #if ECB_PTRSIZE >= 8 || ECB_AMD64_X32 |


110 | #define ECB_64BIT_NATIVE 1 /* pointers are at least 8 bytes wide, or ECB_AMD64_X32 was set above (the _ILP32 case) */ |


111 | #else |


112 | #define ECB_64BIT_NATIVE 0 /* neither condition holds */ |
107 | #endif |
113 | #endif |
108 | |
114 | |
109 | /* many compilers define __GNUC__ to some versions but then only implement |
115 | /* many compilers define __GNUC__ to some versions but then only implement |
110 | * what their idiot authors think are the "more important" extensions, |
116 | * what their idiot authors think are the "more important" extensions, |
111 | * causing enormous grief in return for some better fake benchmark numbers. |
117 | * causing enormous grief in return for some better fake benchmark numbers. |
… | |
… | |
242 | #if ECB_GCC_VERSION(4,7) |
248 | #if ECB_GCC_VERSION(4,7) |
243 | /* see comment below (stdatomic.h) about the C11 memory model. */ |
249 | /* see comment below (stdatomic.h) about the C11 memory model. */ |
244 | #define ECB_MEMORY_FENCE __atomic_thread_fence (__ATOMIC_SEQ_CST) |
250 | #define ECB_MEMORY_FENCE __atomic_thread_fence (__ATOMIC_SEQ_CST) |
245 | #define ECB_MEMORY_FENCE_ACQUIRE __atomic_thread_fence (__ATOMIC_ACQUIRE) |
251 | #define ECB_MEMORY_FENCE_ACQUIRE __atomic_thread_fence (__ATOMIC_ACQUIRE) |
246 | #define ECB_MEMORY_FENCE_RELEASE __atomic_thread_fence (__ATOMIC_RELEASE) |
252 | #define ECB_MEMORY_FENCE_RELEASE __atomic_thread_fence (__ATOMIC_RELEASE) |
|
|
253 | #undef ECB_MEMORY_FENCE_RELAXED |
247 | #define ECB_MEMORY_FENCE_RELAXED __atomic_thread_fence (__ATOMIC_RELAXED) |
254 | #define ECB_MEMORY_FENCE_RELAXED __atomic_thread_fence (__ATOMIC_RELAXED) |
248 | |
255 | |
249 | #elif ECB_CLANG_EXTENSION(c_atomic) |
256 | #elif ECB_CLANG_EXTENSION(c_atomic) |
250 | /* see comment below (stdatomic.h) about the C11 memory model. */ |
257 | /* see comment below (stdatomic.h) about the C11 memory model. */ |
251 | #define ECB_MEMORY_FENCE __c11_atomic_thread_fence (__ATOMIC_SEQ_CST) |
258 | #define ECB_MEMORY_FENCE __c11_atomic_thread_fence (__ATOMIC_SEQ_CST) |
252 | #define ECB_MEMORY_FENCE_ACQUIRE __c11_atomic_thread_fence (__ATOMIC_ACQUIRE) |
259 | #define ECB_MEMORY_FENCE_ACQUIRE __c11_atomic_thread_fence (__ATOMIC_ACQUIRE) |
253 | #define ECB_MEMORY_FENCE_RELEASE __c11_atomic_thread_fence (__ATOMIC_RELEASE) |
260 | #define ECB_MEMORY_FENCE_RELEASE __c11_atomic_thread_fence (__ATOMIC_RELEASE) |
|
|
261 | #undef ECB_MEMORY_FENCE_RELAXED |
254 | #define ECB_MEMORY_FENCE_RELAXED __c11_atomic_thread_fence (__ATOMIC_RELAXED) |
262 | #define ECB_MEMORY_FENCE_RELAXED __c11_atomic_thread_fence (__ATOMIC_RELAXED) |
255 | |
263 | |
256 | #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__ |
264 | #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__ |
257 | #define ECB_MEMORY_FENCE __sync_synchronize () |
265 | #define ECB_MEMORY_FENCE __sync_synchronize () |
258 | #elif _MSC_VER >= 1500 /* VC++ 2008 */ |
266 | #elif _MSC_VER >= 1500 /* VC++ 2008 */ |
… | |
… | |
799 | template<typename T> inline void ecb_poke_le_u (void *ptr, T v) { return ecb_poke_u<T> (ptr, ecb_host_to_le (v)); } |
807 | template<typename T> inline void ecb_poke_le_u (void *ptr, T v) { return ecb_poke_u<T> (ptr, ecb_host_to_le (v)); } |
800 | |
808 | |
801 | #endif |
809 | #endif |
802 | |
810 | |
803 | /*****************************************************************************/ |
811 | /*****************************************************************************/ |
|
|
812 | /* division */ |
804 | |
813 | |
805 | #if ECB_GCC_VERSION(3,0) || ECB_C99 |
814 | #if ECB_GCC_VERSION(3,0) || ECB_C99 |


815 | /* ecb_mod(m,n): m modulo n, with the result folded into 0 .. n-1 (assuming n > 0 - TODO confirm callers never pass n <= 0). C99 tightened the definition of % (division truncates towards zero, so a non-zero remainder has the sign of m), so we can use a more efficient version: take m % n and add n when it came out negative. note that both arguments are evaluated more than once, so they must be free of side effects. */ |
806 | #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0)) |
816 | #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0)) |
807 | #else |
817 | #else |
808 | #define ecb_mod(m,n) ((m) < 0 ? ((n) - 1 - ((-1 - (m)) % (n))) : ((m) % (n))) |
818 | #define ecb_mod(m,n) ((m) < 0 ? ((n) - 1 - ((-1 - (m)) % (n))) : ((m) % (n))) /* fallback: % is only ever applied to a non-negative left operand, since -1 - (m) >= 0 whenever m < 0 */ |
809 | #endif |
819 | #endif |
810 | |
820 | |
… | |
… | |
821 | } |
831 | } |
822 | #else |
832 | #else |
823 | #define ecb_div_rd(val,div) ((val) < 0 ? - ((-(val) + (div) - 1) / (div)) : ((val) ) / (div)) |
833 | #define ecb_div_rd(val,div) ((val) < 0 ? - ((-(val) + (div) - 1) / (div)) : ((val) ) / (div)) |
824 | #define ecb_div_ru(val,div) ((val) < 0 ? - ((-(val) ) / (div)) : ((val) + (div) - 1) / (div)) |
834 | #define ecb_div_ru(val,div) ((val) < 0 ? - ((-(val) ) / (div)) : ((val) + (div) - 1) / (div)) |
825 | #endif |
835 | #endif |
|
|
836 | |
|
|
837 | /*****************************************************************************/ |
|
|
838 | /* array length */ |
826 | |
839 | |
827 | #if ecb_cplusplus_does_not_suck |
840 | #if ecb_cplusplus_does_not_suck |
828 | /* does not work for local types (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm) */ |
841 | /* does not work for local types (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm) */ |
829 | template<typename T, int N> |
842 | template<typename T, int N> |
830 | static inline int ecb_array_length (const T (&arr)[N]) |
843 | static inline int ecb_array_length (const T (&arr)[N]) |
… | |
… | |
834 | #else |
847 | #else |
835 | #define ecb_array_length(name) (sizeof (name) / sizeof (name [0])) |
848 | #define ecb_array_length(name) (sizeof (name) / sizeof (name [0])) |
836 | #endif |
849 | #endif |
837 | |
850 | |
838 | /*****************************************************************************/ |
851 | /*****************************************************************************/ |
|
|
852 | /* IEEE 754-2008 half float conversions */ |
839 | |
853 | |
840 | ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x); |
854 | ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x); |
841 | ecb_function_ ecb_const uint32_t |
855 | ecb_function_ ecb_const uint32_t |
842 | ecb_binary16_to_binary32 (uint32_t x) |
856 | ecb_binary16_to_binary32 (uint32_t x) |
843 | { |
857 | { |
… | |
… | |
872 | ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x); |
886 | ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x); |
873 | ecb_function_ ecb_const uint16_t |
887 | ecb_function_ ecb_const uint16_t |
874 | ecb_binary32_to_binary16 (uint32_t x) |
888 | ecb_binary32_to_binary16 (uint32_t x) |
875 | { |
889 | { |
876 | unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */ |
890 | unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */ |
877 | unsigned int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */ |
891 | int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */ |
878 | unsigned int m = x & 0x007fffff; |
892 | unsigned int m = x & 0x007fffff; |
879 | |
893 | |
880 | x &= 0x7fffffff; |
894 | x &= 0x7fffffff; |
881 | |
895 | |
882 | /* if it's within range of binary16 normals, use fast path */ |
896 | /* if it's within range of binary16 normals, use fast path */ |