ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libecb/ecb.h
(Generate patch)

Comparing libecb/ecb.h (file contents):
Revision 1.166 by root, Sun Aug 9 00:10:21 2015 UTC vs.
Revision 1.171 by root, Tue Nov 24 19:46:20 2015 UTC

40 40
41#ifndef ECB_H 41#ifndef ECB_H
42#define ECB_H 42#define ECB_H
43 43
44/* 16 bits major, 16 bits minor */ 44/* 16 bits major, 16 bits minor */
45#define ECB_VERSION 0x00010004 45#define ECB_VERSION 0x00010005
46 46
47#ifdef _WIN32 47#ifdef _WIN32
48 typedef signed char int8_t; 48 typedef signed char int8_t;
49 typedef unsigned char uint8_t; 49 typedef unsigned char uint8_t;
50 typedef signed short int16_t; 50 typedef signed short int16_t;
153#endif 153#endif
154 154
155/* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/compiler_ref/compiler_builtins.html */ 155/* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/compiler_ref/compiler_builtins.html */
156#if __xlC__ && ECB_CPP 156#if __xlC__ && ECB_CPP
157 #include <builtins.h> 157 #include <builtins.h>
158#endif
159
160#if 1400 <= _MSC_VER
161 #include <intrin.h> /* fence functions _ReadBarrier, also bit search functions _BitScanReverse */
158#endif 162#endif
159 163
160#ifndef ECB_MEMORY_FENCE 164#ifndef ECB_MEMORY_FENCE
161 #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110 165 #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
162 #if __i386 || __i386__ 166 #if __i386 || __i386__
423#else 427#else
424 ecb_function_ ecb_const int ecb_ctz32 (uint32_t x); 428 ecb_function_ ecb_const int ecb_ctz32 (uint32_t x);
425 ecb_function_ ecb_const int 429 ecb_function_ ecb_const int
426 ecb_ctz32 (uint32_t x) 430 ecb_ctz32 (uint32_t x)
427 { 431 {
432#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
433 unsigned long r = 0;
434 _BitScanForward (&r, x);
435 return (int)r;
436#else
428 int r = 0; 437 int r = 0;
429 438
430 x &= ~x + 1; /* this isolates the lowest bit */ 439 x &= ~x + 1; /* this isolates the lowest bit */
431 440
432#if ECB_branchless_on_i386 441#if ECB_branchless_on_i386
442 if (x & 0xff00ff00) r += 8; 451 if (x & 0xff00ff00) r += 8;
443 if (x & 0xffff0000) r += 16; 452 if (x & 0xffff0000) r += 16;
444#endif 453#endif
445 454
446 return r; 455 return r;
456#endif
447 } 457 }
448 458
449 ecb_function_ ecb_const int ecb_ctz64 (uint64_t x); 459 ecb_function_ ecb_const int ecb_ctz64 (uint64_t x);
450 ecb_function_ ecb_const int 460 ecb_function_ ecb_const int
451 ecb_ctz64 (uint64_t x) 461 ecb_ctz64 (uint64_t x)
452 { 462 {
463#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
464 unsigned long r = 0;
465 _BitScanForward64 (&r, x);
466 return (int)r;
467#else
453 int shift = x & 0xffffffffU ? 0 : 32; 468 int shift = x & 0xffffffff ? 0 : 32;
454 return ecb_ctz32 (x >> shift) + shift; 469 return ecb_ctz32 (x >> shift) + shift;
470#endif
455 } 471 }
456 472
457 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); 473 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x);
458 ecb_function_ ecb_const int 474 ecb_function_ ecb_const int
459 ecb_popcount32 (uint32_t x) 475 ecb_popcount32 (uint32_t x)
467 } 483 }
468 484
469 ecb_function_ ecb_const int ecb_ld32 (uint32_t x); 485 ecb_function_ ecb_const int ecb_ld32 (uint32_t x);
470 ecb_function_ ecb_const int ecb_ld32 (uint32_t x) 486 ecb_function_ ecb_const int ecb_ld32 (uint32_t x)
471 { 487 {
488#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
489 unsigned long r = 0;
490 _BitScanReverse (&r, x);
491 return (int)r;
492#else
472 int r = 0; 493 int r = 0;
473 494
474 if (x >> 16) { x >>= 16; r += 16; } 495 if (x >> 16) { x >>= 16; r += 16; }
475 if (x >> 8) { x >>= 8; r += 8; } 496 if (x >> 8) { x >>= 8; r += 8; }
476 if (x >> 4) { x >>= 4; r += 4; } 497 if (x >> 4) { x >>= 4; r += 4; }
477 if (x >> 2) { x >>= 2; r += 2; } 498 if (x >> 2) { x >>= 2; r += 2; }
478 if (x >> 1) { r += 1; } 499 if (x >> 1) { r += 1; }
479 500
480 return r; 501 return r;
502#endif
481 } 503 }
482 504
483 ecb_function_ ecb_const int ecb_ld64 (uint64_t x); 505 ecb_function_ ecb_const int ecb_ld64 (uint64_t x);
484 ecb_function_ ecb_const int ecb_ld64 (uint64_t x) 506 ecb_function_ ecb_const int ecb_ld64 (uint64_t x)
485 { 507 {
508#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
509 unsigned long r = 0;
510 _BitScanReverse64 (&r, x);
511 return (int)r;
512#else
486 int r = 0; 513 int r = 0;
487 514
488 if (x >> 32) { x >>= 32; r += 32; } 515 if (x >> 32) { x >>= 32; r += 32; }
489 516
490 return r + ecb_ld32 (x); 517 return r + ecb_ld32 (x);
518#endif
491 } 519 }
492#endif 520#endif
493 521
494ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x); 522ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x);
495ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x) { return !(x & (x - 1)); } 523ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x) { return !(x & (x - 1)); }
661 return N; 689 return N;
662 } 690 }
663#else 691#else
664 #define ecb_array_length(name) (sizeof (name) / sizeof (name [0])) 692 #define ecb_array_length(name) (sizeof (name) / sizeof (name [0]))
665#endif 693#endif
694
695ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x);
696ecb_function_ ecb_const uint32_t
697ecb_binary16_to_binary32 (uint32_t x)
698{
699 unsigned int s = (x & 0x8000) << (31 - 15);
700 int e = (x >> 10) & 0x001f;
701 unsigned int m = x & 0x03ff;
702
703 if (ecb_expect_false (e == 31))
704 /* infinity or NaN */
705 e = 255 - (127 - 15);
706 else if (ecb_expect_false (!e))
707 {
708 if (ecb_expect_true (!m))
709 /* zero, handled by code below by forcing e to 0 */
710 e = 0 - (127 - 15);
711 else
712 {
713 /* subnormal, renormalise */
714 unsigned int s = 10 - ecb_ld32 (m);
715
716 m = (m << s) & 0x3ff; /* mask implicit bit */
717 e -= s - 1;
718 }
719 }
720
721 /* e and m now are normalised, or zero, (or inf or nan) */
722 e += 127 - 15;
723
724 return s | (e << 23) | (m << (23 - 10));
725}
726
727ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x);
728ecb_function_ ecb_const uint16_t
729ecb_binary32_to_binary16 (uint32_t x)
730{
731 unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */
732 unsigned int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */
733 unsigned int m = x & 0x007fffff;
734
735 x &= 0x7fffffff;
736
737 /* if it's within range of binary16 normals, use fast path */
738 if (ecb_expect_true (0x38800000 <= x && x <= 0x477fefff))
739 {
740 /* mantissa round-to-even */
741 m += 0x00000fff + ((m >> (23 - 10)) & 1);
742
743 /* handle overflow */
744 if (ecb_expect_false (m >= 0x00800000))
745 {
746 m >>= 1;
747 e += 1;
748 }
749
750 return s | (e << 10) | (m >> (23 - 10));
751 }
752
753 /* handle large numbers and infinity */
754 if (ecb_expect_true (0x477fefff < x && x <= 0x7f800000))
755 return s | 0x7c00;
756
757 /* handle zero, subnormals and small numbers */
758 if (ecb_expect_true (x < 0x38800000))
759 {
760 /* zero */
761 if (ecb_expect_true (!x))
762 return s;
763
764 /* handle subnormals */
765
766 /* too small, will be zero */
767 if (e < (14 - 24)) /* might not be sharp, but is good enough */
768 return s;
769
770 m |= 0x00800000; /* make implicit bit explicit */
771
772 /* very tricky - we need to round to the nearest e (+10) bit value */
773 {
774 unsigned int bits = 14 - e;
775 unsigned int half = (1 << (bits - 1)) - 1;
776 unsigned int even = (m >> bits) & 1;
777
778 /* if this overflows, we will end up with a normalised number */
779 m = (m + half + even) >> bits;
780 }
781
782 return s | m;
783 }
784
785 /* handle NaNs, preserve leftmost nan bits, but make sure we don't turn them into infinities */
786 m >>= 13;
787
788 return s | 0x7c00 | m | !m;
789}
666 790
667/*******************************************************************************/ 791/*******************************************************************************/
668/* floating point stuff, can be disabled by defining ECB_NO_LIBM */ 792/* floating point stuff, can be disabled by defining ECB_NO_LIBM */
669 793
670/* basically, everything uses "ieee pure-endian" floating point numbers */ 794/* basically, everything uses "ieee pure-endian" floating point numbers */
713 #else 837 #else
714 #define ecb_ldexpf(x,e) (float) ldexp ((double) (x), (e)) 838 #define ecb_ldexpf(x,e) (float) ldexp ((double) (x), (e))
715 #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e)) 839 #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e))
716 #endif 840 #endif
717 841
718 /* converts an ieee half/binary16 to a float */
719 ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x);
720 ecb_function_ ecb_const float
721 ecb_binary16_to_float (uint16_t x)
722 {
723 int e = (x >> 10) & 0x1f;
724 int m = x & 0x3ff;
725 float r;
726
727 if (!e ) r = ecb_ldexpf (m , -24);
728 else if (e != 31) r = ecb_ldexpf (m + 0x400, e - 25);
729 else if (m ) r = ECB_NAN;
730 else r = ECB_INFINITY;
731
732 return x & 0x8000 ? -r : r;
733 }
734
735 /* convert a float to ieee single/binary32 */ 842 /* convert a float to ieee single/binary32 */
736 ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x); 843 ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x);
737 ecb_function_ ecb_const uint32_t 844 ecb_function_ ecb_const uint32_t
738 ecb_float_to_binary32 (float x) 845 ecb_float_to_binary32 (float x)
739 { 846 {
870 #endif 977 #endif
871 978
872 return r; 979 return r;
873 } 980 }
874 981
875#endif 982 /* convert a float to ieee half/binary16 */
983 ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x);
984 ecb_function_ ecb_const uint16_t
985 ecb_float_to_binary16 (float x)
986 {
987 return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x));
988 }
876 989
877#endif 990 /* convert an ieee half/binary16 to float */
991 ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x);
992 ecb_function_ ecb_const float
993 ecb_binary16_to_float (uint16_t x)
994 {
995 return ecb_binary32_to_float (ecb_binary16_to_binary32 (x));
996 }
878 997
998#endif
999
1000#endif
1001

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines