… | |
… | |
468 | #define ecb_ld64(x) (ecb_clz64 (x) ^ 63) |
468 | #define ecb_ld64(x) (ecb_clz64 (x) ^ 63) |
469 | #define ecb_popcount32(x) __builtin_popcount (x) |
469 | #define ecb_popcount32(x) __builtin_popcount (x) |
470 | /* ecb_popcount64 is more difficult, see below */ |
470 | /* ecb_popcount64 is more difficult, see below */ |
471 | #else |
471 | #else |
472 | ecb_function_ ecb_const int ecb_ctz32 (uint32_t x); |
472 | ecb_function_ ecb_const int ecb_ctz32 (uint32_t x); |
473 | ecb_function_ ecb_const int |
473 | ecb_function_ ecb_const int ecb_ctz32 (uint32_t x) |
474 | ecb_ctz32 (uint32_t x) |
|
|
475 | { |
474 | { |
476 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
475 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
477 | unsigned long r; |
476 | unsigned long r; |
478 | _BitScanForward (&r, x); |
477 | _BitScanForward (&r, x); |
479 | return (int)r; |
478 | return (int)r; |
… | |
… | |
518 | return r; |
517 | return r; |
519 | #endif |
518 | #endif |
520 | } |
519 | } |
521 | |
520 | |
522 | ecb_function_ ecb_const int ecb_ctz64 (uint64_t x); |
521 | ecb_function_ ecb_const int ecb_ctz64 (uint64_t x); |
523 | ecb_function_ ecb_const int |
522 | ecb_function_ ecb_const int ecb_ctz64 (uint64_t x) |
524 | ecb_ctz64 (uint64_t x) |
|
|
525 | { |
523 | { |
526 | #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
524 | #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
527 | unsigned long r; |
525 | unsigned long r; |
528 | _BitScanForward64 (&r, x); |
526 | _BitScanForward64 (&r, x); |
529 | return (int)r; |
527 | return (int)r; |
… | |
… | |
532 | return ecb_ctz32 (x >> shift) + shift; |
530 | return ecb_ctz32 (x >> shift) + shift; |
533 | #endif |
531 | #endif |
534 | } |
532 | } |
535 | |
533 | |
536 | ecb_function_ ecb_const int ecb_clz32 (uint32_t x); |
534 | ecb_function_ ecb_const int ecb_clz32 (uint32_t x); |
537 | ecb_function_ ecb_const int |
535 | ecb_function_ ecb_const int ecb_clz32 (uint32_t x) |
538 | ecb_clz32 (uint32_t x) |
|
|
539 | { |
536 | { |
540 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
537 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
541 | unsigned long r; |
538 | unsigned long r; |
542 | _BitScanReverse (&r, x); |
539 | _BitScanReverse (&r, x); |
543 | return (int)r; |
540 | return (int)r; |
… | |
… | |
568 | return table [x >> 26]; |
565 | return table [x >> 26]; |
569 | #endif |
566 | #endif |
570 | } |
567 | } |
571 | |
568 | |
572 | ecb_function_ ecb_const int ecb_clz64 (uint64_t x); |
569 | ecb_function_ ecb_const int ecb_clz64 (uint64_t x); |
573 | ecb_function_ ecb_const int |
570 | ecb_function_ ecb_const int ecb_clz64 (uint64_t x) |
574 | ecb_clz64 (uint64_t x) |
|
|
575 | { |
571 | { |
576 | #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
572 | #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
577 | unsigned long r; |
573 | unsigned long r; |
578 | _BitScanReverse64 (&r, x); |
574 | _BitScanReverse64 (&r, x); |
579 | return (int)r; |
575 | return (int)r; |
… | |
… | |
583 | return ecb_clz32 (l ? l : x) + shift; |
579 | return ecb_clz32 (l ? l : x) + shift; |
584 | #endif |
580 | #endif |
585 | } |
581 | } |
586 | |
582 | |
587 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); |
583 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); |
588 | ecb_function_ ecb_const int |
584 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x) |
589 | ecb_popcount32 (uint32_t x) |
|
|
590 | { |
585 | { |
591 | x -= (x >> 1) & 0x55555555; |
586 | x -= (x >> 1) & 0x55555555; |
592 | x = ((x >> 2) & 0x33333333) + (x & 0x33333333); |
587 | x = ((x >> 2) & 0x33333333) + (x & 0x33333333); |
593 | x = ((x >> 4) + x) & 0x0f0f0f0f; |
588 | x = ((x >> 4) + x) & 0x0f0f0f0f; |
594 | x *= 0x01010101; |
589 | x *= 0x01010101; |
… | |
… | |
667 | |
662 | |
668 | return x; |
663 | return x; |
669 | } |
664 | } |
670 | |
665 | |
671 | ecb_function_ ecb_const int ecb_popcount64 (uint64_t x); |
666 | ecb_function_ ecb_const int ecb_popcount64 (uint64_t x); |
672 | ecb_function_ ecb_const int |
667 | ecb_function_ ecb_const int ecb_popcount64 (uint64_t x) |
673 | ecb_popcount64 (uint64_t x) |
|
|
674 | { |
668 | { |
675 | /* popcount64 is only available on 64 bit cpus as gcc builtin. */ |
669 | /* popcount64 is only available on 64 bit cpus as gcc builtin. */ |
676 | /* also, gcc/clang make this surprisingly difficult to use */ |
670 | /* also, gcc/clang make this surprisingly difficult to use */ |
677 | #if (__SIZEOF_LONG__ == 8) && (ECB_GCC_VERSION(3,4) || ECB_CLANG_BUILTIN (__builtin_popcountl)) |
671 | #if (__SIZEOF_LONG__ == 8) && (ECB_GCC_VERSION(3,4) || ECB_CLANG_BUILTIN (__builtin_popcountl)) |
678 | return __builtin_popcountl (x); |
672 | return __builtin_popcountl (x); |
679 | #else |
673 | #else |
680 | return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); |
674 | return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); |
681 | #endif |
675 | #endif |
682 | } |
676 | } |
683 | |
677 | |
684 | ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count); |
|
|
685 | ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count); |
|
|
686 | ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count); |
|
|
687 | ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count); |
|
|
688 | ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count); |
|
|
689 | ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count); |
|
|
690 | ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count); |
|
|
691 | ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count); |
|
|
692 | |
|
|
693 | ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); } |
678 | ecb_inline uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); } |
694 | ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); } |
679 | ecb_inline uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); } |
695 | ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); } |
680 | ecb_inline uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); } |
696 | ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); } |
681 | ecb_inline uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); } |
697 | ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); } |
682 | ecb_inline uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); } |
698 | ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); } |
683 | ecb_inline uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); } |
699 | ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); } |
684 | ecb_inline uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); } |
700 | ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); } |
685 | ecb_inline uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); } |
701 | |
686 | |
702 | #if ECB_CPP |
687 | #if ECB_CPP |
703 | |
688 | |
704 | inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); } |
689 | inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); } |
705 | inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); } |
690 | inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); } |
… | |
… | |
750 | #define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x))) |
735 | #define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x))) |
751 | #define ecb_bswap32(x) ((uint32_t)_byteswap_ulong ((uint32_t)(x))) |
736 | #define ecb_bswap32(x) ((uint32_t)_byteswap_ulong ((uint32_t)(x))) |
752 | #define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x))) |
737 | #define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x))) |
753 | #else |
738 | #else |
754 | ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x); |
739 | ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x); |
755 | ecb_function_ ecb_const uint16_t |
740 | ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x) |
756 | ecb_bswap16 (uint16_t x) |
|
|
757 | { |
741 | { |
758 | return ecb_rotl16 (x, 8); |
742 | return ecb_rotl16 (x, 8); |
759 | } |
743 | } |
760 | |
744 | |
761 | ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x); |
745 | ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x); |
762 | ecb_function_ ecb_const uint32_t |
746 | ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x) |
763 | ecb_bswap32 (uint32_t x) |
|
|
764 | { |
747 | { |
765 | return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16); |
748 | return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16); |
766 | } |
749 | } |
767 | |
750 | |
768 | ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x); |
751 | ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x); |
769 | ecb_function_ ecb_const uint64_t |
752 | ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x) |
770 | ecb_bswap64 (uint64_t x) |
|
|
771 | { |
753 | { |
772 | return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32); |
754 | return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32); |
773 | } |
755 | } |
774 | #endif |
756 | #endif |
775 | |
757 | |
… | |
… | |
782 | #endif |
764 | #endif |
783 | |
765 | |
784 | /* try to tell the compiler that some condition is definitely true */ |
766 | /* try to tell the compiler that some condition is definitely true */ |
785 | #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 |
767 | #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 |
786 | |
768 | |
787 | ecb_inline ecb_const uint32_t ecb_byteorder_helper (void); |
769 | ecb_inline uint32_t ecb_byteorder_helper (void); |
788 | ecb_inline ecb_const uint32_t |
770 | ecb_inline uint32_t ecb_byteorder_helper (void) |
789 | ecb_byteorder_helper (void) |
|
|
790 | { |
771 | { |
791 | /* the union code still generates code under pressure in gcc, */ |
772 | /* the union code still generates code under pressure in gcc, */ |
792 | /* but less than using pointers, and always seems to */ |
773 | /* but less than using pointers, and always seems to */ |
793 | /* successfully return a constant. */ |
774 | /* successfully return a constant. */ |
794 | /* the reason why we have this horrible preprocessor mess */ |
775 | /* the reason why we have this horrible preprocessor mess */ |
… | |
… | |
810 | } u = { 0x11, 0x22, 0x33, 0x44 }; |
791 | } u = { 0x11, 0x22, 0x33, 0x44 }; |
811 | return u.u; |
792 | return u.u; |
812 | #endif |
793 | #endif |
813 | } |
794 | } |
814 | |
795 | |
815 | ecb_inline ecb_const ecb_bool ecb_big_endian (void); |
|
|
816 | ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; } |
796 | ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; } |
817 | ecb_inline ecb_const ecb_bool ecb_little_endian (void); |
|
|
818 | ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; } |
797 | ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; } |
819 | |
798 | |
820 | /*****************************************************************************/ |
799 | /*****************************************************************************/ |
821 | /* unaligned load/store */ |
800 | /* unaligned load/store */ |
822 | |
801 | |
… | |
… | |
889 | |
868 | |
890 | /*****************************************************************************/ |
869 | /*****************************************************************************/ |
891 | /* pointer/integer hashing */ |
870 | /* pointer/integer hashing */ |
892 | |
871 | |
893 | /* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */ |
872 | /* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */ |
894 | ecb_function_ uint32_t ecb_mix32 (uint32_t v); |
873 | ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v); |
895 | ecb_function_ uint32_t ecb_mix32 (uint32_t v) |
874 | ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v) |
896 | { |
875 | { |
897 | v ^= v >> 16; v *= 0x7feb352dU; |
876 | v ^= v >> 16; v *= 0x7feb352dU; |
898 | v ^= v >> 15; v *= 0x846ca68bU; |
877 | v ^= v >> 15; v *= 0x846ca68bU; |
899 | v ^= v >> 16; |
878 | v ^= v >> 16; |
900 | return v; |
879 | return v; |
901 | } |
880 | } |
902 | |
881 | |
903 | ecb_function_ uint32_t ecb_unmix32 (uint32_t v); |
882 | ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v); |
904 | ecb_function_ uint32_t ecb_unmix32 (uint32_t v) |
883 | ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v) |
905 | { |
884 | { |
906 | v ^= v >> 16 ; v *= 0x43021123U; |
885 | v ^= v >> 16 ; v *= 0x43021123U; |
907 | v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U; |
886 | v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U; |
908 | v ^= v >> 16 ; |
887 | v ^= v >> 16 ; |
909 | return v; |
888 | return v; |
910 | } |
889 | } |
911 | |
890 | |
912 | /* based on splitmix64, by Sebastiano Vigna, https://prng.di.unimi.it/splitmix64.c */ |
891 | /* based on splitmix64, by Sebastiano Vigna, https://prng.di.unimi.it/splitmix64.c */ |
913 | ecb_function_ uint64_t ecb_mix64 (uint64_t v); |
892 | ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v); |
914 | ecb_function_ uint64_t ecb_mix64 (uint64_t v) |
893 | ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v) |
915 | { |
894 | { |
916 | v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U; |
895 | v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U; |
917 | v ^= v >> 27; v *= 0x94d049bb133111ebU; |
896 | v ^= v >> 27; v *= 0x94d049bb133111ebU; |
918 | v ^= v >> 31; |
897 | v ^= v >> 31; |
919 | return v; |
898 | return v; |
920 | } |
899 | } |
921 | |
900 | |
922 | ecb_function_ uint64_t ecb_unmix64 (uint64_t v); |
901 | ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v); |
923 | ecb_function_ uint64_t ecb_unmix64 (uint64_t v) |
902 | ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v) |
924 | { |
903 | { |
925 | v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U; |
904 | v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U; |
926 | v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U; |
905 | v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U; |
927 | v ^= v >> 30 ^ v >> 60; |
906 | v ^= v >> 30 ^ v >> 60; |
928 | return v; |
907 | return v; |
929 | } |
908 | } |
930 | |
909 | |
931 | ecb_function_ uintptr_t ecb_ptrmix (void *p); |
910 | ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p); |
932 | ecb_function_ uintptr_t ecb_ptrmix (void *p) |
911 | ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p) |
933 | { |
912 | { |
934 | #if ECB_PTRSIZE <= 4 |
913 | #if ECB_PTRSIZE <= 4 |
935 | return ecb_mix32 ((uint32_t)p); |
914 | return ecb_mix32 ((uint32_t)p); |
936 | #else |
915 | #else |
937 | return ecb_mix64 ((uint64_t)p); |
916 | return ecb_mix64 ((uint64_t)p); |
938 | #endif |
917 | #endif |
939 | } |
918 | } |
940 | |
919 | |
941 | ecb_function_ void *ecb_ptrunmix (uintptr_t v); |
920 | ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v); |
942 | ecb_function_ void *ecb_ptrunmix (uintptr_t v) |
921 | ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v) |
943 | { |
922 | { |
944 | #if ECB_PTRSIZE <= 4 |
923 | #if ECB_PTRSIZE <= 4 |
945 | return (void *)ecb_unmix32 (v); |
924 | return (void *)ecb_unmix32 (v); |
946 | #else |
925 | #else |
947 | return (void *)ecb_unmix64 (v); |
926 | return (void *)ecb_unmix64 (v); |
… | |
… | |
970 | ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); } |
949 | ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); } |
971 | ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); } |
950 | ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); } |
972 | ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); } |
951 | ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); } |
973 | ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); } |
952 | ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); } |
974 | |
953 | |
975 | ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g); |
954 | ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g); |
976 | ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g) |
955 | ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g) |
977 | { |
956 | { |
978 | g ^= g >> 1; |
957 | g ^= g >> 1; |
979 | g ^= g >> 2; |
958 | g ^= g >> 2; |
980 | g ^= g >> 4; |
959 | g ^= g >> 4; |
981 | |
960 | |
982 | return g; |
961 | return g; |
983 | } |
962 | } |
984 | |
963 | |
985 | ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g); |
964 | ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g); |
986 | ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g) |
965 | ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g) |
987 | { |
966 | { |
988 | g ^= g >> 1; |
967 | g ^= g >> 1; |
989 | g ^= g >> 2; |
968 | g ^= g >> 2; |
990 | g ^= g >> 4; |
969 | g ^= g >> 4; |
991 | g ^= g >> 8; |
970 | g ^= g >> 8; |
992 | |
971 | |
993 | return g; |
972 | return g; |
994 | } |
973 | } |
995 | |
974 | |
996 | ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g); |
975 | ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g); |
997 | ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g) |
976 | ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g) |
998 | { |
977 | { |
999 | g ^= g >> 1; |
978 | g ^= g >> 1; |
1000 | g ^= g >> 2; |
979 | g ^= g >> 2; |
1001 | g ^= g >> 4; |
980 | g ^= g >> 4; |
1002 | g ^= g >> 8; |
981 | g ^= g >> 8; |
1003 | g ^= g >> 16; |
982 | g ^= g >> 16; |
1004 | |
983 | |
1005 | return g; |
984 | return g; |
1006 | } |
985 | } |
1007 | |
986 | |
1008 | ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g); |
987 | ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g); |
1009 | ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g) |
988 | ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g) |
1010 | { |
989 | { |
1011 | g ^= g >> 1; |
990 | g ^= g >> 1; |
1012 | g ^= g >> 2; |
991 | g ^= g >> 2; |
1013 | g ^= g >> 4; |
992 | g ^= g >> 4; |
1014 | g ^= g >> 8; |
993 | g ^= g >> 8; |
… | |
… | |
1035 | /*****************************************************************************/ |
1014 | /*****************************************************************************/ |
1036 | /* 2d hilbert curves */ |
1015 | /* 2d hilbert curves */ |
1037 | |
1016 | |
1038 | /* algorithm from the book Hacker's Delight, modified to not */ |
1017 | /* algorithm from the book Hacker's Delight, modified to not */ |
1039 | /* run into undefined behaviour for n==16 */ |
1018 | /* run into undefined behaviour for n==16 */ |
1040 | static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s); |
1019 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s); |
1041 | static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s) |
1020 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s) |
1042 | { |
1021 | { |
1043 | uint32_t comp, swap, cs, t, sr; |
1022 | uint32_t comp, swap, cs, t, sr; |
1044 | |
1023 | |
1045 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
1024 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
1046 | s |= 0x55555555U << n << n; |
1025 | s |= 0x55555555U << n << n; |
… | |
… | |
1080 | /* now s contains two 16-bit coordinates */ |
1059 | /* now s contains two 16-bit coordinates */ |
1081 | return s; |
1060 | return s; |
1082 | } |
1061 | } |
1083 | |
1062 | |
1084 | /* 64 bit, a straightforward extension to the 32 bit case */ |
1063 | /* 64 bit, a straightforward extension to the 32 bit case */ |
1085 | static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s); |
1064 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s); |
1086 | static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s) |
1065 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s) |
1087 | { |
1066 | { |
1088 | uint64_t comp, swap, cs, t, sr; |
1067 | uint64_t comp, swap, cs, t, sr; |
1089 | |
1068 | |
1090 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
1069 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
1091 | s |= 0x5555555555555555U << n << n; |
1070 | s |= 0x5555555555555555U << n << n; |
… | |
… | |
1129 | } |
1108 | } |
1130 | |
1109 | |
1131 | /* algorithm from the book Hacker's Delight, but a similar algorithm */ |
1110 | /* algorithm from the book Hacker's Delight, but a similar algorithm */ |
1132 | /* is given in https://doi.org/10.1002/spe.4380160103 */ |
1111 | /* is given in https://doi.org/10.1002/spe.4380160103 */ |
1133 | /* this has been slightly improved over the original version */ |
1112 | /* this has been slightly improved over the original version */ |
1134 | ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy); |
1113 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy); |
1135 | ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy) |
1114 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy) |
1136 | { |
1115 | { |
1137 | uint32_t row; |
1116 | uint32_t row; |
1138 | uint32_t state = 0; |
1117 | uint32_t state = 0; |
1139 | uint32_t s = 0; |
1118 | uint32_t s = 0; |
1140 | |
1119 | |
… | |
… | |
1154 | |
1133 | |
1155 | return s; |
1134 | return s; |
1156 | } |
1135 | } |
1157 | |
1136 | |
1158 | /* 64 bit, essentially the same as 32 bit */ |
1137 | /* 64 bit, essentially the same as 32 bit */ |
1159 | ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy); |
1138 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy); |
1160 | ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy) |
1139 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy) |
1161 | { |
1140 | { |
1162 | uint32_t row; |
1141 | uint32_t row; |
1163 | uint32_t state = 0; |
1142 | uint32_t state = 0; |
1164 | uint64_t s = 0; |
1143 | uint64_t s = 0; |
1165 | |
1144 | |
… | |
… | |
1222 | |
1201 | |
1223 | /*****************************************************************************/ |
1202 | /*****************************************************************************/ |
1224 | /* IEEE 754-2008 half float conversions */ |
1203 | /* IEEE 754-2008 half float conversions */ |
1225 | |
1204 | |
1226 | ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x); |
1205 | ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x); |
1227 | ecb_function_ ecb_const uint32_t |
1206 | ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x) |
1228 | ecb_binary16_to_binary32 (uint32_t x) |
|
|
1229 | { |
1207 | { |
1230 | unsigned int s = (x & 0x8000) << (31 - 15); |
1208 | unsigned int s = (x & 0x8000) << (31 - 15); |
1231 | int e = (x >> 10) & 0x001f; |
1209 | int e = (x >> 10) & 0x001f; |
1232 | unsigned int m = x & 0x03ff; |
1210 | unsigned int m = x & 0x03ff; |
1233 | |
1211 | |
… | |
… | |
1254 | |
1232 | |
1255 | return s | (e << 23) | (m << (23 - 10)); |
1233 | return s | (e << 23) | (m << (23 - 10)); |
1256 | } |
1234 | } |
1257 | |
1235 | |
1258 | ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x); |
1236 | ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x); |
1259 | ecb_function_ ecb_const uint16_t |
1237 | ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x) |
1260 | ecb_binary32_to_binary16 (uint32_t x) |
|
|
1261 | { |
1238 | { |
1262 | unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */ |
1239 | unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */ |
1263 | int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */ |
1240 | int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */ |
1264 | unsigned int m = x & 0x007fffff; |
1241 | unsigned int m = x & 0x007fffff; |
1265 | |
1242 | |
… | |
… | |
1406 | #define ECB_I2A_U32_DIGITS 10 |
1383 | #define ECB_I2A_U32_DIGITS 10 |
1407 | #define ECB_I2A_I64_DIGITS 20 |
1384 | #define ECB_I2A_I64_DIGITS 20 |
1408 | #define ECB_I2A_U64_DIGITS 21 |
1385 | #define ECB_I2A_U64_DIGITS 21 |
1409 | #define ECB_I2A_MAX_DIGITS 21 |
1386 | #define ECB_I2A_MAX_DIGITS 21 |
1410 | |
1387 | |
1411 | ecb_inline char * |
1388 | ecb_function_ char * ecb_i2a_u32 (char *ptr, uint32_t u); |
1412 | ecb_i2a_u32 (char *ptr, uint32_t u) |
1389 | ecb_function_ char * ecb_i2a_u32 (char *ptr, uint32_t u) |
1413 | { |
1390 | { |
1414 | #if ECB_64BIT_NATIVE |
1391 | #if ECB_64BIT_NATIVE |
1415 | if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1392 | if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1416 | ptr = ecb_i2a_x10 (ptr, u); |
1393 | ptr = ecb_i2a_x10 (ptr, u); |
1417 | else /* x10 almost, but not fully, covers 32 bit */ |
1394 | else /* x10 almost, but not fully, covers 32 bit */ |
… | |
… | |
1447 | #endif |
1424 | #endif |
1448 | |
1425 | |
1449 | return ptr; |
1426 | return ptr; |
1450 | } |
1427 | } |
1451 | |
1428 | |
1452 | ecb_inline char * |
1429 | ecb_function_ char * ecb_i2a_i32 (char *ptr, int32_t v); |
1453 | ecb_i2a_i32 (char *ptr, int32_t v) |
1430 | ecb_function_ char * ecb_i2a_i32 (char *ptr, int32_t v) |
1454 | { |
1431 | { |
1455 | *ptr = '-'; ptr += v < 0; |
1432 | *ptr = '-'; ptr += v < 0; |
1456 | uint32_t u = v < 0 ? -(uint32_t)v : v; |
1433 | uint32_t u = v < 0 ? -(uint32_t)v : v; |
1457 | |
1434 | |
1458 | #if ECB_64BIT_NATIVE |
1435 | #if ECB_64BIT_NATIVE |
… | |
… | |
1462 | #endif |
1439 | #endif |
1463 | |
1440 | |
1464 | return ptr; |
1441 | return ptr; |
1465 | } |
1442 | } |
1466 | |
1443 | |
1467 | ecb_inline char * |
1444 | ecb_function_ char * ecb_i2a_u64 (char *ptr, uint64_t u); |
1468 | ecb_i2a_u64 (char *ptr, uint64_t u) |
1445 | ecb_function_ char * ecb_i2a_u64 (char *ptr, uint64_t u) |
1469 | { |
1446 | { |
1470 | #if ECB_64BIT_NATIVE |
1447 | #if ECB_64BIT_NATIVE |
1471 | if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1448 | if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1472 | ptr = ecb_i2a_x10 (ptr, u); |
1449 | ptr = ecb_i2a_x10 (ptr, u); |
1473 | else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000)) |
1450 | else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000)) |
… | |
… | |
1503 | #endif |
1480 | #endif |
1504 | |
1481 | |
1505 | return ptr; |
1482 | return ptr; |
1506 | } |
1483 | } |
1507 | |
1484 | |
1508 | ecb_inline char * |
1485 | ecb_function_ char * ecb_i2a_i64 (char *ptr, int64_t v); |
1509 | ecb_i2a_i64 (char *ptr, int64_t v) |
1486 | ecb_function_ char * ecb_i2a_i64 (char *ptr, int64_t v) |
1510 | { |
1487 | { |
1511 | *ptr = '-'; ptr += v < 0; |
1488 | *ptr = '-'; ptr += v < 0; |
1512 | uint64_t u = v < 0 ? -(uint64_t)v : v; |
1489 | uint64_t u = v < 0 ? -(uint64_t)v : v; |
1513 | |
1490 | |
1514 | #if ECB_64BIT_NATIVE |
1491 | #if ECB_64BIT_NATIVE |
… | |
… | |
1591 | #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e)) |
1568 | #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e)) |
1592 | #endif |
1569 | #endif |
1593 | |
1570 | |
1594 | /* convert a float to ieee single/binary32 */ |
1571 | /* convert a float to ieee single/binary32 */ |
1595 | ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x); |
1572 | ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x); |
1596 | ecb_function_ ecb_const uint32_t |
1573 | ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x) |
1597 | ecb_float_to_binary32 (float x) |
|
|
1598 | { |
1574 | { |
1599 | uint32_t r; |
1575 | uint32_t r; |
1600 | |
1576 | |
1601 | #if ECB_STDFP |
1577 | #if ECB_STDFP |
1602 | memcpy (&r, &x, 4); |
1578 | memcpy (&r, &x, 4); |
… | |
… | |
1631 | return r; |
1607 | return r; |
1632 | } |
1608 | } |
1633 | |
1609 | |
1634 | /* converts an ieee single/binary32 to a float */ |
1610 | /* converts an ieee single/binary32 to a float */ |
1635 | ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x); |
1611 | ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x); |
1636 | ecb_function_ ecb_const float |
1612 | ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x) |
1637 | ecb_binary32_to_float (uint32_t x) |
|
|
1638 | { |
1613 | { |
1639 | float r; |
1614 | float r; |
1640 | |
1615 | |
1641 | #if ECB_STDFP |
1616 | #if ECB_STDFP |
1642 | memcpy (&r, &x, 4); |
1617 | memcpy (&r, &x, 4); |
… | |
… | |
1661 | return r; |
1636 | return r; |
1662 | } |
1637 | } |
1663 | |
1638 | |
1664 | /* convert a double to ieee double/binary64 */ |
1639 | /* convert a double to ieee double/binary64 */ |
1665 | ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x); |
1640 | ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x); |
1666 | ecb_function_ ecb_const uint64_t |
1641 | ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x) |
1667 | ecb_double_to_binary64 (double x) |
|
|
1668 | { |
1642 | { |
1669 | uint64_t r; |
1643 | uint64_t r; |
1670 | |
1644 | |
1671 | #if ECB_STDFP |
1645 | #if ECB_STDFP |
1672 | memcpy (&r, &x, 8); |
1646 | memcpy (&r, &x, 8); |
… | |
… | |
1701 | return r; |
1675 | return r; |
1702 | } |
1676 | } |
1703 | |
1677 | |
1704 | /* converts an ieee double/binary64 to a double */ |
1678 | /* converts an ieee double/binary64 to a double */ |
1705 | ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x); |
1679 | ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x); |
1706 | ecb_function_ ecb_const double |
1680 | ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x) |
1707 | ecb_binary64_to_double (uint64_t x) |
|
|
1708 | { |
1681 | { |
1709 | double r; |
1682 | double r; |
1710 | |
1683 | |
1711 | #if ECB_STDFP |
1684 | #if ECB_STDFP |
1712 | memcpy (&r, &x, 8); |
1685 | memcpy (&r, &x, 8); |
… | |
… | |
1731 | return r; |
1704 | return r; |
1732 | } |
1705 | } |
1733 | |
1706 | |
1734 | /* convert a float to ieee half/binary16 */ |
1707 | /* convert a float to ieee half/binary16 */ |
1735 | ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x); |
1708 | ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x); |
1736 | ecb_function_ ecb_const uint16_t |
1709 | ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x) |
1737 | ecb_float_to_binary16 (float x) |
|
|
1738 | { |
1710 | { |
1739 | return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x)); |
1711 | return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x)); |
1740 | } |
1712 | } |
1741 | |
1713 | |
1742 | /* convert an ieee half/binary16 to float */ |
1714 | /* convert an ieee half/binary16 to float */ |
1743 | ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x); |
1715 | ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x); |
1744 | ecb_function_ ecb_const float |
1716 | ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x) |
1745 | ecb_binary16_to_float (uint16_t x) |
|
|
1746 | { |
1717 | { |
1747 | return ecb_binary32_to_float (ecb_binary16_to_binary32 (x)); |
1718 | return ecb_binary32_to_float (ecb_binary16_to_binary32 (x)); |
1748 | } |
1719 | } |
1749 | |
1720 | |
1750 | #endif |
1721 | #endif |