… | |
… | |
579 | return ecb_clz32 (l ? l : x) + shift; |
579 | return ecb_clz32 (l ? l : x) + shift; |
580 | #endif |
580 | #endif |
581 | } |
581 | } |
582 | |
582 | |
583 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); |
583 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); |
584 | ecb_function_ ecb_const int |
584 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x) |
585 | ecb_popcount32 (uint32_t x) |
|
|
586 | { |
585 | { |
587 | x -= (x >> 1) & 0x55555555; |
586 | x -= (x >> 1) & 0x55555555; |
588 | x = ((x >> 2) & 0x33333333) + (x & 0x33333333); |
587 | x = ((x >> 2) & 0x33333333) + (x & 0x33333333); |
589 | x = ((x >> 4) + x) & 0x0f0f0f0f; |
588 | x = ((x >> 4) + x) & 0x0f0f0f0f; |
590 | x *= 0x01010101; |
589 | x *= 0x01010101; |
… | |
… | |
674 | #else |
673 | #else |
675 | return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); |
674 | return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); |
676 | #endif |
675 | #endif |
677 | } |
676 | } |
678 | |
677 | |
679 | ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count); |
|
|
680 | ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count); |
|
|
681 | ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count); |
|
|
682 | ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count); |
|
|
683 | ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count); |
|
|
684 | ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count); |
|
|
685 | ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count); |
|
|
686 | ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count); |
|
|
687 | |
|
|
688 | ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); } |
678 | ecb_inline uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); } |
689 | ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); } |
679 | ecb_inline uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); } |
690 | ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); } |
680 | ecb_inline uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); } |
691 | ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); } |
681 | ecb_inline uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); } |
692 | ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); } |
682 | ecb_inline uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); } |
693 | ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); } |
683 | ecb_inline uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); } |
694 | ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); } |
684 | ecb_inline uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); } |
695 | ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); } |
685 | ecb_inline uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); } |
696 | |
686 | |
697 | #if ECB_CPP |
687 | #if ECB_CPP |
698 | |
688 | |
699 | inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); } |
689 | inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); } |
700 | inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); } |
690 | inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); } |
… | |
… | |
774 | #endif |
764 | #endif |
775 | |
765 | |
776 | /* try to tell the compiler that some condition is definitely true */ |
766 | /* try to tell the compiler that some condition is definitely true */ |
777 | #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 |
767 | #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 |
778 | |
768 | |
779 | ecb_inline ecb_const uint32_t ecb_byteorder_helper (void); |
769 | ecb_inline uint32_t ecb_byteorder_helper (void); |
780 | ecb_inline ecb_const uint32_t ecb_byteorder_helper (void) |
770 | ecb_inline uint32_t ecb_byteorder_helper (void) |
781 | { |
771 | { |
782 | /* the union code still generates code under pressure in gcc, */ |
772 | /* the union code still generates code under pressure in gcc, */ |
783 | /* but less than using pointers, and always seems to */ |
773 | /* but less than using pointers, and always seems to */ |
784 | /* successfully return a constant. */ |
774 | /* successfully return a constant. */ |
785 | /* the reason why we have this horrible preprocessor mess */ |
775 | /* the reason why we have this horrible preprocessor mess */ |
… | |
… | |
801 | } u = { 0x11, 0x22, 0x33, 0x44 }; |
791 | } u = { 0x11, 0x22, 0x33, 0x44 }; |
802 | return u.u; |
792 | return u.u; |
803 | #endif |
793 | #endif |
804 | } |
794 | } |
805 | |
795 | |
806 | ecb_inline ecb_const ecb_bool ecb_big_endian (void); |
|
|
807 | ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; } |
796 | ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; } |
808 | ecb_inline ecb_const ecb_bool ecb_little_endian (void); |
|
|
809 | ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; } |
797 | ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; } |
810 | |
798 | |
811 | /*****************************************************************************/ |
799 | /*****************************************************************************/ |
812 | /* unaligned load/store */ |
800 | /* unaligned load/store */ |
813 | |
801 | |
… | |
… | |
880 | |
868 | |
881 | /*****************************************************************************/ |
869 | /*****************************************************************************/ |
882 | /* pointer/integer hashing */ |
870 | /* pointer/integer hashing */ |
883 | |
871 | |
884 | /* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */ |
872 | /* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */ |
885 | ecb_function_ uint32_t ecb_mix32 (uint32_t v); |
873 | ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v); |
886 | ecb_function_ uint32_t ecb_mix32 (uint32_t v) |
874 | ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v) |
887 | { |
875 | { |
888 | v ^= v >> 16; v *= 0x7feb352dU; |
876 | v ^= v >> 16; v *= 0x7feb352dU; |
889 | v ^= v >> 15; v *= 0x846ca68bU; |
877 | v ^= v >> 15; v *= 0x846ca68bU; |
890 | v ^= v >> 16; |
878 | v ^= v >> 16; |
891 | return v; |
879 | return v; |
892 | } |
880 | } |
893 | |
881 | |
894 | ecb_function_ uint32_t ecb_unmix32 (uint32_t v); |
882 | ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v); |
895 | ecb_function_ uint32_t ecb_unmix32 (uint32_t v) |
883 | ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v) |
896 | { |
884 | { |
897 | v ^= v >> 16 ; v *= 0x43021123U; |
885 | v ^= v >> 16 ; v *= 0x43021123U; |
898 | v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U; |
886 | v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U; |
899 | v ^= v >> 16 ; |
887 | v ^= v >> 16 ; |
900 | return v; |
888 | return v; |
901 | } |
889 | } |
902 | |
890 | |
903 | /* based on splitmix64, by Sebastiona Vigna, https://prng.di.unimi.it/splitmix64.c */ |
891 | /* based on splitmix64, by Sebastiona Vigna, https://prng.di.unimi.it/splitmix64.c */ |
904 | ecb_function_ uint64_t ecb_mix64 (uint64_t v); |
892 | ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v); |
905 | ecb_function_ uint64_t ecb_mix64 (uint64_t v) |
893 | ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v) |
906 | { |
894 | { |
907 | v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U; |
895 | v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U; |
908 | v ^= v >> 27; v *= 0x94d049bb133111ebU; |
896 | v ^= v >> 27; v *= 0x94d049bb133111ebU; |
909 | v ^= v >> 31; |
897 | v ^= v >> 31; |
910 | return v; |
898 | return v; |
911 | } |
899 | } |
912 | |
900 | |
913 | ecb_function_ uint64_t ecb_unmix64 (uint64_t v); |
901 | ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v); |
914 | ecb_function_ uint64_t ecb_unmix64 (uint64_t v) |
902 | ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v) |
915 | { |
903 | { |
916 | v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U; |
904 | v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U; |
917 | v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U; |
905 | v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U; |
918 | v ^= v >> 30 ^ v >> 60; |
906 | v ^= v >> 30 ^ v >> 60; |
919 | return v; |
907 | return v; |
920 | } |
908 | } |
921 | |
909 | |
922 | ecb_function_ uintptr_t ecb_ptrmix (void *p); |
910 | ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p); |
923 | ecb_function_ uintptr_t ecb_ptrmix (void *p) |
911 | ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p) |
924 | { |
912 | { |
925 | #if ECB_PTRSIZE <= 4 |
913 | #if ECB_PTRSIZE <= 4 |
926 | return ecb_mix32 ((uint32_t)p); |
914 | return ecb_mix32 ((uint32_t)p); |
927 | #else |
915 | #else |
928 | return ecb_mix64 ((uint64_t)p); |
916 | return ecb_mix64 ((uint64_t)p); |
929 | #endif |
917 | #endif |
930 | } |
918 | } |
931 | |
919 | |
932 | ecb_function_ void *ecb_ptrunmix (uintptr_t v); |
920 | ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v); |
933 | ecb_function_ void *ecb_ptrunmix (uintptr_t v) |
921 | ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v) |
934 | { |
922 | { |
935 | #if ECB_PTRSIZE <= 4 |
923 | #if ECB_PTRSIZE <= 4 |
936 | return (void *)ecb_unmix32 (v); |
924 | return (void *)ecb_unmix32 (v); |
937 | #else |
925 | #else |
938 | return (void *)ecb_unmix64 (v); |
926 | return (void *)ecb_unmix64 (v); |
… | |
… | |
961 | ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); } |
949 | ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); } |
962 | ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); } |
950 | ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); } |
963 | ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); } |
951 | ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); } |
964 | ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); } |
952 | ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); } |
965 | |
953 | |
966 | ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g); |
954 | ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g); |
967 | ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g) |
955 | ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g) |
968 | { |
956 | { |
969 | g ^= g >> 1; |
957 | g ^= g >> 1; |
970 | g ^= g >> 2; |
958 | g ^= g >> 2; |
971 | g ^= g >> 4; |
959 | g ^= g >> 4; |
972 | |
960 | |
973 | return g; |
961 | return g; |
974 | } |
962 | } |
975 | |
963 | |
976 | ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g); |
964 | ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g); |
977 | ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g) |
965 | ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g) |
978 | { |
966 | { |
979 | g ^= g >> 1; |
967 | g ^= g >> 1; |
980 | g ^= g >> 2; |
968 | g ^= g >> 2; |
981 | g ^= g >> 4; |
969 | g ^= g >> 4; |
982 | g ^= g >> 8; |
970 | g ^= g >> 8; |
983 | |
971 | |
984 | return g; |
972 | return g; |
985 | } |
973 | } |
986 | |
974 | |
987 | ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g); |
975 | ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g); |
988 | ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g) |
976 | ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g) |
989 | { |
977 | { |
990 | g ^= g >> 1; |
978 | g ^= g >> 1; |
991 | g ^= g >> 2; |
979 | g ^= g >> 2; |
992 | g ^= g >> 4; |
980 | g ^= g >> 4; |
993 | g ^= g >> 8; |
981 | g ^= g >> 8; |
994 | g ^= g >> 16; |
982 | g ^= g >> 16; |
995 | |
983 | |
996 | return g; |
984 | return g; |
997 | } |
985 | } |
998 | |
986 | |
999 | ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g); |
987 | ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g); |
1000 | ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g) |
988 | ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g) |
1001 | { |
989 | { |
1002 | g ^= g >> 1; |
990 | g ^= g >> 1; |
1003 | g ^= g >> 2; |
991 | g ^= g >> 2; |
1004 | g ^= g >> 4; |
992 | g ^= g >> 4; |
1005 | g ^= g >> 8; |
993 | g ^= g >> 8; |
… | |
… | |
1026 | /*****************************************************************************/ |
1014 | /*****************************************************************************/ |
1027 | /* 2d hilbert curves */ |
1015 | /* 2d hilbert curves */ |
1028 | |
1016 | |
1029 | /* algorithm from the book Hacker's Delight, modified to not */ |
1017 | /* algorithm from the book Hacker's Delight, modified to not */ |
1030 | /* run into undefined behaviour for n==16 */ |
1018 | /* run into undefined behaviour for n==16 */ |
1031 | static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s); |
1019 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s); |
1032 | static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s) |
1020 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s) |
1033 | { |
1021 | { |
1034 | uint32_t comp, swap, cs, t, sr; |
1022 | uint32_t comp, swap, cs, t, sr; |
1035 | |
1023 | |
1036 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
1024 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
1037 | s |= 0x55555555U << n << n; |
1025 | s |= 0x55555555U << n << n; |
… | |
… | |
1071 | /* now s contains two 16-bit coordinates */ |
1059 | /* now s contains two 16-bit coordinates */ |
1072 | return s; |
1060 | return s; |
1073 | } |
1061 | } |
1074 | |
1062 | |
1075 | /* 64 bit, a straightforward extension to the 32 bit case */ |
1063 | /* 64 bit, a straightforward extension to the 32 bit case */ |
1076 | static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s); |
1064 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s); |
1077 | static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s) |
1065 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s) |
1078 | { |
1066 | { |
1079 | uint64_t comp, swap, cs, t, sr; |
1067 | uint64_t comp, swap, cs, t, sr; |
1080 | |
1068 | |
1081 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
1069 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
1082 | s |= 0x5555555555555555U << n << n; |
1070 | s |= 0x5555555555555555U << n << n; |
… | |
… | |
1120 | } |
1108 | } |
1121 | |
1109 | |
1122 | /* algorithm from the book Hacker's Delight, but a similar algorithm*/ |
1110 | /* algorithm from the book Hacker's Delight, but a similar algorithm*/ |
1123 | /* is given in https://doi.org/10.1002/spe.4380160103 */ |
1111 | /* is given in https://doi.org/10.1002/spe.4380160103 */ |
1124 | /* this has been slightly improved over the original version */ |
1112 | /* this has been slightly improved over the original version */ |
1125 | ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy); |
1113 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy); |
1126 | ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy) |
1114 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy) |
1127 | { |
1115 | { |
1128 | uint32_t row; |
1116 | uint32_t row; |
1129 | uint32_t state = 0; |
1117 | uint32_t state = 0; |
1130 | uint32_t s = 0; |
1118 | uint32_t s = 0; |
1131 | |
1119 | |
… | |
… | |
1145 | |
1133 | |
1146 | return s; |
1134 | return s; |
1147 | } |
1135 | } |
1148 | |
1136 | |
1149 | /* 64 bit, essentially the same as 32 bit */ |
1137 | /* 64 bit, essentially the same as 32 bit */ |
1150 | ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy); |
1138 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy); |
1151 | ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy) |
1139 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy) |
1152 | { |
1140 | { |
1153 | uint32_t row; |
1141 | uint32_t row; |
1154 | uint32_t state = 0; |
1142 | uint32_t state = 0; |
1155 | uint64_t s = 0; |
1143 | uint64_t s = 0; |
1156 | |
1144 | |