Revision 1.214 by

Revision 1.215 by

… | … | ||
---|---|---|---|

579 | return ecb_clz32 (l ? l : x) + shift; | 579 | return ecb_clz32 (l ? l : x) + shift; |

580 | #endif | 580 | #endif |

581 | } | 581 | } |

582 | 582 | ||

583 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); | 583 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); |

584 | ecb_function_ ecb_const int | 584 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x) |

585 | ecb_popcount32 (uint32_t x) | ||

586 | { | 585 | { |

587 | x -= (x >> 1) & 0x55555555; | 586 | x -= (x >> 1) & 0x55555555; |

588 | x = ((x >> 2) & 0x33333333) + (x & 0x33333333); | 587 | x = ((x >> 2) & 0x33333333) + (x & 0x33333333); |

589 | x = ((x >> 4) + x) & 0x0f0f0f0f; | 588 | x = ((x >> 4) + x) & 0x0f0f0f0f; |

590 | x *= 0x01010101; | 589 | x *= 0x01010101; |

… | … | ||

674 | #else | 673 | #else |

675 | return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); | 674 | return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); |

676 | #endif | 675 | #endif |

677 | } | 676 | } |

678 | 677 | ||

679 | ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count); | ||

680 | ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count); | ||

681 | ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count); | ||

682 | ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count); | ||

683 | ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count); | ||

684 | ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count); | ||

685 | ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count); | ||

686 | ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count); | ||

687 | |||

688 | ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); } | 678 | ecb_inline uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); } |

689 | ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); } | 679 | ecb_inline uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); } |

690 | ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); } | 680 | ecb_inline uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); } |

691 | ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); } | 681 | ecb_inline uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); } |

692 | ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); } | 682 | ecb_inline uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); } |

693 | ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); } | 683 | ecb_inline uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); } |

694 | ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); } | 684 | ecb_inline uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); } |

695 | ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); } | 685 | ecb_inline uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); } |

696 | 686 | ||

697 | #if ECB_CPP | 687 | #if ECB_CPP |

698 | 688 | ||

699 | inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); } | 689 | inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); } |

700 | inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); } | 690 | inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); } |

… | … | ||

774 | #endif | 764 | #endif |

775 | 765 | ||

776 | /* try to tell the compiler that some condition is definitely true */ | 766 | /* try to tell the compiler that some condition is definitely true */ |

777 | #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 | 767 | #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 |

778 | 768 | ||

779 | ecb_inline ecb_const uint32_t ecb_byteorder_helper (void); | 769 | ecb_inline uint32_t ecb_byteorder_helper (void); |

780 | ecb_inline ecb_const uint32_t ecb_byteorder_helper (void) | 770 | ecb_inline uint32_t ecb_byteorder_helper (void) |

781 | { | 771 | { |

782 | /* the union code still generates code under pressure in gcc, */ | 772 | /* the union code still generates code under pressure in gcc, */ |

783 | /* but less than using pointers, and always seems to */ | 773 | /* but less than using pointers, and always seems to */ |

784 | /* successfully return a constant. */ | 774 | /* successfully return a constant. */ |

785 | /* the reason why we have this horrible preprocessor mess */ | 775 | /* the reason why we have this horrible preprocessor mess */ |

… | … | ||

801 | } u = { 0x11, 0x22, 0x33, 0x44 }; | 791 | } u = { 0x11, 0x22, 0x33, 0x44 }; |

802 | return u.u; | 792 | return u.u; |

803 | #endif | 793 | #endif |

804 | } | 794 | } |

805 | 795 | ||

806 | ecb_inline ecb_const ecb_bool ecb_big_endian (void); | ||

807 | ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; } | 796 | ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; } |

808 | ecb_inline ecb_const ecb_bool ecb_little_endian (void); | ||

809 | ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; } | 797 | ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; } |

810 | 798 | ||

811 | /*****************************************************************************/ | 799 | /*****************************************************************************/ |

812 | /* unaligned load/store */ | 800 | /* unaligned load/store */ |

813 | 801 | ||

… | … | ||

880 | 868 | ||

881 | /*****************************************************************************/ | 869 | /*****************************************************************************/ |

882 | /* pointer/integer hashing */ | 870 | /* pointer/integer hashing */ |

883 | 871 | ||

884 | /* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */ | 872 | /* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */ |

885 | ecb_function_ uint32_t ecb_mix32 (uint32_t v); | 873 | ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v); |

886 | ecb_function_ uint32_t ecb_mix32 (uint32_t v) | 874 | ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v) |

887 | { | 875 | { |

888 | v ^= v >> 16; v *= 0x7feb352dU; | 876 | v ^= v >> 16; v *= 0x7feb352dU; |

889 | v ^= v >> 15; v *= 0x846ca68bU; | 877 | v ^= v >> 15; v *= 0x846ca68bU; |

890 | v ^= v >> 16; | 878 | v ^= v >> 16; |

891 | return v; | 879 | return v; |

892 | } | 880 | } |

893 | 881 | ||

894 | ecb_function_ uint32_t ecb_unmix32 (uint32_t v); | 882 | ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v); |

895 | ecb_function_ uint32_t ecb_unmix32 (uint32_t v) | 883 | ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v) |

896 | { | 884 | { |

897 | v ^= v >> 16 ; v *= 0x43021123U; | 885 | v ^= v >> 16 ; v *= 0x43021123U; |

898 | v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U; | 886 | v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U; |

899 | v ^= v >> 16 ; | 887 | v ^= v >> 16 ; |

900 | return v; | 888 | return v; |

901 | } | 889 | } |

902 | 890 | ||

903 | /* based on splitmix64, by Sebastiona Vigna, https://prng.di.unimi.it/splitmix64.c */ | 891 | /* based on splitmix64, by Sebastiona Vigna, https://prng.di.unimi.it/splitmix64.c */ |

904 | ecb_function_ uint64_t ecb_mix64 (uint64_t v); | 892 | ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v); |

905 | ecb_function_ uint64_t ecb_mix64 (uint64_t v) | 893 | ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v) |

906 | { | 894 | { |

907 | v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U; | 895 | v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U; |

908 | v ^= v >> 27; v *= 0x94d049bb133111ebU; | 896 | v ^= v >> 27; v *= 0x94d049bb133111ebU; |

909 | v ^= v >> 31; | 897 | v ^= v >> 31; |

910 | return v; | 898 | return v; |

911 | } | 899 | } |

912 | 900 | ||

913 | ecb_function_ uint64_t ecb_unmix64 (uint64_t v); | 901 | ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v); |

914 | ecb_function_ uint64_t ecb_unmix64 (uint64_t v) | 902 | ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v) |

915 | { | 903 | { |

916 | v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U; | 904 | v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U; |

917 | v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U; | 905 | v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U; |

918 | v ^= v >> 30 ^ v >> 60; | 906 | v ^= v >> 30 ^ v >> 60; |

919 | return v; | 907 | return v; |

920 | } | 908 | } |

921 | 909 | ||

922 | ecb_function_ uintptr_t ecb_ptrmix (void *p); | 910 | ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p); |

923 | ecb_function_ uintptr_t ecb_ptrmix (void *p) | 911 | ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p) |

924 | { | 912 | { |

925 | #if ECB_PTRSIZE <= 4 | 913 | #if ECB_PTRSIZE <= 4 |

926 | return ecb_mix32 ((uint32_t)p); | 914 | return ecb_mix32 ((uint32_t)p); |

927 | #else | 915 | #else |

928 | return ecb_mix64 ((uint64_t)p); | 916 | return ecb_mix64 ((uint64_t)p); |

929 | #endif | 917 | #endif |

930 | } | 918 | } |

931 | 919 | ||

932 | ecb_function_ void *ecb_ptrunmix (uintptr_t v); | 920 | ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v); |

933 | ecb_function_ void *ecb_ptrunmix (uintptr_t v) | 921 | ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v) |

934 | { | 922 | { |

935 | #if ECB_PTRSIZE <= 4 | 923 | #if ECB_PTRSIZE <= 4 |

936 | return (void *)ecb_unmix32 (v); | 924 | return (void *)ecb_unmix32 (v); |

937 | #else | 925 | #else |

938 | return (void *)ecb_unmix64 (v); | 926 | return (void *)ecb_unmix64 (v); |

… | … | ||

961 | ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); } | 949 | ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); } |

962 | ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); } | 950 | ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); } |

963 | ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); } | 951 | ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); } |

964 | ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); } | 952 | ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); } |

965 | 953 | ||

966 | ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g); | 954 | ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g); |

967 | ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g) | 955 | ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g) |

968 | { | 956 | { |

969 | g ^= g >> 1; | 957 | g ^= g >> 1; |

970 | g ^= g >> 2; | 958 | g ^= g >> 2; |

971 | g ^= g >> 4; | 959 | g ^= g >> 4; |

972 | 960 | ||

973 | return g; | 961 | return g; |

974 | } | 962 | } |

975 | 963 | ||

976 | ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g); | 964 | ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g); |

977 | ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g) | 965 | ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g) |

978 | { | 966 | { |

979 | g ^= g >> 1; | 967 | g ^= g >> 1; |

980 | g ^= g >> 2; | 968 | g ^= g >> 2; |

981 | g ^= g >> 4; | 969 | g ^= g >> 4; |

982 | g ^= g >> 8; | 970 | g ^= g >> 8; |

983 | 971 | ||

984 | return g; | 972 | return g; |

985 | } | 973 | } |

986 | 974 | ||

987 | ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g); | 975 | ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g); |

988 | ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g) | 976 | ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g) |

989 | { | 977 | { |

990 | g ^= g >> 1; | 978 | g ^= g >> 1; |

991 | g ^= g >> 2; | 979 | g ^= g >> 2; |

992 | g ^= g >> 4; | 980 | g ^= g >> 4; |

993 | g ^= g >> 8; | 981 | g ^= g >> 8; |

994 | g ^= g >> 16; | 982 | g ^= g >> 16; |

995 | 983 | ||

996 | return g; | 984 | return g; |

997 | } | 985 | } |

998 | 986 | ||

999 | ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g); | 987 | ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g); |

1000 | ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g) | 988 | ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g) |

1001 | { | 989 | { |

1002 | g ^= g >> 1; | 990 | g ^= g >> 1; |

1003 | g ^= g >> 2; | 991 | g ^= g >> 2; |

1004 | g ^= g >> 4; | 992 | g ^= g >> 4; |

1005 | g ^= g >> 8; | 993 | g ^= g >> 8; |

… | … | ||

1026 | /*****************************************************************************/ | 1014 | /*****************************************************************************/ |

1027 | /* 2d hilbert curves */ | 1015 | /* 2d hilbert curves */ |

1028 | 1016 | ||

1029 | /* algorithm from the book Hacker's Delight, modified to not */ | 1017 | /* algorithm from the book Hacker's Delight, modified to not */ |

1030 | /* run into undefined behaviour for n==16 */ | 1018 | /* run into undefined behaviour for n==16 */ |

1031 | static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s); | 1019 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s); |

1032 | static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s) | 1020 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s) |

1033 | { | 1021 | { |

1034 | uint32_t comp, swap, cs, t, sr; | 1022 | uint32_t comp, swap, cs, t, sr; |

1035 | 1023 | ||

1036 | /* pad s on the left (unused) bits with 01 (no change groups) */ | 1024 | /* pad s on the left (unused) bits with 01 (no change groups) */ |

1037 | s |= 0x55555555U << n << n; | 1025 | s |= 0x55555555U << n << n; |

… | … | ||

1071 | /* now s contains two 16-bit coordinates */ | 1059 | /* now s contains two 16-bit coordinates */ |

1072 | return s; | 1060 | return s; |

1073 | } | 1061 | } |

1074 | 1062 | ||

1075 | /* 64 bit, a straightforward extension to the 32 bit case */ | 1063 | /* 64 bit, a straightforward extension to the 32 bit case */ |

1076 | static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s); | 1064 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s); |

1077 | static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s) | 1065 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s) |

1078 | { | 1066 | { |

1079 | uint64_t comp, swap, cs, t, sr; | 1067 | uint64_t comp, swap, cs, t, sr; |

1080 | 1068 | ||

1081 | /* pad s on the left (unused) bits with 01 (no change groups) */ | 1069 | /* pad s on the left (unused) bits with 01 (no change groups) */ |

1082 | s |= 0x5555555555555555U << n << n; | 1070 | s |= 0x5555555555555555U << n << n; |

… | … | ||

1120 | } | 1108 | } |

1121 | 1109 | ||

1122 | /* algorithm from the book Hacker's Delight, but a similar algorithm*/ | 1110 | /* algorithm from the book Hacker's Delight, but a similar algorithm*/ |

1123 | /* is given in https://doi.org/10.1002/spe.4380160103 */ | 1111 | /* is given in https://doi.org/10.1002/spe.4380160103 */ |

1124 | /* this has been slightly improved over the original version */ | 1112 | /* this has been slightly improved over the original version */ |

1125 | ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy); | 1113 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy); |

1126 | ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy) | 1114 | ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy) |

1127 | { | 1115 | { |

1128 | uint32_t row; | 1116 | uint32_t row; |

1129 | uint32_t state = 0; | 1117 | uint32_t state = 0; |

1130 | uint32_t s = 0; | 1118 | uint32_t s = 0; |

1131 | 1119 | ||

… | … | ||

1145 | 1133 | ||

1146 | return s; | 1134 | return s; |

1147 | } | 1135 | } |

1148 | 1136 | ||

1149 | /* 64 bit, essentially the same as 32 bit */ | 1137 | /* 64 bit, essentially the same as 32 bit */ |

1150 | ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy); | 1138 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy); |

1151 | ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy) | 1139 | ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy) |

1152 | { | 1140 | { |

1153 | uint32_t row; | 1141 | uint32_t row; |

1154 | uint32_t state = 0; | 1142 | uint32_t state = 0; |

1155 | uint64_t s = 0; | 1143 | uint64_t s = 0; |

1156 | 1144 |

– |
Removed lines |

+ |
Added lines |

< |
Changed lines |

> |
Changed lines |