… | |
… | |
40 | |
40 | |
41 | #ifndef ECB_H |
41 | #ifndef ECB_H |
42 | #define ECB_H |
42 | #define ECB_H |
43 | |
43 | |
44 | /* 16 bits major, 16 bits minor */ |
44 | /* 16 bits major, 16 bits minor */ |
45 | #define ECB_VERSION 0x0001000b |
45 | #define ECB_VERSION 0x0001000c |
46 | |
46 | |
47 | #include <string.h> /* for memcpy */ |
47 | #include <string.h> /* for memcpy */ |
48 | |
48 | |
49 | #if defined (_WIN32) && !defined (__MINGW32__) |
49 | #if defined (_WIN32) && !defined (__MINGW32__) |
50 | typedef signed char int8_t; |
50 | typedef signed char int8_t; |
… | |
… | |
948 | ecb_function_ uint64_t ecb_gray_decode (uint64_t g) { return ecb_gray64_decode (g); } |
948 | ecb_function_ uint64_t ecb_gray_decode (uint64_t g) { return ecb_gray64_decode (g); } |
949 | |
949 | |
950 | #endif |
950 | #endif |
951 | |
951 | |
952 | /*****************************************************************************/ |
952 | /*****************************************************************************/ |
|
|
953 | /* 2d hilbert curves */ |
|
|
954 | |
|
|
955 | /* algorithm from the book Hacker's Delight, modified to not */ |
|
|
956 | /* run into undefined behaviour for n==16 */ |
|
|
957 | static uint32_t |
|
|
958 | ecb_hilbert2d_index_to_coord32 (int n, uint32_t s) |
|
|
959 | { |
|
|
960 | uint32_t comp, swap, cs, t, sr; |
|
|
961 | |
|
|
962 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
|
|
963 | s |= 0x55555555U << n << n; |
|
|
964 | /* "s shift right" */ |
|
|
965 | sr = (s >> 1) & 0x55555555U; |
|
|
966 | /* compute complement and swap info in two-bit groups */ |
|
|
967 | cs = ((s & 0x55555555U) + sr) ^ 0x55555555U; |
|
|
968 | |
|
|
969 | /* parallel prefix xor op to propagate both complement |
|
|
970 | * and swap info together from left to right (there is |
|
|
971 | * no step "cs ^= cs >> 1", so in effect it computes |
|
|
972 | * two independent parallel prefix operations on two |
|
|
973 | * interleaved sets of sixteen bits). |
|
|
974 | */ |
|
|
975 | cs ^= cs >> 2; |
|
|
976 | cs ^= cs >> 4; |
|
|
977 | cs ^= cs >> 8; |
|
|
978 | cs ^= cs >> 16; |
|
|
979 | |
|
|
980 | /* separate swap and complement bits */ |
|
|
981 | swap = cs & 0x55555555U; |
|
|
982 | comp = (cs >> 1) & 0x55555555U; |
|
|
983 | |
|
|
984 | /* calculate coordinates in odd and even bit positions */ |
|
|
985 | t = (s & swap) ^ comp; |
|
|
986 | s = s ^ sr ^ t ^ (t << 1); |
|
|
987 | |
|
|
988 | /* unpad/clear out any junk on the left */ |
|
|
989 | s = s & ((1 << n << n) - 1); |
|
|
990 | |
|
|
991 | /* Now "unshuffle" to separate the x and y bits. */ |
|
|
992 | t = (s ^ (s >> 1)) & 0x22222222U; s ^= t ^ (t << 1); |
|
|
993 | t = (s ^ (s >> 2)) & 0x0c0c0c0cU; s ^= t ^ (t << 2); |
|
|
994 | t = (s ^ (s >> 4)) & 0x00f000f0U; s ^= t ^ (t << 4); |
|
|
995 | t = (s ^ (s >> 8)) & 0x0000ff00U; s ^= t ^ (t << 8); |
|
|
996 | |
|
|
997 | /* now s contains two 16-bit coordinates */ |
|
|
998 | return s; |
|
|
999 | } |
|
|
1000 | |
|
|
1001 | /* 64 bit, a straightforward extension to the 32 bit case */ |
|
|
1002 | static uint64_t |
|
|
1003 | ecb_hilbert2d_index_to_coord64 (int n, uint64_t s) |
|
|
1004 | { |
|
|
1005 | uint64_t comp, swap, cs, t, sr; |
|
|
1006 | |
|
|
1007 | /* pad s on the left (unused) bits with 01 (no change groups) */ |
|
|
1008 | s |= 0x5555555555555555U << n << n; |
|
|
1009 | /* "s shift right" */ |
|
|
1010 | sr = (s >> 1) & 0x5555555555555555U; |
|
|
1011 | /* compute complement and swap info in two-bit groups */ |
|
|
1012 | cs = ((s & 0x5555555555555555U) + sr) ^ 0x5555555555555555U; |
|
|
1013 | |
|
|
1014 | /* parallel prefix xor op to propagate both complement |
|
|
1015 | * and swap info together from left to right (there is |
|
|
1016 | * no step "cs ^= cs >> 1", so in effect it computes |
|
|
1017 | * two independent parallel prefix operations on two |
|
|
1018 | * interleaved sets of thirty-two bits). |
|
|
1019 | */ |
|
|
1020 | cs ^= cs >> 2; |
|
|
1021 | cs ^= cs >> 4; |
|
|
1022 | cs ^= cs >> 8; |
|
|
1023 | cs ^= cs >> 16; |
|
|
1024 | cs ^= cs >> 32; |
|
|
1025 | |
|
|
1026 | /* separate swap and complement bits */ |
|
|
1027 | swap = cs & 0x5555555555555555U; |
|
|
1028 | comp = (cs >> 1) & 0x5555555555555555U; |
|
|
1029 | |
|
|
1030 | /* calculate coordinates in odd and even bit positions */ |
|
|
1031 | t = (s & swap) ^ comp; |
|
|
1032 | s = s ^ sr ^ t ^ (t << 1); |
|
|
1033 | |
|
|
1034 | /* unpad/clear out any junk on the left */ |
|
|
1035 | s = s & ((1 << n << n) - 1); |
|
|
1036 | |
|
|
1037 | /* Now "unshuffle" to separate the x and y bits. */ |
|
|
1038 | t = (s ^ (s >> 1)) & 0x2222222222222222U; s ^= t ^ (t << 1); |
|
|
1039 | t = (s ^ (s >> 2)) & 0x0c0c0c0c0c0c0c0cU; s ^= t ^ (t << 2); |
|
|
1040 | t = (s ^ (s >> 4)) & 0x00f000f000f000f0U; s ^= t ^ (t << 4); |
|
|
1041 | t = (s ^ (s >> 8)) & 0x0000ff000000ff00U; s ^= t ^ (t << 8); |
|
|
1042 | t = (s ^ (s >> 16)) & 0x00000000ffff0000U; s ^= t ^ (t << 16); |
|
|
1043 | |
|
|
1044 | /* now s contains two 32-bit coordinates */ |
|
|
1045 | return s; |
|
|
1046 | } |
|
|
1047 | |
|
|
1048 | /* algorithm from the book Hacker's Delight, but a similar algorithm*/ |
|
|
1049 | /* is given in https://doi.org/10.1002/spe.4380160103 */ |
|
|
1050 | /* this has been slightly improved over the original version */ |
|
|
1051 | ecb_function_ uint32_t |
|
|
1052 | ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy) |
|
|
1053 | { |
|
|
1054 | uint32_t row; |
|
|
1055 | uint32_t state = 0; |
|
|
1056 | uint32_t s = 0; |
|
|
1057 | |
|
|
1058 | do |
|
|
1059 | { |
|
|
1060 | --n; |
|
|
1061 | |
|
|
1062 | row = 4 * state |
|
|
1063 | | (2 & (xy >> n >> 15)) |
|
|
1064 | | (1 & (xy >> n )); |
|
|
1065 | |
|
|
1066 | /* these funky constants are lookup tables for two-bit values */ |
|
|
1067 | s = (s << 2) | (0x361e9cb4U >> 2 * row) & 3; |
|
|
1068 | state = (0x8fe65831U >> 2 * row) & 3; |
|
|
1069 | } |
|
|
1070 | while (n > 0); |
|
|
1071 | |
|
|
1072 | return s; |
|
|
1073 | } |
|
|
1074 | |
|
|
1075 | /* 64 bit, essentially the same as 32 bit */ |
|
|
1076 | ecb_function_ uint64_t |
|
|
1077 | ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy) |
|
|
1078 | { |
|
|
1079 | uint32_t row; |
|
|
1080 | uint32_t state = 0; |
|
|
1081 | uint64_t s = 0; |
|
|
1082 | |
|
|
1083 | do |
|
|
1084 | { |
|
|
1085 | --n; |
|
|
1086 | |
|
|
1087 | row = 4 * state |
|
|
1088 | | (2 & (xy >> n >> 31)) |
|
|
1089 | | (1 & (xy >> n )); |
|
|
1090 | |
|
|
1091 | /* these funky constants are lookup tables for two-bit values */ |
|
|
1092 | s = (s << 2) | (0x361e9cb4U >> 2 * row) & 3; |
|
|
1093 | state = (0x8fe65831U >> 2 * row) & 3; |
|
|
1094 | } |
|
|
1095 | while (n > 0); |
|
|
1096 | |
|
|
1097 | return s; |
|
|
1098 | } |
|
|
1099 | |
|
|
1100 | /*****************************************************************************/ |
953 | /* division */ |
1101 | /* division */ |
954 | |
1102 | |
955 | #if ECB_GCC_VERSION(3,0) || ECB_C99 |
1103 | #if ECB_GCC_VERSION(3,0) || ECB_C99 |
956 | /* C99 tightened the definition of %, so we can use a more efficient version */ |
1104 | /* C99 tightened the definition of %, so we can use a more efficient version */ |
957 | #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0)) |
1105 | #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0)) |