ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libecb/ecb.h
(Generate patch)

Comparing libecb/ecb.h (file contents):
Revision 1.203 by root, Thu Mar 24 00:57:30 2022 UTC vs.
Revision 1.204 by root, Fri Mar 25 08:44:14 2022 UTC

40 40
41#ifndef ECB_H 41#ifndef ECB_H
42#define ECB_H 42#define ECB_H
43 43
44/* 16 bits major, 16 bits minor */ 44/* 16 bits major, 16 bits minor */
45#define ECB_VERSION 0x0001000b 45#define ECB_VERSION 0x0001000c
46 46
47#include <string.h> /* for memcpy */ 47#include <string.h> /* for memcpy */
48 48
49#if defined (_WIN32) && !defined (__MINGW32__) 49#if defined (_WIN32) && !defined (__MINGW32__)
50 typedef signed char int8_t; 50 typedef signed char int8_t;
948ecb_function_ uint64_t ecb_gray_decode (uint64_t g) { return ecb_gray64_decode (g); } 948ecb_function_ uint64_t ecb_gray_decode (uint64_t g) { return ecb_gray64_decode (g); }
949 949
950#endif 950#endif
951 951
952/*****************************************************************************/ 952/*****************************************************************************/
953/* 2d hilbert curves */
954
955/* algorithm from the book Hacker's Delight, modified to not */
956/* run into undefined behaviour for n==16 */
957static uint32_t
958ecb_hilbert2d_index_to_coord32 (int n, uint32_t s)
959{
960 uint32_t comp, swap, cs, t, sr;
961
962 /* pad s on the left (unused) bits with 01 (no change groups) */
963 s |= 0x55555555U << n << n;
964 /* "s shift right" */
965 sr = (s >> 1) & 0x55555555U;
966 /* compute complement and swap info in two-bit groups */
967 cs = ((s & 0x55555555U) + sr) ^ 0x55555555U;
968
969 /* parallel prefix xor op to propagate both complement
970 * and swap info together from left to right (there is
971 * no step "cs ^= cs >> 1", so in effect it computes
972 * two independent parallel prefix operations on two
973 * interleaved sets of sixteen bits).
974 */
975 cs ^= cs >> 2;
976 cs ^= cs >> 4;
977 cs ^= cs >> 8;
978 cs ^= cs >> 16;
979
980 /* separate swap and complement bits */
981 swap = cs & 0x55555555U;
982 comp = (cs >> 1) & 0x55555555U;
983
984 /* calculate coordinates in odd and even bit positions */
985 t = (s & swap) ^ comp;
986 s = s ^ sr ^ t ^ (t << 1);
987
988 /* unpad/clear out any junk on the left */
989 s = s & ((1 << n << n) - 1);
990
991 /* Now "unshuffle" to separate the x and y bits. */
992 t = (s ^ (s >> 1)) & 0x22222222U; s ^= t ^ (t << 1);
993 t = (s ^ (s >> 2)) & 0x0c0c0c0cU; s ^= t ^ (t << 2);
994 t = (s ^ (s >> 4)) & 0x00f000f0U; s ^= t ^ (t << 4);
995 t = (s ^ (s >> 8)) & 0x0000ff00U; s ^= t ^ (t << 8);
996
997 /* now s contains two 16-bit coordinates */
998 return s;
999}
1000
1001/* 64 bit, a straightforward extension to the 32 bit case */
1002static uint64_t
1003ecb_hilbert2d_index_to_coord64 (int n, uint64_t s)
1004{
1005 uint64_t comp, swap, cs, t, sr;
1006
1007 /* pad s on the left (unused) bits with 01 (no change groups) */
1008 s |= 0x5555555555555555U << n << n;
1009 /* "s shift right" */
1010 sr = (s >> 1) & 0x5555555555555555U;
1011 /* compute complement and swap info in two-bit groups */
1012 cs = ((s & 0x5555555555555555U) + sr) ^ 0x5555555555555555U;
1013
1014 /* parallel prefix xor op to propagate both complement
1015 * and swap info together from left to right (there is
1016 * no step "cs ^= cs >> 1", so in effect it computes
1017 * two independent parallel prefix operations on two
1018 * interleaved sets of thirty-two bits).
1019 */
1020 cs ^= cs >> 2;
1021 cs ^= cs >> 4;
1022 cs ^= cs >> 8;
1023 cs ^= cs >> 16;
1024 cs ^= cs >> 32;
1025
1026 /* separate swap and complement bits */
1027 swap = cs & 0x5555555555555555U;
1028 comp = (cs >> 1) & 0x5555555555555555U;
1029
1030 /* calculate coordinates in odd and even bit positions */
1031 t = (s & swap) ^ comp;
1032 s = s ^ sr ^ t ^ (t << 1);
1033
1034 /* unpad/clear out any junk on the left */
1035 s = s & ((1 << n << n) - 1);
1036
1037 /* Now "unshuffle" to separate the x and y bits. */
1038 t = (s ^ (s >> 1)) & 0x2222222222222222U; s ^= t ^ (t << 1);
1039 t = (s ^ (s >> 2)) & 0x0c0c0c0c0c0c0c0cU; s ^= t ^ (t << 2);
1040 t = (s ^ (s >> 4)) & 0x00f000f000f000f0U; s ^= t ^ (t << 4);
1041 t = (s ^ (s >> 8)) & 0x0000ff000000ff00U; s ^= t ^ (t << 8);
1042 t = (s ^ (s >> 16)) & 0x00000000ffff0000U; s ^= t ^ (t << 16);
1043
1044 /* now s contains two 32-bit coordinates */
1045 return s;
1046}
1047
1048/* algorithm from the book Hacker's Delight, but a similar algorithm*/
1049/* is given in https://doi.org/10.1002/spe.4380160103 */
1050/* this has been slightly improved over the original version */
1051ecb_function_ uint32_t
1052ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy)
1053{
1054 uint32_t row;
1055 uint32_t state = 0;
1056 uint32_t s = 0;
1057
1058 do
1059 {
1060 --n;
1061
1062 row = 4 * state
1063 | (2 & (xy >> n >> 15))
1064 | (1 & (xy >> n ));
1065
1066 /* these funky constants are lookup tables for two-bit values */
1067 s = (s << 2) | (0x361e9cb4U >> 2 * row) & 3;
1068 state = (0x8fe65831U >> 2 * row) & 3;
1069 }
1070 while (n > 0);
1071
1072 return s;
1073}
1074
1075/* 64 bit, essentially the same as 32 bit */
1076ecb_function_ uint64_t
1077ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy)
1078{
1079 uint32_t row;
1080 uint32_t state = 0;
1081 uint64_t s = 0;
1082
1083 do
1084 {
1085 --n;
1086
1087 row = 4 * state
1088 | (2 & (xy >> n >> 31))
1089 | (1 & (xy >> n ));
1090
1091 /* these funky constants are lookup tables for two-bit values */
1092 s = (s << 2) | (0x361e9cb4U >> 2 * row) & 3;
1093 state = (0x8fe65831U >> 2 * row) & 3;
1094 }
1095 while (n > 0);
1096
1097 return s;
1098}
1099
1100/*****************************************************************************/
953/* division */ 1101/* division */
954 1102
955#if ECB_GCC_VERSION(3,0) || ECB_C99 1103#if ECB_GCC_VERSION(3,0) || ECB_C99
956 /* C99 tightened the definition of %, so we can use a more efficient version */ 1104 /* C99 tightened the definition of %, so we can use a more efficient version */
957 #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0)) 1105 #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0))

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines