ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libecb/ecb.h
(Generate patch)

Comparing libecb/ecb.h (file contents):
Revision 1.214 by root, Fri Mar 25 15:36:36 2022 UTC vs.
Revision 1.215 by root, Fri Mar 25 15:51:15 2022 UTC

579 return ecb_clz32 (l ? l : x) + shift; 579 return ecb_clz32 (l ? l : x) + shift;
580#endif 580#endif
581 } 581 }
582 582
583 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); 583 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x);
584 ecb_function_ ecb_const int 584 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x)
585 ecb_popcount32 (uint32_t x)
586 { 585 {
587 x -= (x >> 1) & 0x55555555; 586 x -= (x >> 1) & 0x55555555;
588 x = ((x >> 2) & 0x33333333) + (x & 0x33333333); 587 x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
589 x = ((x >> 4) + x) & 0x0f0f0f0f; 588 x = ((x >> 4) + x) & 0x0f0f0f0f;
590 x *= 0x01010101; 589 x *= 0x01010101;
674#else 673#else
675 return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); 674 return ecb_popcount32 (x) + ecb_popcount32 (x >> 32);
676#endif 675#endif
677} 676}
678 677
679ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count);
680ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count);
681ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count);
682ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count);
683ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count);
684ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count);
685ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count);
686ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count);
687
688ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); } 678ecb_inline uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); }
689ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); } 679ecb_inline uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); }
690ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); } 680ecb_inline uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); }
691ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); } 681ecb_inline uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); }
692ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); } 682ecb_inline uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); }
693ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); } 683ecb_inline uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); }
694ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); } 684ecb_inline uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); }
695ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); } 685ecb_inline uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); }
696 686
697#if ECB_CPP 687#if ECB_CPP
698 688
699inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); } 689inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); }
700inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); } 690inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); }
774#endif 764#endif
775 765
776/* try to tell the compiler that some condition is definitely true */ 766/* try to tell the compiler that some condition is definitely true */
777#define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 767#define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0
778 768
779ecb_inline ecb_const uint32_t ecb_byteorder_helper (void); 769ecb_inline uint32_t ecb_byteorder_helper (void);
780ecb_inline ecb_const uint32_t ecb_byteorder_helper (void) 770ecb_inline uint32_t ecb_byteorder_helper (void)
781{ 771{
782 /* the union code still generates code under pressure in gcc, */ 772 /* the union code still generates code under pressure in gcc, */
783 /* but less than using pointers, and always seems to */ 773 /* but less than using pointers, and always seems to */
784 /* successfully return a constant. */ 774 /* successfully return a constant. */
785 /* the reason why we have this horrible preprocessor mess */ 775 /* the reason why we have this horrible preprocessor mess */
801 } u = { 0x11, 0x22, 0x33, 0x44 }; 791 } u = { 0x11, 0x22, 0x33, 0x44 };
802 return u.u; 792 return u.u;
803#endif 793#endif
804} 794}
805 795
806ecb_inline ecb_const ecb_bool ecb_big_endian (void);
807ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; } 796ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; }
808ecb_inline ecb_const ecb_bool ecb_little_endian (void);
809ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; } 797ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; }
810 798
811/*****************************************************************************/ 799/*****************************************************************************/
812/* unaligned load/store */ 800/* unaligned load/store */
813 801
880 868
881/*****************************************************************************/ 869/*****************************************************************************/
882/* pointer/integer hashing */ 870/* pointer/integer hashing */
883 871
884/* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */ 872/* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */
885ecb_function_ uint32_t ecb_mix32 (uint32_t v); 873ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v);
886ecb_function_ uint32_t ecb_mix32 (uint32_t v) 874ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v)
887{ 875{
888 v ^= v >> 16; v *= 0x7feb352dU; 876 v ^= v >> 16; v *= 0x7feb352dU;
889 v ^= v >> 15; v *= 0x846ca68bU; 877 v ^= v >> 15; v *= 0x846ca68bU;
890 v ^= v >> 16; 878 v ^= v >> 16;
891 return v; 879 return v;
892} 880}
893 881
894ecb_function_ uint32_t ecb_unmix32 (uint32_t v); 882ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v);
895ecb_function_ uint32_t ecb_unmix32 (uint32_t v) 883ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v)
896{ 884{
897 v ^= v >> 16 ; v *= 0x43021123U; 885 v ^= v >> 16 ; v *= 0x43021123U;
898 v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U; 886 v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U;
899 v ^= v >> 16 ; 887 v ^= v >> 16 ;
900 return v; 888 return v;
901} 889}
902 890
903/* based on splitmix64, by Sebastiona Vigna, https://prng.di.unimi.it/splitmix64.c */ 891/* based on splitmix64, by Sebastiona Vigna, https://prng.di.unimi.it/splitmix64.c */
904ecb_function_ uint64_t ecb_mix64 (uint64_t v); 892ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v);
905ecb_function_ uint64_t ecb_mix64 (uint64_t v) 893ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v)
906{ 894{
907 v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U; 895 v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U;
908 v ^= v >> 27; v *= 0x94d049bb133111ebU; 896 v ^= v >> 27; v *= 0x94d049bb133111ebU;
909 v ^= v >> 31; 897 v ^= v >> 31;
910 return v; 898 return v;
911} 899}
912 900
913ecb_function_ uint64_t ecb_unmix64 (uint64_t v); 901ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v);
914ecb_function_ uint64_t ecb_unmix64 (uint64_t v) 902ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v)
915{ 903{
916 v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U; 904 v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U;
917 v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U; 905 v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U;
918 v ^= v >> 30 ^ v >> 60; 906 v ^= v >> 30 ^ v >> 60;
919 return v; 907 return v;
920} 908}
921 909
922ecb_function_ uintptr_t ecb_ptrmix (void *p); 910ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p);
923ecb_function_ uintptr_t ecb_ptrmix (void *p) 911ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p)
924{ 912{
925 #if ECB_PTRSIZE <= 4 913 #if ECB_PTRSIZE <= 4
926 return ecb_mix32 ((uint32_t)p); 914 return ecb_mix32 ((uint32_t)p);
927 #else 915 #else
928 return ecb_mix64 ((uint64_t)p); 916 return ecb_mix64 ((uint64_t)p);
929 #endif 917 #endif
930} 918}
931 919
932ecb_function_ void *ecb_ptrunmix (uintptr_t v); 920ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v);
933ecb_function_ void *ecb_ptrunmix (uintptr_t v) 921ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v)
934{ 922{
935 #if ECB_PTRSIZE <= 4 923 #if ECB_PTRSIZE <= 4
936 return (void *)ecb_unmix32 (v); 924 return (void *)ecb_unmix32 (v);
937 #else 925 #else
938 return (void *)ecb_unmix64 (v); 926 return (void *)ecb_unmix64 (v);
961ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); } 949ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); }
962ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); } 950ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); }
963ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); } 951ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); }
964ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); } 952ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); }
965 953
966ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g); 954ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g);
967ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g) 955ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g)
968{ 956{
969 g ^= g >> 1; 957 g ^= g >> 1;
970 g ^= g >> 2; 958 g ^= g >> 2;
971 g ^= g >> 4; 959 g ^= g >> 4;
972 960
973 return g; 961 return g;
974} 962}
975 963
976ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g); 964ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g);
977ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g) 965ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g)
978{ 966{
979 g ^= g >> 1; 967 g ^= g >> 1;
980 g ^= g >> 2; 968 g ^= g >> 2;
981 g ^= g >> 4; 969 g ^= g >> 4;
982 g ^= g >> 8; 970 g ^= g >> 8;
983 971
984 return g; 972 return g;
985} 973}
986 974
987ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g); 975ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g);
988ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g) 976ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g)
989{ 977{
990 g ^= g >> 1; 978 g ^= g >> 1;
991 g ^= g >> 2; 979 g ^= g >> 2;
992 g ^= g >> 4; 980 g ^= g >> 4;
993 g ^= g >> 8; 981 g ^= g >> 8;
994 g ^= g >> 16; 982 g ^= g >> 16;
995 983
996 return g; 984 return g;
997} 985}
998 986
999ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g); 987ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g);
1000ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g) 988ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g)
1001{ 989{
1002 g ^= g >> 1; 990 g ^= g >> 1;
1003 g ^= g >> 2; 991 g ^= g >> 2;
1004 g ^= g >> 4; 992 g ^= g >> 4;
1005 g ^= g >> 8; 993 g ^= g >> 8;
1026/*****************************************************************************/ 1014/*****************************************************************************/
1027/* 2d hilbert curves */ 1015/* 2d hilbert curves */
1028 1016
1029/* algorithm from the book Hacker's Delight, modified to not */ 1017/* algorithm from the book Hacker's Delight, modified to not */
1030/* run into undefined behaviour for n==16 */ 1018/* run into undefined behaviour for n==16 */
1031static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s); 1019ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s);
1032static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s) 1020ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s)
1033{ 1021{
1034 uint32_t comp, swap, cs, t, sr; 1022 uint32_t comp, swap, cs, t, sr;
1035 1023
1036 /* pad s on the left (unused) bits with 01 (no change groups) */ 1024 /* pad s on the left (unused) bits with 01 (no change groups) */
1037 s |= 0x55555555U << n << n; 1025 s |= 0x55555555U << n << n;
1071 /* now s contains two 16-bit coordinates */ 1059 /* now s contains two 16-bit coordinates */
1072 return s; 1060 return s;
1073} 1061}
1074 1062
1075/* 64 bit, a straightforward extension to the 32 bit case */ 1063/* 64 bit, a straightforward extension to the 32 bit case */
1076static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s); 1064ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s);
1077static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s) 1065ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s)
1078{ 1066{
1079 uint64_t comp, swap, cs, t, sr; 1067 uint64_t comp, swap, cs, t, sr;
1080 1068
1081 /* pad s on the left (unused) bits with 01 (no change groups) */ 1069 /* pad s on the left (unused) bits with 01 (no change groups) */
1082 s |= 0x5555555555555555U << n << n; 1070 s |= 0x5555555555555555U << n << n;
1120} 1108}
1121 1109
1122/* algorithm from the book Hacker's Delight, but a similar algorithm*/ 1110/* algorithm from the book Hacker's Delight, but a similar algorithm*/
1123/* is given in https://doi.org/10.1002/spe.4380160103 */ 1111/* is given in https://doi.org/10.1002/spe.4380160103 */
1124/* this has been slightly improved over the original version */ 1112/* this has been slightly improved over the original version */
1125ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy); 1113ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy);
1126ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy) 1114ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy)
1127{ 1115{
1128 uint32_t row; 1116 uint32_t row;
1129 uint32_t state = 0; 1117 uint32_t state = 0;
1130 uint32_t s = 0; 1118 uint32_t s = 0;
1131 1119
1145 1133
1146 return s; 1134 return s;
1147} 1135}
1148 1136
1149/* 64 bit, essentially the same as 32 bit */ 1137/* 64 bit, essentially the same as 32 bit */
1150ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy); 1138ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy);
1151ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy) 1139ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy)
1152{ 1140{
1153 uint32_t row; 1141 uint32_t row;
1154 uint32_t state = 0; 1142 uint32_t state = 0;
1155 uint64_t s = 0; 1143 uint64_t s = 0;
1156 1144

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines