ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/libecb/ecb.h
(Generate patch)

Comparing libecb/ecb.h (file contents):
Revision 1.212 by root, Fri Mar 25 15:31:22 2022 UTC vs.
Revision 1.215 by root, Fri Mar 25 15:51:15 2022 UTC

468 #define ecb_ld64(x) (ecb_clz64 (x) ^ 63) 468 #define ecb_ld64(x) (ecb_clz64 (x) ^ 63)
469 #define ecb_popcount32(x) __builtin_popcount (x) 469 #define ecb_popcount32(x) __builtin_popcount (x)
470 /* ecb_popcount64 is more difficult, see below */ 470 /* ecb_popcount64 is more difficult, see below */
471#else 471#else
472 ecb_function_ ecb_const int ecb_ctz32 (uint32_t x); 472 ecb_function_ ecb_const int ecb_ctz32 (uint32_t x);
473 ecb_function_ ecb_const int 473 ecb_function_ ecb_const int ecb_ctz32 (uint32_t x)
474 ecb_ctz32 (uint32_t x)
475 { 474 {
476#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) 475#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
477 unsigned long r; 476 unsigned long r;
478 _BitScanForward (&r, x); 477 _BitScanForward (&r, x);
479 return (int)r; 478 return (int)r;
518 return r; 517 return r;
519#endif 518#endif
520 } 519 }
521 520
522 ecb_function_ ecb_const int ecb_ctz64 (uint64_t x); 521 ecb_function_ ecb_const int ecb_ctz64 (uint64_t x);
523 ecb_function_ ecb_const int 522 ecb_function_ ecb_const int ecb_ctz64 (uint64_t x)
524 ecb_ctz64 (uint64_t x)
525 { 523 {
526#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) 524#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
527 unsigned long r; 525 unsigned long r;
528 _BitScanForward64 (&r, x); 526 _BitScanForward64 (&r, x);
529 return (int)r; 527 return (int)r;
532 return ecb_ctz32 (x >> shift) + shift; 530 return ecb_ctz32 (x >> shift) + shift;
533#endif 531#endif
534 } 532 }
535 533
536 ecb_function_ ecb_const int ecb_clz32 (uint32_t x); 534 ecb_function_ ecb_const int ecb_clz32 (uint32_t x);
537 ecb_function_ ecb_const int 535 ecb_function_ ecb_const int ecb_clz32 (uint32_t x)
538 ecb_clz32 (uint32_t x)
539 { 536 {
540#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) 537#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
541 unsigned long r; 538 unsigned long r;
542 _BitScanReverse (&r, x); 539 _BitScanReverse (&r, x);
543 return (int)r; 540 return (int)r;
568 return table [x >> 26]; 565 return table [x >> 26];
569#endif 566#endif
570 } 567 }
571 568
572 ecb_function_ ecb_const int ecb_clz64 (uint64_t x); 569 ecb_function_ ecb_const int ecb_clz64 (uint64_t x);
573 ecb_function_ ecb_const int 570 ecb_function_ ecb_const int ecb_clz64 (uint64_t x)
574 ecb_clz64 (uint64_t x)
575 { 571 {
576#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) 572#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
577 unsigned long r; 573 unsigned long r;
578 _BitScanReverse64 (&r, x); 574 _BitScanReverse64 (&r, x);
579 return (int)r; 575 return (int)r;
583 return ecb_clz32 (l ? l : x) + shift; 579 return ecb_clz32 (l ? l : x) + shift;
584#endif 580#endif
585 } 581 }
586 582
587 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); 583 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x);
588 ecb_function_ ecb_const int 584 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x)
589 ecb_popcount32 (uint32_t x)
590 { 585 {
591 x -= (x >> 1) & 0x55555555; 586 x -= (x >> 1) & 0x55555555;
592 x = ((x >> 2) & 0x33333333) + (x & 0x33333333); 587 x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
593 x = ((x >> 4) + x) & 0x0f0f0f0f; 588 x = ((x >> 4) + x) & 0x0f0f0f0f;
594 x *= 0x01010101; 589 x *= 0x01010101;
667 662
668 return x; 663 return x;
669} 664}
670 665
671ecb_function_ ecb_const int ecb_popcount64 (uint64_t x); 666ecb_function_ ecb_const int ecb_popcount64 (uint64_t x);
672ecb_function_ ecb_const int 667ecb_function_ ecb_const int ecb_popcount64 (uint64_t x)
673ecb_popcount64 (uint64_t x)
674{ 668{
675 /* popcount64 is only available on 64 bit cpus as gcc builtin. */ 669 /* popcount64 is only available on 64 bit cpus as gcc builtin. */
676 /* also, gcc/clang make this surprisingly difficult to use */ 670 /* also, gcc/clang make this surprisingly difficult to use */
677#if (__SIZEOF_LONG__ == 8) && (ECB_GCC_VERSION(3,4) || ECB_CLANG_BUILTIN (__builtin_popcountl)) 671#if (__SIZEOF_LONG__ == 8) && (ECB_GCC_VERSION(3,4) || ECB_CLANG_BUILTIN (__builtin_popcountl))
678 return __builtin_popcountl (x); 672 return __builtin_popcountl (x);
679#else 673#else
680 return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); 674 return ecb_popcount32 (x) + ecb_popcount32 (x >> 32);
681#endif 675#endif
682} 676}
683 677
684ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count);
685ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count);
686ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count);
687ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count);
688ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count);
689ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count);
690ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count);
691ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count);
692
693ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); } 678ecb_inline uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); }
694ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); } 679ecb_inline uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); }
695ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); } 680ecb_inline uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); }
696ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); } 681ecb_inline uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); }
697ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); } 682ecb_inline uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); }
698ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); } 683ecb_inline uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); }
699ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); } 684ecb_inline uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); }
700ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); } 685ecb_inline uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); }
701 686
702#if ECB_CPP 687#if ECB_CPP
703 688
704inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); } 689inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); }
705inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); } 690inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); }
750 #define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x))) 735 #define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x)))
751 #define ecb_bswap32(x) ((uint32_t)_byteswap_ulong ((uint32_t)(x))) 736 #define ecb_bswap32(x) ((uint32_t)_byteswap_ulong ((uint32_t)(x)))
752 #define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x))) 737 #define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x)))
753#else 738#else
754 ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x); 739 ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x);
755 ecb_function_ ecb_const uint16_t 740 ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x)
756 ecb_bswap16 (uint16_t x)
757 { 741 {
758 return ecb_rotl16 (x, 8); 742 return ecb_rotl16 (x, 8);
759 } 743 }
760 744
761 ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x); 745 ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x);
762 ecb_function_ ecb_const uint32_t 746 ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x)
763 ecb_bswap32 (uint32_t x)
764 { 747 {
765 return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16); 748 return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16);
766 } 749 }
767 750
768 ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x); 751 ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x);
769 ecb_function_ ecb_const uint64_t 752 ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x)
770 ecb_bswap64 (uint64_t x)
771 { 753 {
772 return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32); 754 return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32);
773 } 755 }
774#endif 756#endif
775 757
782#endif 764#endif
783 765
784/* try to tell the compiler that some condition is definitely true */ 766/* try to tell the compiler that some condition is definitely true */
785#define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 767#define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0
786 768
787ecb_inline ecb_const uint32_t ecb_byteorder_helper (void); 769ecb_inline uint32_t ecb_byteorder_helper (void);
788ecb_inline ecb_const uint32_t 770ecb_inline uint32_t ecb_byteorder_helper (void)
789ecb_byteorder_helper (void)
790{ 771{
791 /* the union code still generates code under pressure in gcc, */ 772 /* the union code still generates code under pressure in gcc, */
792 /* but less than using pointers, and always seems to */ 773 /* but less than using pointers, and always seems to */
793 /* successfully return a constant. */ 774 /* successfully return a constant. */
794 /* the reason why we have this horrible preprocessor mess */ 775 /* the reason why we have this horrible preprocessor mess */
810 } u = { 0x11, 0x22, 0x33, 0x44 }; 791 } u = { 0x11, 0x22, 0x33, 0x44 };
811 return u.u; 792 return u.u;
812#endif 793#endif
813} 794}
814 795
815ecb_inline ecb_const ecb_bool ecb_big_endian (void);
816ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; } 796ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; }
817ecb_inline ecb_const ecb_bool ecb_little_endian (void);
818ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; } 797ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; }
819 798
820/*****************************************************************************/ 799/*****************************************************************************/
821/* unaligned load/store */ 800/* unaligned load/store */
822 801
889 868
890/*****************************************************************************/ 869/*****************************************************************************/
891/* pointer/integer hashing */ 870/* pointer/integer hashing */
892 871
893/* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */ 872/* based on hash by Chris Wellons, https://nullprogram.com/blog/2018/07/31/ */
894ecb_function_ uint32_t ecb_mix32 (uint32_t v); 873ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v);
895ecb_function_ uint32_t ecb_mix32 (uint32_t v) 874ecb_function_ ecb_const uint32_t ecb_mix32 (uint32_t v)
896{ 875{
897 v ^= v >> 16; v *= 0x7feb352dU; 876 v ^= v >> 16; v *= 0x7feb352dU;
898 v ^= v >> 15; v *= 0x846ca68bU; 877 v ^= v >> 15; v *= 0x846ca68bU;
899 v ^= v >> 16; 878 v ^= v >> 16;
900 return v; 879 return v;
901} 880}
902 881
903ecb_function_ uint32_t ecb_unmix32 (uint32_t v); 882ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v);
904ecb_function_ uint32_t ecb_unmix32 (uint32_t v) 883ecb_function_ ecb_const uint32_t ecb_unmix32 (uint32_t v)
905{ 884{
906 v ^= v >> 16 ; v *= 0x43021123U; 885 v ^= v >> 16 ; v *= 0x43021123U;
907 v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U; 886 v ^= v >> 15 ^ v >> 30; v *= 0x1d69e2a5U;
908 v ^= v >> 16 ; 887 v ^= v >> 16 ;
909 return v; 888 return v;
910} 889}
911 890
912/* based on splitmix64, by Sebastiano Vigna, https://prng.di.unimi.it/splitmix64.c */ 891/* based on splitmix64, by Sebastiano Vigna, https://prng.di.unimi.it/splitmix64.c */
913ecb_function_ uint64_t ecb_mix64 (uint64_t v); 892ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v);
914ecb_function_ uint64_t ecb_mix64 (uint64_t v) 893ecb_function_ ecb_const uint64_t ecb_mix64 (uint64_t v)
915{ 894{
916 v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U; 895 v ^= v >> 30; v *= 0xbf58476d1ce4e5b9U;
917 v ^= v >> 27; v *= 0x94d049bb133111ebU; 896 v ^= v >> 27; v *= 0x94d049bb133111ebU;
918 v ^= v >> 31; 897 v ^= v >> 31;
919 return v; 898 return v;
920} 899}
921 900
922ecb_function_ uint64_t ecb_unmix64 (uint64_t v); 901ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v);
923ecb_function_ uint64_t ecb_unmix64 (uint64_t v) 902ecb_function_ ecb_const uint64_t ecb_unmix64 (uint64_t v)
924{ 903{
925 v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U; 904 v ^= v >> 31 ^ v >> 62; v *= 0x319642b2d24d8ec3U;
926 v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U; 905 v ^= v >> 27 ^ v >> 54; v *= 0x96de1b173f119089U;
927 v ^= v >> 30 ^ v >> 60; 906 v ^= v >> 30 ^ v >> 60;
928 return v; 907 return v;
929} 908}
930 909
931ecb_function_ uintptr_t ecb_ptrmix (void *p); 910ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p);
932ecb_function_ uintptr_t ecb_ptrmix (void *p) 911ecb_function_ ecb_const uintptr_t ecb_ptrmix (void *p)
933{ 912{
934 #if ECB_PTRSIZE <= 4 913 #if ECB_PTRSIZE <= 4
935 return ecb_mix32 ((uint32_t)p); 914 return ecb_mix32 ((uint32_t)p);
936 #else 915 #else
937 return ecb_mix64 ((uint64_t)p); 916 return ecb_mix64 ((uint64_t)p);
938 #endif 917 #endif
939} 918}
940 919
941ecb_function_ void *ecb_ptrunmix (uintptr_t v); 920ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v);
942ecb_function_ void *ecb_ptrunmix (uintptr_t v) 921ecb_function_ ecb_const void *ecb_ptrunmix (uintptr_t v)
943{ 922{
944 #if ECB_PTRSIZE <= 4 923 #if ECB_PTRSIZE <= 4
945 return (void *)ecb_unmix32 (v); 924 return (void *)ecb_unmix32 (v);
946 #else 925 #else
947 return (void *)ecb_unmix64 (v); 926 return (void *)ecb_unmix64 (v);
970ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); } 949ecb_inline uint_fast8_t ecb_gray_encode8 (uint_fast8_t b) { return b ^ (b >> 1); }
971ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); } 950ecb_inline uint_fast16_t ecb_gray_encode16 (uint_fast16_t b) { return b ^ (b >> 1); }
972ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); } 951ecb_inline uint_fast32_t ecb_gray_encode32 (uint_fast32_t b) { return b ^ (b >> 1); }
973ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); } 952ecb_inline uint_fast64_t ecb_gray_encode64 (uint_fast64_t b) { return b ^ (b >> 1); }
974 953
975ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g); 954ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g);
976ecb_function_ uint8_t ecb_gray_decode8 (uint8_t g) 955ecb_function_ ecb_const uint8_t ecb_gray_decode8 (uint8_t g)
977{ 956{
978 g ^= g >> 1; 957 g ^= g >> 1;
979 g ^= g >> 2; 958 g ^= g >> 2;
980 g ^= g >> 4; 959 g ^= g >> 4;
981 960
982 return g; 961 return g;
983} 962}
984 963
985ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g); 964ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g);
986ecb_function_ uint16_t ecb_gray_decode16 (uint16_t g) 965ecb_function_ ecb_const uint16_t ecb_gray_decode16 (uint16_t g)
987{ 966{
988 g ^= g >> 1; 967 g ^= g >> 1;
989 g ^= g >> 2; 968 g ^= g >> 2;
990 g ^= g >> 4; 969 g ^= g >> 4;
991 g ^= g >> 8; 970 g ^= g >> 8;
992 971
993 return g; 972 return g;
994} 973}
995 974
996ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g); 975ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g);
997ecb_function_ uint32_t ecb_gray_decode32 (uint32_t g) 976ecb_function_ ecb_const uint32_t ecb_gray_decode32 (uint32_t g)
998{ 977{
999 g ^= g >> 1; 978 g ^= g >> 1;
1000 g ^= g >> 2; 979 g ^= g >> 2;
1001 g ^= g >> 4; 980 g ^= g >> 4;
1002 g ^= g >> 8; 981 g ^= g >> 8;
1003 g ^= g >> 16; 982 g ^= g >> 16;
1004 983
1005 return g; 984 return g;
1006} 985}
1007 986
1008ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g); 987ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g);
1009ecb_function_ uint64_t ecb_gray_decode64 (uint64_t g) 988ecb_function_ ecb_const uint64_t ecb_gray_decode64 (uint64_t g)
1010{ 989{
1011 g ^= g >> 1; 990 g ^= g >> 1;
1012 g ^= g >> 2; 991 g ^= g >> 2;
1013 g ^= g >> 4; 992 g ^= g >> 4;
1014 g ^= g >> 8; 993 g ^= g >> 8;
1035/*****************************************************************************/ 1014/*****************************************************************************/
1036/* 2d hilbert curves */ 1015/* 2d hilbert curves */
1037 1016
1038/* algorithm from the book Hacker's Delight, modified to not */ 1017/* algorithm from the book Hacker's Delight, modified to not */
1039/* run into undefined behaviour for n==16 */ 1018/* run into undefined behaviour for n==16 */
1040static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s); 1019ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s);
1041static uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s) 1020ecb_function_ ecb_const uint32_t ecb_hilbert2d_index_to_coord32 (int n, uint32_t s)
1042{ 1021{
1043 uint32_t comp, swap, cs, t, sr; 1022 uint32_t comp, swap, cs, t, sr;
1044 1023
1045 /* pad s on the left (unused) bits with 01 (no change groups) */ 1024 /* pad s on the left (unused) bits with 01 (no change groups) */
1046 s |= 0x55555555U << n << n; 1025 s |= 0x55555555U << n << n;
1080 /* now s contains two 16-bit coordinates */ 1059 /* now s contains two 16-bit coordinates */
1081 return s; 1060 return s;
1082} 1061}
1083 1062
1084/* 64 bit, a straightforward extension to the 32 bit case */ 1063/* 64 bit, a straightforward extension to the 32 bit case */
1085static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s); 1064ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s);
1086static uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s) 1065ecb_function_ ecb_const uint64_t ecb_hilbert2d_index_to_coord64 (int n, uint64_t s)
1087{ 1066{
1088 uint64_t comp, swap, cs, t, sr; 1067 uint64_t comp, swap, cs, t, sr;
1089 1068
1090 /* pad s on the left (unused) bits with 01 (no change groups) */ 1069 /* pad s on the left (unused) bits with 01 (no change groups) */
1091 s |= 0x5555555555555555U << n << n; 1070 s |= 0x5555555555555555U << n << n;
1129} 1108}
1130 1109
1131/* algorithm from the book Hacker's Delight, but a similar algorithm*/ 1110/* algorithm from the book Hacker's Delight, but a similar algorithm*/
1132/* is given in https://doi.org/10.1002/spe.4380160103 */ 1111/* is given in https://doi.org/10.1002/spe.4380160103 */
1133/* this has been slightly improved over the original version */ 1112/* this has been slightly improved over the original version */
1134ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy); 1113ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy);
1135ecb_function_ uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy) 1114ecb_function_ ecb_const uint32_t ecb_hilbert2d_coord_to_index32 (int n, uint32_t xy)
1136{ 1115{
1137 uint32_t row; 1116 uint32_t row;
1138 uint32_t state = 0; 1117 uint32_t state = 0;
1139 uint32_t s = 0; 1118 uint32_t s = 0;
1140 1119
1154 1133
1155 return s; 1134 return s;
1156} 1135}
1157 1136
1158/* 64 bit, essentially the same as 32 bit */ 1137/* 64 bit, essentially the same as 32 bit */
1159ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy); 1138ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy);
1160ecb_function_ uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy) 1139ecb_function_ ecb_const uint64_t ecb_hilbert2d_coord_to_index64 (int n, uint64_t xy)
1161{ 1140{
1162 uint32_t row; 1141 uint32_t row;
1163 uint32_t state = 0; 1142 uint32_t state = 0;
1164 uint64_t s = 0; 1143 uint64_t s = 0;
1165 1144
1222 1201
1223/*****************************************************************************/ 1202/*****************************************************************************/
1224/* IEEE 754-2008 half float conversions */ 1203/* IEEE 754-2008 half float conversions */
1225 1204
1226ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x); 1205ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x);
1227ecb_function_ ecb_const uint32_t 1206ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x)
1228ecb_binary16_to_binary32 (uint32_t x)
1229{ 1207{
1230 unsigned int s = (x & 0x8000) << (31 - 15); 1208 unsigned int s = (x & 0x8000) << (31 - 15);
1231 int e = (x >> 10) & 0x001f; 1209 int e = (x >> 10) & 0x001f;
1232 unsigned int m = x & 0x03ff; 1210 unsigned int m = x & 0x03ff;
1233 1211
1254 1232
1255 return s | (e << 23) | (m << (23 - 10)); 1233 return s | (e << 23) | (m << (23 - 10));
1256} 1234}
1257 1235
1258ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x); 1236ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x);
1259ecb_function_ ecb_const uint16_t 1237ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x)
1260ecb_binary32_to_binary16 (uint32_t x)
1261{ 1238{
1262 unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */ 1239 unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */
1263 int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */ 1240 int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */
1264 unsigned int m = x & 0x007fffff; 1241 unsigned int m = x & 0x007fffff;
1265 1242
1406#define ECB_I2A_U32_DIGITS 10 1383#define ECB_I2A_U32_DIGITS 10
1407#define ECB_I2A_I64_DIGITS 20 1384#define ECB_I2A_I64_DIGITS 20
1408#define ECB_I2A_U64_DIGITS 21 1385#define ECB_I2A_U64_DIGITS 21
1409#define ECB_I2A_MAX_DIGITS 21 1386#define ECB_I2A_MAX_DIGITS 21
1410 1387
1411ecb_inline char * 1388ecb_function_ char * ecb_i2a_u32 (char *ptr, uint32_t u);
1412ecb_i2a_u32 (char *ptr, uint32_t u) 1389ecb_function_ char * ecb_i2a_u32 (char *ptr, uint32_t u)
1413{ 1390{
1414 #if ECB_64BIT_NATIVE 1391 #if ECB_64BIT_NATIVE
1415 if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) 1392 if (ecb_expect_true (u <= ECB_I2A_MAX_X10))
1416 ptr = ecb_i2a_x10 (ptr, u); 1393 ptr = ecb_i2a_x10 (ptr, u);
1417 else /* x10 almost, but not fully, covers 32 bit */ 1394 else /* x10 almost, but not fully, covers 32 bit */
1447 #endif 1424 #endif
1448 1425
1449 return ptr; 1426 return ptr;
1450} 1427}
1451 1428
1452ecb_inline char * 1429ecb_function_ char * ecb_i2a_i32 (char *ptr, int32_t v);
1453ecb_i2a_i32 (char *ptr, int32_t v) 1430ecb_function_ char * ecb_i2a_i32 (char *ptr, int32_t v)
1454{ 1431{
1455 *ptr = '-'; ptr += v < 0; 1432 *ptr = '-'; ptr += v < 0;
1456 uint32_t u = v < 0 ? -(uint32_t)v : v; 1433 uint32_t u = v < 0 ? -(uint32_t)v : v;
1457 1434
1458 #if ECB_64BIT_NATIVE 1435 #if ECB_64BIT_NATIVE
1462 #endif 1439 #endif
1463 1440
1464 return ptr; 1441 return ptr;
1465} 1442}
1466 1443
1467ecb_inline char * 1444ecb_function_ char * ecb_i2a_u64 (char *ptr, uint64_t u);
1468ecb_i2a_u64 (char *ptr, uint64_t u) 1445ecb_function_ char * ecb_i2a_u64 (char *ptr, uint64_t u)
1469{ 1446{
1470 #if ECB_64BIT_NATIVE 1447 #if ECB_64BIT_NATIVE
1471 if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) 1448 if (ecb_expect_true (u <= ECB_I2A_MAX_X10))
1472 ptr = ecb_i2a_x10 (ptr, u); 1449 ptr = ecb_i2a_x10 (ptr, u);
1473 else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000)) 1450 else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000))
1503 #endif 1480 #endif
1504 1481
1505 return ptr; 1482 return ptr;
1506} 1483}
1507 1484
1508ecb_inline char * 1485ecb_function_ char * ecb_i2a_i64 (char *ptr, int64_t v);
1509ecb_i2a_i64 (char *ptr, int64_t v) 1486ecb_function_ char * ecb_i2a_i64 (char *ptr, int64_t v)
1510{ 1487{
1511 *ptr = '-'; ptr += v < 0; 1488 *ptr = '-'; ptr += v < 0;
1512 uint64_t u = v < 0 ? -(uint64_t)v : v; 1489 uint64_t u = v < 0 ? -(uint64_t)v : v;
1513 1490
1514 #if ECB_64BIT_NATIVE 1491 #if ECB_64BIT_NATIVE
1591 #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e)) 1568 #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e))
1592 #endif 1569 #endif
1593 1570
1594 /* convert a float to ieee single/binary32 */ 1571 /* convert a float to ieee single/binary32 */
1595 ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x); 1572 ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x);
1596 ecb_function_ ecb_const uint32_t 1573 ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x)
1597 ecb_float_to_binary32 (float x)
1598 { 1574 {
1599 uint32_t r; 1575 uint32_t r;
1600 1576
1601 #if ECB_STDFP 1577 #if ECB_STDFP
1602 memcpy (&r, &x, 4); 1578 memcpy (&r, &x, 4);
1631 return r; 1607 return r;
1632 } 1608 }
1633 1609
1634 /* converts an ieee single/binary32 to a float */ 1610 /* converts an ieee single/binary32 to a float */
1635 ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x); 1611 ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x);
1636 ecb_function_ ecb_const float 1612 ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x)
1637 ecb_binary32_to_float (uint32_t x)
1638 { 1613 {
1639 float r; 1614 float r;
1640 1615
1641 #if ECB_STDFP 1616 #if ECB_STDFP
1642 memcpy (&r, &x, 4); 1617 memcpy (&r, &x, 4);
1661 return r; 1636 return r;
1662 } 1637 }
1663 1638
1664 /* convert a double to ieee double/binary64 */ 1639 /* convert a double to ieee double/binary64 */
1665 ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x); 1640 ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x);
1666 ecb_function_ ecb_const uint64_t 1641 ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x)
1667 ecb_double_to_binary64 (double x)
1668 { 1642 {
1669 uint64_t r; 1643 uint64_t r;
1670 1644
1671 #if ECB_STDFP 1645 #if ECB_STDFP
1672 memcpy (&r, &x, 8); 1646 memcpy (&r, &x, 8);
1701 return r; 1675 return r;
1702 } 1676 }
1703 1677
1704 /* converts an ieee double/binary64 to a double */ 1678 /* converts an ieee double/binary64 to a double */
1705 ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x); 1679 ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x);
1706 ecb_function_ ecb_const double 1680 ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x)
1707 ecb_binary64_to_double (uint64_t x)
1708 { 1681 {
1709 double r; 1682 double r;
1710 1683
1711 #if ECB_STDFP 1684 #if ECB_STDFP
1712 memcpy (&r, &x, 8); 1685 memcpy (&r, &x, 8);
1731 return r; 1704 return r;
1732 } 1705 }
1733 1706
1734 /* convert a float to ieee half/binary16 */ 1707 /* convert a float to ieee half/binary16 */
1735 ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x); 1708 ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x);
1736 ecb_function_ ecb_const uint16_t 1709 ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x)
1737 ecb_float_to_binary16 (float x)
1738 { 1710 {
1739 return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x)); 1711 return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x));
1740 } 1712 }
1741 1713
1742 /* convert an ieee half/binary16 to float */ 1714 /* convert an ieee half/binary16 to float */
1743 ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x); 1715 ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x);
1744 ecb_function_ ecb_const float 1716 ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x)
1745 ecb_binary16_to_float (uint16_t x)
1746 { 1717 {
1747 return ecb_binary32_to_float (ecb_binary16_to_binary32 (x)); 1718 return ecb_binary32_to_float (ecb_binary16_to_binary32 (x));
1748 } 1719 }
1749 1720
1750#endif 1721#endif

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines