… | |
… | |
468 | #define ecb_ld64(x) (ecb_clz64 (x) ^ 63) |
468 | #define ecb_ld64(x) (ecb_clz64 (x) ^ 63) |
469 | #define ecb_popcount32(x) __builtin_popcount (x) |
469 | #define ecb_popcount32(x) __builtin_popcount (x) |
470 | /* ecb_popcount64 is more difficult, see below */ |
470 | /* ecb_popcount64 is more difficult, see below */ |
471 | #else |
471 | #else |
472 | ecb_function_ ecb_const int ecb_ctz32 (uint32_t x); |
472 | ecb_function_ ecb_const int ecb_ctz32 (uint32_t x); |
473 | ecb_function_ ecb_const int |
473 | ecb_function_ ecb_const int ecb_ctz32 (uint32_t x) |
474 | ecb_ctz32 (uint32_t x) |
|
|
475 | { |
474 | { |
476 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
475 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
477 | unsigned long r; |
476 | unsigned long r; |
478 | _BitScanForward (&r, x); |
477 | _BitScanForward (&r, x); |
479 | return (int)r; |
478 | return (int)r; |
… | |
… | |
518 | return r; |
517 | return r; |
519 | #endif |
518 | #endif |
520 | } |
519 | } |
521 | |
520 | |
522 | ecb_function_ ecb_const int ecb_ctz64 (uint64_t x); |
521 | ecb_function_ ecb_const int ecb_ctz64 (uint64_t x); |
523 | ecb_function_ ecb_const int |
522 | ecb_function_ ecb_const int ecb_ctz64 (uint64_t x) |
524 | ecb_ctz64 (uint64_t x) |
|
|
525 | { |
523 | { |
526 | #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
524 | #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
527 | unsigned long r; |
525 | unsigned long r; |
528 | _BitScanForward64 (&r, x); |
526 | _BitScanForward64 (&r, x); |
529 | return (int)r; |
527 | return (int)r; |
… | |
… | |
532 | return ecb_ctz32 (x >> shift) + shift; |
530 | return ecb_ctz32 (x >> shift) + shift; |
533 | #endif |
531 | #endif |
534 | } |
532 | } |
535 | |
533 | |
536 | ecb_function_ ecb_const int ecb_clz32 (uint32_t x); |
534 | ecb_function_ ecb_const int ecb_clz32 (uint32_t x); |
537 | ecb_function_ ecb_const int |
535 | ecb_function_ ecb_const int ecb_clz32 (uint32_t x) |
538 | ecb_clz32 (uint32_t x) |
|
|
539 | { |
536 | { |
540 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
537 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
541 | unsigned long r; |
538 | unsigned long r; |
542 | _BitScanReverse (&r, x); |
539 | _BitScanReverse (&r, x); |
543 | return (int)r; |
540 | return (int)r; |
… | |
… | |
568 | return table [x >> 26]; |
565 | return table [x >> 26]; |
569 | #endif |
566 | #endif |
570 | } |
567 | } |
571 | |
568 | |
572 | ecb_function_ ecb_const int ecb_clz64 (uint64_t x); |
569 | ecb_function_ ecb_const int ecb_clz64 (uint64_t x); |
573 | ecb_function_ ecb_const int |
570 | ecb_function_ ecb_const int ecb_clz64 (uint64_t x) |
574 | ecb_clz64 (uint64_t x) |
|
|
575 | { |
571 | { |
576 | #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
572 | #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
577 | unsigned long r; |
573 | unsigned long r; |
578 | _BitScanReverse64 (&r, x); |
574 | _BitScanReverse64 (&r, x); |
579 | return (int)r; |
575 | return (int)r; |
… | |
… | |
667 | |
663 | |
668 | return x; |
664 | return x; |
669 | } |
665 | } |
670 | |
666 | |
671 | ecb_function_ ecb_const int ecb_popcount64 (uint64_t x); |
667 | ecb_function_ ecb_const int ecb_popcount64 (uint64_t x); |
672 | ecb_function_ ecb_const int |
668 | ecb_function_ ecb_const int ecb_popcount64 (uint64_t x) |
673 | ecb_popcount64 (uint64_t x) |
|
|
674 | { |
669 | { |
675 | /* popcount64 is only available on 64 bit cpus as gcc builtin. */ |
670 | /* popcount64 is only available on 64 bit cpus as gcc builtin. */ |
676 | /* also, gcc/clang make this surprisingly difficult to use */ |
671 | /* also, gcc/clang make this surprisingly difficult to use */ |
677 | #if (__SIZEOF_LONG__ == 8) && (ECB_GCC_VERSION(3,4) || ECB_CLANG_BUILTIN (__builtin_popcountl)) |
672 | #if (__SIZEOF_LONG__ == 8) && (ECB_GCC_VERSION(3,4) || ECB_CLANG_BUILTIN (__builtin_popcountl)) |
678 | return __builtin_popcountl (x); |
673 | return __builtin_popcountl (x); |
… | |
… | |
750 | #define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x))) |
745 | #define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x))) |
751 | #define ecb_bswap32(x) ((uint32_t)_byteswap_ulong ((uint32_t)(x))) |
746 | #define ecb_bswap32(x) ((uint32_t)_byteswap_ulong ((uint32_t)(x))) |
752 | #define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x))) |
747 | #define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x))) |
753 | #else |
748 | #else |
754 | ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x); |
749 | ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x); |
755 | ecb_function_ ecb_const uint16_t |
750 | ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x) |
756 | ecb_bswap16 (uint16_t x) |
|
|
757 | { |
751 | { |
758 | return ecb_rotl16 (x, 8); |
752 | return ecb_rotl16 (x, 8); |
759 | } |
753 | } |
760 | |
754 | |
761 | ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x); |
755 | ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x); |
762 | ecb_function_ ecb_const uint32_t |
756 | ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x) |
763 | ecb_bswap32 (uint32_t x) |
|
|
764 | { |
757 | { |
765 | return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16); |
758 | return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16); |
766 | } |
759 | } |
767 | |
760 | |
768 | ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x); |
761 | ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x); |
769 | ecb_function_ ecb_const uint64_t |
762 | ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x) |
770 | ecb_bswap64 (uint64_t x) |
|
|
771 | { |
763 | { |
772 | return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32); |
764 | return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32); |
773 | } |
765 | } |
774 | #endif |
766 | #endif |
775 | |
767 | |
… | |
… | |
783 | |
775 | |
784 | /* try to tell the compiler that some condition is definitely true */ |
776 | /* try to tell the compiler that some condition is definitely true */ |
785 | #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 |
777 | #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 |
786 | |
778 | |
787 | ecb_inline ecb_const uint32_t ecb_byteorder_helper (void); |
779 | ecb_inline ecb_const uint32_t ecb_byteorder_helper (void); |
788 | ecb_inline ecb_const uint32_t |
780 | ecb_inline ecb_const uint32_t ecb_byteorder_helper (void) |
789 | ecb_byteorder_helper (void) |
|
|
790 | { |
781 | { |
791 | /* the union code still generates code under pressure in gcc, */ |
782 | /* the union code still generates code under pressure in gcc, */ |
792 | /* but less than using pointers, and always seems to */ |
783 | /* but less than using pointers, and always seems to */ |
793 | /* successfully return a constant. */ |
784 | /* successfully return a constant. */ |
794 | /* the reason why we have this horrible preprocessor mess */ |
785 | /* the reason why we have this horrible preprocessor mess */ |
… | |
… | |
1222 | |
1213 | |
1223 | /*****************************************************************************/ |
1214 | /*****************************************************************************/ |
1224 | /* IEEE 754-2008 half float conversions */ |
1215 | /* IEEE 754-2008 half float conversions */ |
1225 | |
1216 | |
1226 | ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x); |
1217 | ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x); |
1227 | ecb_function_ ecb_const uint32_t |
1218 | ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x) |
1228 | ecb_binary16_to_binary32 (uint32_t x) |
|
|
1229 | { |
1219 | { |
1230 | unsigned int s = (x & 0x8000) << (31 - 15); |
1220 | unsigned int s = (x & 0x8000) << (31 - 15); |
1231 | int e = (x >> 10) & 0x001f; |
1221 | int e = (x >> 10) & 0x001f; |
1232 | unsigned int m = x & 0x03ff; |
1222 | unsigned int m = x & 0x03ff; |
1233 | |
1223 | |
… | |
… | |
1254 | |
1244 | |
1255 | return s | (e << 23) | (m << (23 - 10)); |
1245 | return s | (e << 23) | (m << (23 - 10)); |
1256 | } |
1246 | } |
1257 | |
1247 | |
1258 | ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x); |
1248 | ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x); |
1259 | ecb_function_ ecb_const uint16_t |
1249 | ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x) |
1260 | ecb_binary32_to_binary16 (uint32_t x) |
|
|
1261 | { |
1250 | { |
1262 | unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */ |
1251 | unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */ |
1263 | int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */ |
1252 | int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */ |
1264 | unsigned int m = x & 0x007fffff; |
1253 | unsigned int m = x & 0x007fffff; |
1265 | |
1254 | |
… | |
… | |
1406 | #define ECB_I2A_U32_DIGITS 10 |
1395 | #define ECB_I2A_U32_DIGITS 10 |
1407 | #define ECB_I2A_I64_DIGITS 20 |
1396 | #define ECB_I2A_I64_DIGITS 20 |
1408 | #define ECB_I2A_U64_DIGITS 21 |
1397 | #define ECB_I2A_U64_DIGITS 21 |
1409 | #define ECB_I2A_MAX_DIGITS 21 |
1398 | #define ECB_I2A_MAX_DIGITS 21 |
1410 | |
1399 | |
1411 | ecb_inline char * |
|
|
1412 | ecb_i2a_u32 (char *ptr, uint32_t u) |
1400 | ecb_function_ char * ecb_i2a_u32 (char *ptr, uint32_t u) |
|
|
1401 | ecb_function_ char * ecb_i2a_u32 (char *ptr, uint32_t u) |
1413 | { |
1402 | { |
1414 | #if ECB_64BIT_NATIVE |
1403 | #if ECB_64BIT_NATIVE |
1415 | if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1404 | if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1416 | ptr = ecb_i2a_x10 (ptr, u); |
1405 | ptr = ecb_i2a_x10 (ptr, u); |
1417 | else /* x10 almost, but not fully, covers 32 bit */ |
1406 | else /* x10 almost, but not fully, covers 32 bit */ |
… | |
… | |
1447 | #endif |
1436 | #endif |
1448 | |
1437 | |
1449 | return ptr; |
1438 | return ptr; |
1450 | } |
1439 | } |
1451 | |
1440 | |
1452 | ecb_inline char * |
1441 | ecb_function_ char * ecb_i2a_i32 (char *ptr, int32_t v); |
1453 | ecb_i2a_i32 (char *ptr, int32_t v) |
1442 | ecb_function_ char * ecb_i2a_i32 (char *ptr, int32_t v) |
1454 | { |
1443 | { |
1455 | *ptr = '-'; ptr += v < 0; |
1444 | *ptr = '-'; ptr += v < 0; |
1456 | uint32_t u = v < 0 ? -(uint32_t)v : v; |
1445 | uint32_t u = v < 0 ? -(uint32_t)v : v; |
1457 | |
1446 | |
1458 | #if ECB_64BIT_NATIVE |
1447 | #if ECB_64BIT_NATIVE |
… | |
… | |
1462 | #endif |
1451 | #endif |
1463 | |
1452 | |
1464 | return ptr; |
1453 | return ptr; |
1465 | } |
1454 | } |
1466 | |
1455 | |
1467 | ecb_inline char * |
1456 | ecb_function_ char * ecb_i2a_u64 (char *ptr, uint64_t u); |
1468 | ecb_i2a_u64 (char *ptr, uint64_t u) |
1457 | ecb_function_ char * ecb_i2a_u64 (char *ptr, uint64_t u) |
1469 | { |
1458 | { |
1470 | #if ECB_64BIT_NATIVE |
1459 | #if ECB_64BIT_NATIVE |
1471 | if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1460 | if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1472 | ptr = ecb_i2a_x10 (ptr, u); |
1461 | ptr = ecb_i2a_x10 (ptr, u); |
1473 | else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000)) |
1462 | else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000)) |
… | |
… | |
1503 | #endif |
1492 | #endif |
1504 | |
1493 | |
1505 | return ptr; |
1494 | return ptr; |
1506 | } |
1495 | } |
1507 | |
1496 | |
1508 | ecb_inline char * |
|
|
1509 | ecb_i2a_i64 (char *ptr, int64_t v) |
1497 | ecb_function_ char * ecb_i2a_i64 (char *ptr, int64_t v) |
|
|
1498 | ecb_function_ char * ecb_i2a_i64 (char *ptr, int64_t v) |
1510 | { |
1499 | { |
1511 | *ptr = '-'; ptr += v < 0; |
1500 | *ptr = '-'; ptr += v < 0; |
1512 | uint64_t u = v < 0 ? -(uint64_t)v : v; |
1501 | uint64_t u = v < 0 ? -(uint64_t)v : v; |
1513 | |
1502 | |
1514 | #if ECB_64BIT_NATIVE |
1503 | #if ECB_64BIT_NATIVE |
… | |
… | |
1591 | #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e)) |
1580 | #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e)) |
1592 | #endif |
1581 | #endif |
1593 | |
1582 | |
1594 | /* convert a float to ieee single/binary32 */ |
1583 | /* convert a float to ieee single/binary32 */ |
1595 | ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x); |
1584 | ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x); |
1596 | ecb_function_ ecb_const uint32_t |
1585 | ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x) |
1597 | ecb_float_to_binary32 (float x) |
|
|
1598 | { |
1586 | { |
1599 | uint32_t r; |
1587 | uint32_t r; |
1600 | |
1588 | |
1601 | #if ECB_STDFP |
1589 | #if ECB_STDFP |
1602 | memcpy (&r, &x, 4); |
1590 | memcpy (&r, &x, 4); |
… | |
… | |
1631 | return r; |
1619 | return r; |
1632 | } |
1620 | } |
1633 | |
1621 | |
1634 | /* converts an ieee single/binary32 to a float */ |
1622 | /* converts an ieee single/binary32 to a float */ |
1635 | ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x); |
1623 | ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x); |
1636 | ecb_function_ ecb_const float |
1624 | ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x) |
1637 | ecb_binary32_to_float (uint32_t x) |
|
|
1638 | { |
1625 | { |
1639 | float r; |
1626 | float r; |
1640 | |
1627 | |
1641 | #if ECB_STDFP |
1628 | #if ECB_STDFP |
1642 | memcpy (&r, &x, 4); |
1629 | memcpy (&r, &x, 4); |
… | |
… | |
1661 | return r; |
1648 | return r; |
1662 | } |
1649 | } |
1663 | |
1650 | |
1664 | /* convert a double to ieee double/binary64 */ |
1651 | /* convert a double to ieee double/binary64 */ |
1665 | ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x); |
1652 | ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x); |
1666 | ecb_function_ ecb_const uint64_t |
1653 | ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x) |
1667 | ecb_double_to_binary64 (double x) |
|
|
1668 | { |
1654 | { |
1669 | uint64_t r; |
1655 | uint64_t r; |
1670 | |
1656 | |
1671 | #if ECB_STDFP |
1657 | #if ECB_STDFP |
1672 | memcpy (&r, &x, 8); |
1658 | memcpy (&r, &x, 8); |
… | |
… | |
1701 | return r; |
1687 | return r; |
1702 | } |
1688 | } |
1703 | |
1689 | |
1704 | /* converts an ieee double/binary64 to a double */ |
1690 | /* converts an ieee double/binary64 to a double */ |
1705 | ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x); |
1691 | ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x); |
1706 | ecb_function_ ecb_const double |
1692 | ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x) |
1707 | ecb_binary64_to_double (uint64_t x) |
|
|
1708 | { |
1693 | { |
1709 | double r; |
1694 | double r; |
1710 | |
1695 | |
1711 | #if ECB_STDFP |
1696 | #if ECB_STDFP |
1712 | memcpy (&r, &x, 8); |
1697 | memcpy (&r, &x, 8); |
… | |
… | |
1731 | return r; |
1716 | return r; |
1732 | } |
1717 | } |
1733 | |
1718 | |
1734 | /* convert a float to ieee half/binary16 */ |
1719 | /* convert a float to ieee half/binary16 */ |
1735 | ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x); |
1720 | ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x); |
1736 | ecb_function_ ecb_const uint16_t |
1721 | ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x) |
1737 | ecb_float_to_binary16 (float x) |
|
|
1738 | { |
1722 | { |
1739 | return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x)); |
1723 | return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x)); |
1740 | } |
1724 | } |
1741 | |
1725 | |
1742 | /* convert an ieee half/binary16 to float */ |
1726 | /* convert an ieee half/binary16 to float */ |
1743 | ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x); |
1727 | ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x); |
1744 | ecb_function_ ecb_const float |
1728 | ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x) |
1745 | ecb_binary16_to_float (uint16_t x) |
|
|
1746 | { |
1729 | { |
1747 | return ecb_binary32_to_float (ecb_binary16_to_binary32 (x)); |
1730 | return ecb_binary32_to_float (ecb_binary16_to_binary32 (x)); |
1748 | } |
1731 | } |
1749 | |
1732 | |
1750 | #endif |
1733 | #endif |