… | |
… | |
472 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
472 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
473 | unsigned long r; |
473 | unsigned long r; |
474 | _BitScanForward (&r, x); |
474 | _BitScanForward (&r, x); |
475 | return (int)r; |
475 | return (int)r; |
476 | #else |
476 | #else |
477 | int r = 0; |
477 | int r; |
478 | |
|
|
479 | /* todo: use david seal's algorithm */ |
|
|
480 | |
478 | |
481 | x &= ~x + 1; /* this isolates the lowest bit */ |
479 | x &= ~x + 1; /* this isolates the lowest bit */ |
482 | |
480 | |
483 | #if ECB_branchless_on_i386 |
481 | #if 1 |
|
|
482 | /* David Seal's algorithm, Message-ID: <32975@armltd.uucp> from 1994 */ |
|
|
483 | /* This happens to return 32 for x == 0, but the API does not support this */ |
|
|
484 | |
|
|
485 | /* -0 marks unused entries */ |
|
|
486 | static unsigned char table[64] = |
|
|
487 | { |
|
|
488 | 32, 0, 1, 12, 2, 6, -0, 13, 3, -0, 7, -0, -0, -0, -0, 14, |
|
|
489 | 10, 4, -0, -0, 8, -0, -0, 25, -0, -0, -0, -0, -0, 21, 27, 15, |
|
|
490 | 31, 11, 5, -0, -0, -0, -0, -0, 9, -0, -0, 24, -0, -0, 20, 26, |
|
|
491 | 30, -0, -0, -0, -0, 23, -0, 19, 29, -0, 22, 18, 28, 17, 16, -0 |
|
|
492 | }; |
|
|
493 | |
|
|
494 | /* magic constant results in 33 unique values in the upper 6 bits */ |
|
|
495 | x *= 0x0450fbafU; /* == 17 * 65 * 65535 */ |
|
|
496 | |
|
|
497 | r = table [x >> 26]; |
|
|
498 | #elif 0 /* branchless on i386, typically */ |
|
|
499 | r = 0; |
484 | r += !!(x & 0xaaaaaaaa) << 0; |
500 | r += !!(x & 0xaaaaaaaa) << 0; |
485 | r += !!(x & 0xcccccccc) << 1; |
501 | r += !!(x & 0xcccccccc) << 1; |
486 | r += !!(x & 0xf0f0f0f0) << 2; |
502 | r += !!(x & 0xf0f0f0f0) << 2; |
487 | r += !!(x & 0xff00ff00) << 3; |
503 | r += !!(x & 0xff00ff00) << 3; |
488 | r += !!(x & 0xffff0000) << 4; |
504 | r += !!(x & 0xffff0000) << 4; |
489 | #else |
505 | #else /* branchless on modern compilers, typically */ |
|
|
506 | r = 0; |
490 | if (x & 0xaaaaaaaa) r += 1; |
507 | if (x & 0xaaaaaaaa) r += 1; |
491 | if (x & 0xcccccccc) r += 2; |
508 | if (x & 0xcccccccc) r += 2; |
492 | if (x & 0xf0f0f0f0) r += 4; |
509 | if (x & 0xf0f0f0f0) r += 4; |
493 | if (x & 0xff00ff00) r += 8; |
510 | if (x & 0xff00ff00) r += 8; |
494 | if (x & 0xffff0000) r += 16; |
511 | if (x & 0xffff0000) r += 16; |
… | |
… | |
507 | _BitScanForward64 (&r, x); |
524 | _BitScanForward64 (&r, x); |
508 | return (int)r; |
525 | return (int)r; |
509 | #else |
526 | #else |
510 | int shift = x & 0xffffffff ? 0 : 32; |
527 | int shift = x & 0xffffffff ? 0 : 32; |
511 | return ecb_ctz32 (x >> shift) + shift; |
528 | return ecb_ctz32 (x >> shift) + shift; |
|
|
529 | #endif |
|
|
530 | } |
|
|
531 | |
|
|
532 | ecb_function_ ecb_const int ecb_clz32 (uint32_t x); |
|
|
533 | ecb_function_ ecb_const int |
|
|
534 | ecb_clz32 (uint32_t x) |
|
|
535 | { |
|
|
536 | #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
|
|
537 | unsigned long r; |
|
|
538 | _BitScanReverse (&r, x); |
|
|
539 | return (int)r; |
|
|
540 | #else |
|
|
541 | |
|
|
542 | /* Robert Harley's algorithm from comp.arch 1996-12-07 */ |
|
|
543 | /* This happens to return 32 for x == 0, but the API does not support this */ |
|
|
544 | |
|
|
545 | /* -0 marks unused table elements */ |
|
|
546 | static unsigned char table[64] = |
|
|
547 | { |
|
|
548 | 32, 31, -0, 16, -0, 30, 3, -0, 15, -0, -0, -0, 29, 10, 2, -0, |
|
|
549 | -0, -0, 12, 14, 21, -0, 19, -0, -0, 28, -0, 25, -0, 9, 1, -0, |
|
|
550 | 17, -0, 4, -0, -0, -0, 11, -0, 13, 22, 20, -0, 26, -0, -0, 18, |
|
|
551 | 5, -0, -0, 23, -0, 27, -0, 6, -0, 24, 7, -0, 8, -0, 0, -0 |
|
|
552 | }; |
|
|
553 | |
|
|
554 | /* propagate leftmost 1 bit to the right */ |
|
|
555 | x |= x >> 1; |
|
|
556 | x |= x >> 2; |
|
|
557 | x |= x >> 4; |
|
|
558 | x |= x >> 8; |
|
|
559 | x |= x >> 16; |
|
|
560 | |
|
|
561 | /* magic constant results in 33 unique values in the upper 6 bits */ |
|
|
562 | x *= 0x06EB14F9U; /* == 7 * 255 * 255 * 255 */ |
|
|
563 | |
|
|
564 | return table [x >> 26]; |
|
|
565 | #endif |
|
|
566 | } |
|
|
567 | |
|
|
568 | ecb_function_ ecb_const int ecb_clz64 (uint64_t x); |
|
|
569 | ecb_function_ ecb_const int |
|
|
570 | ecb_clz64 (uint64_t x) |
|
|
571 | { |
|
|
572 | #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
|
|
573 | unsigned long r; |
|
|
574 | _BitScanReverse64 (&r, x); |
|
|
575 | return (int)r; |
|
|
576 | #else |
|
|
577 | uint32_t l = x >> 32; |
|
|
578 | int shift = l ? 0 : 32; |
|
|
579 | return ecb_clz32 (l ? l : x) + shift; |
512 | #endif |
580 | #endif |
513 | } |
581 | } |
514 | |
582 | |
515 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); |
583 | ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); |
516 | ecb_function_ ecb_const int |
584 | ecb_function_ ecb_const int |