Merge the RGB and RGBA pixbuf conversion loops into a single loop that
premultiplies the colour channels by alpha, and write the XRender scaling
matrices in hex.  XTransform entries are XFixed 16.16 fixed point, so
0x10000 is 1.0, 0x08000 is 0.5 and 0x20000 is 2.0.

Note: leading whitespace in this copy of the patch is reconstructed;
apply with "patch -l" if the context does not match exactly.

--- rxvt-unicode/src/rxvtimg.C	2012/07/07 07:00:17	1.102
+++ rxvt-unicode/src/rxvtimg.C	2012/07/14 08:26:56	1.103
@@ -351,35 +351,28 @@
           unsigned char *src = row;
           uint32_t      *dst = (uint32_t *)line;
 
-          if (!pb_has_alpha)
-            for (int x = 0; x < width; x++)
-              {
-                uint8_t r = *src++;
-                uint8_t g = *src++;
-                uint8_t b = *src++;
-
-                uint32_t v = (255 << 24) | (r << 16) | (g << 8) | b;
-
-                if (ecb_big_endian () ? !byte_order_mismatch : byte_order_mismatch)
-                  v = ecb_bswap32 (v);
-
-                *dst++ = v;
-              }
-          else
-            for (int x = 0; x < width; x++)
-              {
-                uint32_t v = *(uint32_t *)src; src += 4;
-
-                if (ecb_big_endian ())
-                  v = ecb_bswap32 (v);
+          for (int x = 0; x < width; x++)
+            {
+              uint8_t r = *src++;
+              uint8_t g = *src++;
+              uint8_t b = *src++;
+              uint8_t a = *src;
+
+              // this is done so it can be jump-free, but newer gcc versions clone the inner loop anyway
+              a = pb_has_alpha ? a : 255;
+              src += pb_has_alpha;
+
+              r = (r * a + 127) / 255;
+              g = (g * a + 127) / 255;
+              b = (b * a + 127) / 255;
 
-                v = ecb_rotl32 (v, 8); // abgr to bgra
+              uint32_t v = ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
 
-                if (!byte_order_mismatch)
-                  v = ecb_bswap32 (v);
+              if (ecb_big_endian () ? !byte_order_mismatch : byte_order_mismatch)
+                v = ecb_bswap32 (v);
 
-                *dst++ = v;
-              }
+              *dst++ = v;
+            }
 
           row += rowstride;
           line += xi.bytes_per_line;
@@ -606,9 +599,9 @@
       // why the hell does XRenderSetPictureTransform want a writable matrix :(
       // that keeps us from just static const'ing this matrix.
       XTransform h_double = {
-        32768,     0,     0,
-            0, 65536,     0,
-            0,     0, 65536
+        0x08000,       0,       0,
+              0, 0x10000,       0,
+              0,       0, 0x10000
       };
 
       XRenderSetPictureFilter (cc.dpy, cc.src, "nearest", 0, 0);
@@ -640,9 +633,9 @@
           };
 
           XTransform h_halve = {
-            131072,     0,     0,
-                 0, 65536,     0,
-                 0,     0, 65536
+            0x20000,       0,       0,
+                  0, 0x10000,       0,
+                  0,       0, 0x10000
           };
 
           XRenderSetPictureFilter (cc.dpy, cc2.src, "nearest", 0, 0);