diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2019-01-31 16:13:16 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2019-02-07 14:27:47 +0000 |
commit | 60addee9384cb8c589f7abf9020f7d6d7a6f4d63 (patch) | |
tree | 2a55252db920f78421c03e9130a2080cba7793c3 | |
parent | 928cab5ff1d931d00074d8930c41537109814371 (diff) | |
download | qtbase-60addee9384cb8c589f7abf9020f7d6d7a6f4d63.tar.gz |
Improve ARGB32ToRGBA64 conversions
Improves the precision so that an 8-bit channel value of 255 maps to exactly 65535 (0xffff) in the 16-bit result.
Change-Id: I366f408e8c6047d52acbed35e9d665249bbaba2b
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r-- | src/gui/painting/qdrawhelper_avx2.cpp | 28 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse4.cpp | 36 |
2 files changed, 29 insertions, 35 deletions
diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp index f05cc0926e..2b3cc9b226 100644 --- a/src/gui/painting/qdrawhelper_avx2.cpp +++ b/src/gui/painting/qdrawhelper_avx2.cpp @@ -1135,14 +1135,12 @@ static void convertARGBToRGBA64PM_avx2(QRgba64 *buffer, const uint *src, qsizety const __m256i src1 = _mm256_unpacklo_epi8(srcVector, srcVector); const __m256i src2 = _mm256_unpackhi_epi8(srcVector, srcVector); if (!cf) { - dst1 = _mm256_unpacklo_epi8(srcVector, zero); - dst2 = _mm256_unpackhi_epi8(srcVector, zero); - const __m256i alpha1 = _mm256_shuffle_epi8(dst1, shuffleMask); - const __m256i alpha2 = _mm256_shuffle_epi8(dst2, shuffleMask); - dst1 = _mm256_mullo_epi16(dst1, alpha1); - dst2 = _mm256_mullo_epi16(dst2, alpha2); - dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 7)); - dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 7)); + const __m256i alpha1 = _mm256_shuffle_epi8(src1, shuffleMask); + const __m256i alpha2 = _mm256_shuffle_epi8(src2, shuffleMask); + dst1 = _mm256_mulhi_epu16(src1, alpha1); + dst2 = _mm256_mulhi_epu16(src2, alpha2); + dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 15)); + dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 15)); dst1 = _mm256_blend_epi16(dst1, src1, 0x88); dst2 = _mm256_blend_epi16(dst2, src2, 0x88); } else { @@ -1171,14 +1169,12 @@ static void convertARGBToRGBA64PM_avx2(QRgba64 *buffer, const uint *src, qsizety const __m256i src1 = _mm256_unpacklo_epi8(srcVector, srcVector); const __m256i src2 = _mm256_unpackhi_epi8(srcVector, srcVector); if (!cf) { - dst1 = _mm256_unpacklo_epi8(srcVector, zero); - dst2 = _mm256_unpackhi_epi8(srcVector, zero); - const __m256i alpha1 = _mm256_shuffle_epi8(dst1, shuffleMask); - const __m256i alpha2 = _mm256_shuffle_epi8(dst2, shuffleMask); - dst1 = _mm256_mullo_epi16(dst1, alpha1); - dst2 = _mm256_mullo_epi16(dst2, alpha2); - dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 7)); - dst2 = _mm256_add_epi16(dst2, 
_mm256_srli_epi16(dst2, 7)); + const __m256i alpha1 = _mm256_shuffle_epi8(src1, shuffleMask); + const __m256i alpha2 = _mm256_shuffle_epi8(src2, shuffleMask); + dst1 = _mm256_mulhi_epu16(src1, alpha1); + dst2 = _mm256_mulhi_epu16(src2, alpha2); + dst1 = _mm256_add_epi16(dst1, _mm256_srli_epi16(dst1, 15)); + dst2 = _mm256_add_epi16(dst2, _mm256_srli_epi16(dst2, 15)); dst1 = _mm256_blend_epi16(dst1, src1, 0x88); dst2 = _mm256_blend_epi16(dst2, src2, 0x88); } else { diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp index 1da3b75ade..d9a687b1b4 100644 --- a/src/gui/painting/qdrawhelper_sse4.cpp +++ b/src/gui/painting/qdrawhelper_sse4.cpp @@ -107,28 +107,26 @@ static void convertARGBToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int cou for (; i < count - 3; i += 4) { __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]); if (!_mm_testz_si128(srcVector, alphaMask)) { - if (!_mm_testc_si128(srcVector, alphaMask)) { - if (!RGBA) - srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); - __m128i src1 = _mm_unpacklo_epi8(srcVector, zero); - __m128i src2 = _mm_unpackhi_epi8(srcVector, zero); + bool cf = _mm_testc_si128(srcVector, alphaMask); + + if (!RGBA) + srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); + const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector); + const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector); + if (!cf) { __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask); __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask); - src1 = _mm_mullo_epi16(src1, alpha1); - src2 = _mm_mullo_epi16(src2, alpha2); - alpha1 = _mm_unpacklo_epi8(srcVector, srcVector); - alpha2 = _mm_unpackhi_epi8(srcVector, srcVector); - src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 7)); - src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 7)); - src1 = _mm_blend_epi16(src1, alpha1, 0x88); - src2 = _mm_blend_epi16(src2, alpha2, 0x88); - _mm_storeu_si128((__m128i *)&buffer[i], src1); - _mm_storeu_si128((__m128i *)&buffer[i + 2], 
src2); + __m128i dst1 = _mm_mulhi_epu16(src1, alpha1); + __m128i dst2 = _mm_mulhi_epu16(src2, alpha2); + // Map 0->0xfffe to 0->0xffff + dst1 = _mm_add_epi16(dst1, _mm_srli_epi16(dst1, 15)); + dst2 = _mm_add_epi16(dst2, _mm_srli_epi16(dst2, 15)); + // correct alpha value: + dst1 = _mm_blend_epi16(dst1, src1, 0x88); + dst2 = _mm_blend_epi16(dst2, src2, 0x88); + _mm_storeu_si128((__m128i *)&buffer[i], dst1); + _mm_storeu_si128((__m128i *)&buffer[i + 2], dst2); } else { - if (!RGBA) - srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); - const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector); - const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector); _mm_storeu_si128((__m128i *)&buffer[i], src1); _mm_storeu_si128((__m128i *)&buffer[i + 2], src2); } |