diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-01-28 11:07:14 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-02-04 19:37:22 +0000 |
commit | 89edf43c44294888781c308d9b1f1d9bab63645b (patch) | |
tree | 7653d09ab651f237bdef38d24917183352847c78 /src/gui/image | |
parent | 5432f2c7a1ff02e1ee0e07e442ceb6c12ca66098 (diff) | |
download | qtbase-89edf43c44294888781c308d9b1f1d9bab63645b.tar.gz |
Generate SSE4.1 versions of premultiplying methods where convenient
The autovectorized versions of premultiplying conversions are almost
twice as fast with SSE4.1 as with SSE2. Therefore this patch lets
compilers that can make those versions convenient without duplicating
code do that and lets us use them when available.
Change-Id: I699035963abe55a38b9ef8ba7b4a8c961c8dfcdd
Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src/gui/image')
-rw-r--r-- | src/gui/image/qimage_conversions.cpp | 32 |
1 files changed, 29 insertions, 3 deletions
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp index 2e8fc1963d..696f95b565 100644 --- a/src/gui/image/qimage_conversions.cpp +++ b/src/gui/image/qimage_conversions.cpp @@ -195,7 +195,7 @@ static bool convert_passthrough_inplace(QImageData *data, Qt::ImageConversionFla return true; } -static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +static inline void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_ARGB32 || src->format == QImage::Format_RGBA8888); Q_ASSERT(dest->format == QImage::Format_ARGB32_Premultiplied || dest->format == QImage::Format_RGBA8888_Premultiplied); @@ -219,6 +219,15 @@ static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt: } } +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) +QT_FUNCTION_TARGET(SSE4_1) +static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags) +{ + // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. + convert_ARGB_to_ARGB_PM(dest, src, flags); +} +#endif + extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags); #ifndef __SSE2__ @@ -232,7 +241,7 @@ static bool convert_ARGB_to_ARGB_PM_inplace(QImageData *data, Qt::ImageConversio for (int i = 0; i < data->height; ++i) { const QRgb *end = rgb_data + data->width; while (rgb_data < end) { - *rgb_data = PREMUL(*rgb_data); + *rgb_data = qPremultiply(*rgb_data); ++rgb_data; } rgb_data += pad; @@ -312,7 +321,7 @@ static bool convert_ARGB_to_RGBA_inplace(QImageData *data, Qt::ImageConversionFl return true; } -static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +static inline void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_ARGB32); Q_ASSERT(dest->format == QImage::Format_RGBA8888_Premultiplied); @@ -336,6 +345,15 @@ static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt: } } +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) +QT_FUNCTION_TARGET(SSE4_1) +static void convert_ARGB_to_RGBA_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags) +{ + // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. + convert_ARGB_to_RGBA_PM(dest, src, flags); +} +#endif + static void convert_RGBA_to_ARGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_RGBX8888 || src->format == QImage::Format_RGBA8888 || src->format == QImage::Format_RGBA8888_Premultiplied); @@ -2945,6 +2963,14 @@ void qInitImageConversions() } #endif +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) + if (qCpuHasFeature(SSE4_1)) { + qimage_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4; + qimage_converter_map[QImage::Format_RGBA8888][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4; + qimage_converter_map[QImage::Format_ARGB32][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_RGBA_PM_sse4; + } +#endif + #if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64) extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); qimage_converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon; |