summary | refs | log | tree | commit | diff
path: root/src/gui/image
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2015-01-28 11:07:14 +0100
committer: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2015-02-04 19:37:22 +0000
commit: 89edf43c44294888781c308d9b1f1d9bab63645b (patch)
tree: 7653d09ab651f237bdef38d24917183352847c78 /src/gui/image
parent: 5432f2c7a1ff02e1ee0e07e442ceb6c12ca66098 (diff)
download: qtbase-89edf43c44294888781c308d9b1f1d9bab63645b.tar.gz
Generate SSE4.1 versions of premultiplying methods where convenient
The autovectorized versions of the premultiplying conversions are almost twice as fast with SSE4.1 as with SSE2. Therefore, on compilers that can conveniently generate SSE4.1 versions without duplicating code, this patch builds those versions and uses them when SSE4.1 is available at run time.

Change-Id: I699035963abe55a38b9ef8ba7b4a8c961c8dfcdd
Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src/gui/image')
-rw-r--r-- src/gui/image/qimage_conversions.cpp | 32
1 file changed, 29 insertions, 3 deletions
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp
index 2e8fc1963d..696f95b565 100644
--- a/src/gui/image/qimage_conversions.cpp
+++ b/src/gui/image/qimage_conversions.cpp
@@ -195,7 +195,7 @@ static bool convert_passthrough_inplace(QImageData *data, Qt::ImageConversionFla
return true;
}
-static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
+static inline void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
Q_ASSERT(src->format == QImage::Format_ARGB32 || src->format == QImage::Format_RGBA8888);
Q_ASSERT(dest->format == QImage::Format_ARGB32_Premultiplied || dest->format == QImage::Format_RGBA8888_Premultiplied);
@@ -219,6 +219,15 @@ static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt:
}
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+QT_FUNCTION_TARGET(SSE4_1)
+static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags)
+{
+ // Twice as fast autovectorized due to SSE4.1 PMULLD instructions.
+ convert_ARGB_to_ARGB_PM(dest, src, flags);
+}
+#endif
+
extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
#ifndef __SSE2__
@@ -232,7 +241,7 @@ static bool convert_ARGB_to_ARGB_PM_inplace(QImageData *data, Qt::ImageConversio
for (int i = 0; i < data->height; ++i) {
const QRgb *end = rgb_data + data->width;
while (rgb_data < end) {
- *rgb_data = PREMUL(*rgb_data);
+ *rgb_data = qPremultiply(*rgb_data);
++rgb_data;
}
rgb_data += pad;
@@ -312,7 +321,7 @@ static bool convert_ARGB_to_RGBA_inplace(QImageData *data, Qt::ImageConversionFl
return true;
}
-static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
+static inline void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
Q_ASSERT(src->format == QImage::Format_ARGB32);
Q_ASSERT(dest->format == QImage::Format_RGBA8888_Premultiplied);
@@ -336,6 +345,15 @@ static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt:
}
}
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+QT_FUNCTION_TARGET(SSE4_1)
+static void convert_ARGB_to_RGBA_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags)
+{
+ // Twice as fast autovectorized due to SSE4.1 PMULLD instructions.
+ convert_ARGB_to_RGBA_PM(dest, src, flags);
+}
+#endif
+
static void convert_RGBA_to_ARGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
Q_ASSERT(src->format == QImage::Format_RGBX8888 || src->format == QImage::Format_RGBA8888 || src->format == QImage::Format_RGBA8888_Premultiplied);
@@ -2945,6 +2963,14 @@ void qInitImageConversions()
}
#endif
+#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__)
+ if (qCpuHasFeature(SSE4_1)) {
+ qimage_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4;
+ qimage_converter_map[QImage::Format_RGBA8888][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4;
+ qimage_converter_map[QImage::Format_ARGB32][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_RGBA_PM_sse4;
+ }
+#endif
+
#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
qimage_converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon;