Add a UTF-8 conversion for trusted data with no BOM.

This assumes that there are no overlong sequences, no continuation bytes without a leading byte, no missing continuation bytes, and no BOM.
author: Thiago Macieira <thiago.macieira@nokia.com> 2011-03-19 22:07:11 +0100
committer: Thiago Macieira <thiago.macieira@nokia.com> 2011-03-22 15:52:02 +0100
commit: 3110ab6391971fb7b914ed1f797a0ff9e403501f (patch)
tree: 822182b8479950425f435c4877aace205bf170ca /tests/benchmarks/corelib
parent: 0552c0f64146a18f021e36bcbff106cb815e6fbb (diff)
download: qt4-tools-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.gz
1 files changed, 48 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 4b1ab57be2..d926aa55c1 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -2148,6 +2148,53 @@ int fromUtf8_sse2_optimised_for_ascii(ushort *qch, const char *chars, int len)
     return dst + counter - qch;
 }
 
+int fromUtf8_sse2_trusted_no_bom(ushort *qch, const char *chars, int len)
+{ // Copies ASCII bytes from chars into qch as 16-bit units (16 per SSE2 step) and hands every non-ASCII byte to extract_utf8_multibyte<true>; returns dst + counter - qch.
+    qptrdiff counter = 0; // byte offset into chars; also added to dst to form the write position
+    ushort *dst = qch; // passed to extract_utf8_multibyte below; presumably adjusted there when input and output lengths diverge (helper not visible here)
+
+    len -= 16; // the vector loop only runs while a full 16-byte load at chars + counter stays inside the input
+    const __m128i nullMask = _mm_set1_epi32(0); // all-zero register used as the high byte when widening
+    while (counter < len) {
+        const __m128i chunk = _mm_loadu_si128((__m128i*)(chars + counter)); // unaligned load of the next 16 input bytes
+        ushort highbytes = _mm_movemask_epi8(chunk); // bit i is the top bit of byte i, so a set bit marks a non-ASCII byte
+
+        // zero-extend the first 8 bytes to 8 16-bit units (interleave with the zero register)
+        const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+        _mm_storeu_si128((__m128i*)(dst + counter), firstHalf); // stored unconditionally, before highbytes is inspected
+
+        if (!uchar(highbytes)) { // low 8 mask bits clear: the first 8 bytes are all ASCII
+            // zero-extend the last 8 bytes to 8 16-bit units as well
+            const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+            _mm_storeu_si128((__m128i*)(dst + counter + 8), secondHalf); // store after the first 8 output units
+
+            if (!highbytes) { // all 16 bytes were ASCII: consume the whole chunk and load the next one
+                counter += 16;
+                continue;
+            }
+        }
+
+        // At least one byte in this chunk has its top bit set (start of a multi-byte sequence, given trusted input).
+        // Skip the ASCII bytes already stored in front of it.
+        counter += bsf_nonzero(highbytes); // presumably the index of the lowest set bit, i.e. the first non-ASCII byte (helper not visible here)
+        extract_utf8_multibyte<true>(dst, chars, counter, len); // NOTE(review): len is still reduced by 16 at this call; confirm the helper does not depend on it
+    }
+    len += 16; // restore the real length for the scalar tail loop
+
+    while (counter < len) { // byte-at-a-time handling of whatever the vector loop left over
+        uchar ch = chars[counter];
+        if ((ch & 0x80) == 0) { // top bit clear: plain ASCII, copy as-is
+            dst[counter] = ch;
+            ++counter;
+            continue;
+        }
+
+        // Top bit set: delegate the multi-byte sequence to the helper, presumably advancing counter past it (helper not visible here)
+        extract_utf8_multibyte<true>(dst, chars, counter, len);
+    }
+    return dst + counter - qch; // offset of the final write position from qch; presumably the number of 16-bit units produced
+}
+
 void tst_QString::fromUtf8Alternatives_data() const
 {
     QTest::addColumn<FromUtf8Function>("function");
@@ -2158,6 +2205,7 @@ void tst_QString::fromUtf8Alternatives_data() const
     QTest::newRow("qt-4.7-stateless") << &fromUtf8_qt47_stateless;
     QTest::newRow("optimized-for-ascii") << &fromUtf8_optimised_for_ascii;
     QTest::newRow("sse2-optimized-for-ascii") << &fromUtf8_sse2_optimised_for_ascii;
+    QTest::newRow("sse2-trusted-no-bom") << &fromUtf8_sse2_trusted_no_bom;
 }
 
 extern StringData fromUtf8Data;
author	Thiago Macieira <thiago.macieira@nokia.com>	2011-03-19 22:07:11 +0100
committer	Thiago Macieira <thiago.macieira@nokia.com>	2011-03-22 15:52:02 +0100
commit	3110ab6391971fb7b914ed1f797a0ff9e403501f (patch)
tree	822182b8479950425f435c4877aace205bf170ca /tests/benchmarks/corelib
parent	0552c0f64146a18f021e36bcbff106cb815e6fbb (diff)
download	qt4-tools-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.gz