summaryrefslogtreecommitdiff
path: root/tests/benchmarks/corelib
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@nokia.com>2011-03-19 22:07:11 +0100
committerThiago Macieira <thiago.macieira@nokia.com>2011-03-22 15:52:02 +0100
commit3110ab6391971fb7b914ed1f797a0ff9e403501f (patch)
tree822182b8479950425f435c4877aace205bf170ca /tests/benchmarks/corelib
parent0552c0f64146a18f021e36bcbff106cb815e6fbb (diff)
downloadqt4-tools-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.gz
Add a UTF-8 conversion for trusted data with no BOM.
This assumes that there are no overlong sequences, no continuation bytes without a leading byte, no missing continuation bytes, and no BOM.
Diffstat (limited to 'tests/benchmarks/corelib')
-rw-r--r--tests/benchmarks/corelib/tools/qstring/main.cpp48
1 files changed, 48 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 4b1ab57be2..d926aa55c1 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -2148,6 +2148,53 @@ int fromUtf8_sse2_optimised_for_ascii(ushort *qch, const char *chars, int len)
return dst + counter - qch;
}
+int fromUtf8_sse2_trusted_no_bom(ushort *qch, const char *chars, int len) // SSE2 conversion of len UTF-8 bytes at chars into 16-bit units at qch; no BOM is skipped here and the input is assumed well-formed ("trusted")
+{
+ qptrdiff counter = 0; // index of the next input byte in chars
+ ushort *dst = qch; // output base, indexed with counter; presumably rebased by extract_utf8_multibyte -- TODO confirm
+
+ len -= 16; // the vector loop below runs only while counter < len - 16, so each 16-byte load stays inside the input
+ const __m128i nullMask = _mm_set1_epi32(0); // all-zero vector, supplies the high byte when widening 8-bit to 16-bit
+ while (counter < len) {
+ const __m128i chunk = _mm_loadu_si128((__m128i*)(chars + counter)); // load 16 input bytes (unaligned)
+ ushort highbytes = _mm_movemask_epi8(chunk); // bit i is set when byte i of the chunk has its top bit set, i.e. is not ASCII
+
+ // unpack the first 8 bytes, padding with zeros (done unconditionally, before the mask is inspected)
+ const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter), firstHalf); // store 8 widened units (unaligned)
+
+ if (!uchar(highbytes)) { // low 8 mask bits are clear: the first 8 bytes are all ASCII
+ // unpack the last 8 bytes, padding with zeros
+ const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter + 8), secondHalf); // store 8 more widened units (unaligned)
+
+ if (!highbytes) { // all 16 bytes are ASCII and both halves are stored
+ counter += 16;
+ continue;
+ }
+ }
+
+ // UTF-8 character found (at least one mask bit is set); the ASCII bytes before it were already widened and stored above
+ // which one? presumably bsf_nonzero returns the index of the lowest set bit, i.e. the first non-ASCII byte -- TODO confirm
+ counter += bsf_nonzero(highbytes);
+ extract_utf8_multibyte<true>(dst, chars, counter, len); // presumably decodes one sequence and advances counter -- TODO confirm; NOTE(review): len is still reduced by 16 here, confirm the <true> variant does not rely on it
+ }
+ len += 16; // undo the earlier adjustment so the scalar loop sees the real length
+
+ while (counter < len) { // byte-at-a-time loop for whatever the vector loop did not consume
+ uchar ch = chars[counter];
+ if ((ch & 0x80) == 0) { // top bit clear: ASCII
+ dst[counter] = ch; // widen the byte to 16 bits
+ ++counter;
+ continue;
+ }
+
+ // UTF-8 character found (top bit set); hand the sequence to the helper
+ extract_utf8_multibyte<true>(dst, chars, counter, len);
+ }
+ return dst + counter - qch; // presumably the number of 16-bit units written, if the helper rebases dst -- TODO confirm
+}
+
void tst_QString::fromUtf8Alternatives_data() const
{
QTest::addColumn<FromUtf8Function>("function");
@@ -2158,6 +2205,7 @@ void tst_QString::fromUtf8Alternatives_data() const
QTest::newRow("qt-4.7-stateless") << &fromUtf8_qt47_stateless;
QTest::newRow("optimized-for-ascii") << &fromUtf8_optimised_for_ascii;
QTest::newRow("sse2-optimized-for-ascii") << &fromUtf8_sse2_optimised_for_ascii;
+ QTest::newRow("sse2-trusted-no-bom") << &fromUtf8_sse2_trusted_no_bom;
}
extern StringData fromUtf8Data;