summaryrefslogtreecommitdiff
path: root/tests/auto/qtextcodec/tst_qtextcodec.cpp
diff options
context:
space:
mode:
authorDenis Dzyubenko <denis.dzyubenko@nokia.com>2009-04-28 14:08:59 +0200
committerDenis Dzyubenko <denis.dzyubenko@nokia.com>2009-05-04 15:34:03 +0200
commit7031e1d110bb1bc97cfe0377adc211030e1e7320 (patch)
treeb1260d28e0865eaab3c91f1330a1df0dca2ba858 /tests/auto/qtextcodec/tst_qtextcodec.cpp
parent1e0e67406c3865717fef8b98d2c69adbefc54245 (diff)
downloadqt4-tools-7031e1d110bb1bc97cfe0377adc211030e1e7320.tar.gz
When data was copied from Mozilla Firefox to Qt, the text format was not valid.
Mozilla encodes the text/html format in UTF-16 and adds a BOM, but it doesn't specify the charset in the HTML header. The fix is to guess the encoding from either the charset in the HTML header or the BOM for the text/html format, and from the BOM alone for non-HTML formats. This commit adds a new public function, QTextCodec::codecForUtfText(), which can be used to guess the encoding from the BOM.

Task-number: 250555
Reviewed-by: Benjamin Poulain
Reviewed-by: Simon Hausmann
Reviewed-by: Andreas Aardal Hanssen
Diffstat (limited to 'tests/auto/qtextcodec/tst_qtextcodec.cpp')
-rw-r--r--tests/auto/qtextcodec/tst_qtextcodec.cpp59
1 files changed, 59 insertions, 0 deletions
diff --git a/tests/auto/qtextcodec/tst_qtextcodec.cpp b/tests/auto/qtextcodec/tst_qtextcodec.cpp
index cf4135bba8..22f95570d8 100644
--- a/tests/auto/qtextcodec/tst_qtextcodec.cpp
+++ b/tests/auto/qtextcodec/tst_qtextcodec.cpp
@@ -79,6 +79,9 @@ private slots:
void codecForHtml();
+ void codecForUtfText_data();
+ void codecForUtfText();
+
#ifdef Q_OS_UNIX
void toLocal8Bit();
#endif
@@ -1744,6 +1747,62 @@ void tst_QTextCodec::codecForHtml()
QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 111); // latin 15
}
+void tst_QTextCodec::codecForUtfText_data()
+{
+    // Each row supplies the raw bytes handed to QTextCodec::codecForUtfText(),
+    // whether a codec is expected to be detected, and the mibEnum() expected
+    // from that codec (0 for rows where nothing should be detected).
+    QTest::addColumn<QByteArray>("encoded");
+    QTest::addColumn<bool>("detected");
+    QTest::addColumn<int>("mib");
+
+    // Lengths are spelled out because most samples contain embedded NUL
+    // bytes, which would otherwise truncate the literal.
+    static const struct {
+        const char *tag;
+        const char *bytes;
+        int length;
+        bool detected;
+        int mib;
+    } samples[] = {
+        { "utf8 bom",     "\xef\xbb\xbfhello",                           8, true,  106 },
+        { "utf8 nobom",   "hello",                                       5, false, 0 },
+
+        { "utf16 bom be", "\xfe\xff\0h\0e\0l",                           8, true,  1013 },
+        { "utf16 bom le", "\xff\xfeh\0e\0l\0",                           8, true,  1014 },
+        { "utf16 nobom",  "\0h\0e\0l",                                   6, false, 0 },
+
+        { "utf32 bom be", "\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l",          16, true,  1018 },
+        { "utf32 bom le", "\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0",          16, true,  1019 },
+        { "utf32 nobom",  "\0\0\0h\0\0\0e\0\0\0l",                      12, false, 0 }
+    };
+
+    const int sampleCount = static_cast<int>(sizeof(samples) / sizeof(samples[0]));
+    for (int i = 0; i < sampleCount; ++i) {
+        QTest::newRow(samples[i].tag)
+            << QByteArray(samples[i].bytes, samples[i].length)
+            << samples[i].detected
+            << samples[i].mib;
+    }
+}
+
+void tst_QTextCodec::codecForUtfText()
+{
+    // Data-driven check of QTextCodec::codecForUtfText(): rows flagged as
+    // "detected" must yield a codec whose mibEnum() equals the expected MIB;
+    // all other rows must yield a null codec.
+    QFETCH(QByteArray, encoded);
+    QFETCH(bool, detected);
+    QFETCH(int, mib);
+
+    // 0 is passed as the fallback codec, so rows without a recognisable BOM
+    // are expected to come back as a null pointer.
+    QTextCodec *codec = QTextCodec::codecForUtfText(encoded, 0);
+    if (!detected) {
+        QVERIFY(codec == 0);
+        return;
+    }
+
+    // Verify the pointer first: if detection regresses, the row fails cleanly
+    // here instead of crashing the whole test binary on a null dereference.
+    QVERIFY(codec != 0);
+    QCOMPARE(codec->mibEnum(), mib);
+}
+
#ifdef Q_OS_UNIX
void tst_QTextCodec::toLocal8Bit()
{