diff options
author | Lars Knoll <lars.knoll@qt.io> | 2022-05-31 11:58:50 +0200 |
---|---|---|
committer | Fabian Kosmale <fabian.kosmale@qt.io> | 2022-06-21 01:04:12 +0200 |
commit | 9c1f3b6d4d5a5fe59396062c6f68cc1201665c62 (patch) | |
tree | 0a59a4dd0293f5c323433ffd314b34057e3d5152 /tests | |
parent | d531c4b65dc47312256f9de2786a852f6ac4c1f5 (diff) | |
download | qtbase-9c1f3b6d4d5a5fe59396062c6f68cc1201665c62.tar.gz |
Add QStringDecoder::decoderForHtml()
Now that QStringConverter can handle non UTF encodings through ICU,
add a way to get a decoder for arbitrary HTML code.
Opposed to QStringConverter::encodingForHtml(), this method will
try to create a valid string decoder also for non unicode codecs.
Pick-to: 6.4
Change-Id: I343584da1b114396c744f482d9b433c9cedcc511
Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp | 84 |
1 files changed, 64 insertions, 20 deletions
diff --git a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp index 07f29a6429..541a0bee45 100644 --- a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp +++ b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp @@ -2256,65 +2256,102 @@ void tst_QStringConverter::encodingForHtml_data() { QTest::addColumn<QByteArray>("html"); QTest::addColumn<std::optional<QStringConverter::Encoding>>("encoding"); + QTest::addColumn<QByteArray>("name"); // ICU name if we have ICU support QByteArray html = "<html><head></head><body>blah</body></html>"; - QTest::newRow("no charset") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("no charset") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-15\" /></head></html>"; - QTest::newRow("latin 15") << html << std::optional<QStringConverter::Encoding>(); + QTest::newRow("latin 15") << html << std::optional<QStringConverter::Encoding>() << QByteArray("ISO-8859-15"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=SJIS\" /></head></html>"; + QTest::newRow("sjis") << html << std::optional<QStringConverter::Encoding>() << QByteArray("Shift_JIS"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-2022-JP\" /></head></html>"; + QTest::newRow("ISO-2022-JP") << html << std::optional<QStringConverter::Encoding>() << QByteArray("ISO-2022-JP"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-2022\" /></head></html>"; + QTest::newRow("ISO-2022") << html << std::optional<QStringConverter::Encoding>() << QByteArray("ISO-2022-JP"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=GB2312\" /></head></html>"; + QTest::newRow("GB2312") << html << std::optional<QStringConverter::Encoding>() << QByteArray("GB2312"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=Big5\" /></head></html>"; + QTest::newRow("Big5") << html << std::optional<QStringConverter::Encoding>() << QByteArray("Big5"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=GB18030\" /></head></html>"; + QTest::newRow("GB18030") << html << std::optional<QStringConverter::Encoding>() << QByteArray("GB18030"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=GB2312-HKSCS\" /></head></html>"; + QTest::newRow("GB2312-HKSCS") << html << std::optional<QStringConverter::Encoding>() << QByteArray("GB2312-HKSCS"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=Big5-HKSCS\" /></head></html>"; + QTest::newRow("Big5-HKSCS") << html << std::optional<QStringConverter::Encoding>() << QByteArray("Big5-HKSCS"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=EucJP\" /></head></html>"; + QTest::newRow("EucJP") << html << std::optional<QStringConverter::Encoding>() << QByteArray("EUC-JP"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=EucKR\" /></head></html>"; + QTest::newRow("EucKR") << html << std::optional<QStringConverter::Encoding>() << QByteArray("EUC-KR"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=KOI8-R\" /></head></html>"; + QTest::newRow("KOI8-R") << html << std::optional<QStringConverter::Encoding>() << QByteArray("KOI8-R"); + + html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=KOI8-U\" /></head></html>"; + QTest::newRow("KOI8-U") << html << std::optional<QStringConverter::Encoding>() << QByteArray("KOI8-U"); html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-1\" /></head></html>"; - QTest::newRow("latin 1") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1); + QTest::newRow("latin 1") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1) << QByteArray("ISO-8859-1"); html = "<!DOCTYPE html><html><head><meta charset=\"ISO_8859-1:1987\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>"; - QTest::newRow("latin 1 (#2)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1); + QTest::newRow("latin 1 (#2)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1) << QByteArray("ISO-8859-1"); html = "<!DOCTYPE html><html><head><meta charset=\"utf-8\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>"; - QTest::newRow("UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8\"><title>Test</title></head>"; - QTest::newRow("UTF-8 (#2)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("UTF-8 (#2)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8/></head></html>"; - QTest::newRow("UTF-8, no quotes") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("UTF-8, no quotes") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset='UTF-8'/></head></html>"; - QTest::newRow("UTF-8, single quotes") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("UTF-8, single quotes") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<!DOCTYPE html><html><head><meta charset=utf-8><title>Test</title></head>"; - QTest::newRow("UTF-8, > terminator") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("UTF-8, > terminator") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<!DOCTYPE html><html><head><meta charset= utf-8 ><title>Test</title></head>"; - QTest::newRow("UTF-8, > terminator with spaces") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("UTF-8, > terminator with spaces") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); // Test invalid charsets. html = "<!DOCTYPE html><html><head><meta charset= utf/8 ><title>Test</title></head>"; - QTest::newRow("utf/8") << html << std::optional<QStringConverter::Encoding>(); + QTest::newRow("utf/8") << html << std::optional<QStringConverter::Encoding>() << QByteArray(); html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=invalid-foo\" /></head></html>"; - QTest::newRow("invalid charset, no default") << html << std::optional<QStringConverter::Encoding>(); + QTest::newRow("invalid charset, no default") << html << std::optional<QStringConverter::Encoding>() << QByteArray("UTF-8"); html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\""; html.prepend(QByteArray().fill(' ', 512 - html.size())); - QTest::newRow("invalid charset (large header)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("invalid charset (large header)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8"; - QTest::newRow("invalid charset (no closing double quote)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("invalid charset (no closing double quote)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset='utf-8"; - QTest::newRow("invalid charset (no closing single quote)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("invalid charset (no closing single quote)") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<!DOCTYPE html><html><head><meta charset=utf-8 foo=bar><title>Test</title></head>"; - QTest::newRow("invalid (space terminator)") << html << std::optional<QStringConverter::Encoding>(); + QTest::newRow("invalid (space terminator)") << html << std::optional<QStringConverter::Encoding>() << QByteArray(); html = "<!DOCTYPE html><html><head><meta charset=\" utf' 8 /><title>Test</title></head>"; - QTest::newRow("invalid charset, early terminator (')") << html << std::optional<QStringConverter::Encoding>(); + QTest::newRow("invalid charset, early terminator (')") << html << std::optional<QStringConverter::Encoding>() << QByteArray(); const char src[] = { char(0xff), char(0xfe), char(0x7a), char(0x03), 0, 0 }; html = src; - QTest::newRow("greek text UTF-16LE") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf16LE); + QTest::newRow("greek text UTF-16LE") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf16LE) << QByteArray("UTF-16LE"); html = "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"><span style=\"color: rgb(0, 0, 0); font-family: " @@ -2322,19 +2359,26 @@ void tst_QStringConverter::encodingForHtml_data() "line-height: normal; orphans: auto; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: " "auto; word-spacing: 0px; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; display: inline !important; float: " "none;\">ͻ</span>\000"; - QTest::newRow("greek text UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("greek text UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); html = "<!DOCTYPE html><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=unicode\">" "<head/><body><p>bla</p></body></html>"; // QTBUG-41998, ICU will return UTF-16. - QTest::newRow("legacy unicode UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); + QTest::newRow("legacy unicode UTF-8") << html << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8) << QByteArray("UTF-8"); } void tst_QStringConverter::encodingForHtml() { QFETCH(QByteArray, html); QFETCH(std::optional<QStringConverter::Encoding>, encoding); + QFETCH(QByteArray, name); QCOMPARE(QStringConverter::encodingForHtml(html), encoding); + + QStringDecoder decoder = QStringDecoder::decoderForHtml(html); + if (encoding || // we should have a valid decoder independent of ICU support + decoder.isValid()) { // we got a valid decoder through ICU + QCOMPARE(decoder.name(), name); + } } class LoadAndConvert: public QRunnable |