From 07553d03534e66b13602bf41937e9221020e70d9 Mon Sep 17 00:00:00 2001 From: Shawn Rutledge Date: Sat, 6 Jul 2019 13:13:42 +0200 Subject: QTextBrowser: assume Markdown is UTF-8 That's how CommonMark specifies it. The HTML codec-guessing algorithm was making it fall back to Latin1 in practice, which was screwing up any Unicode characters found in the markdown source. Change-Id: I4021adc4a68591ecfd56ef24971af53ce3e9c96d Reviewed-by: Gatis Paeglis --- src/widgets/widgets/qtextbrowser.cpp | 12 ++++++---- .../widgets/qtextbrowser/quotesAndFractions.md | 1 + .../widgets/qtextbrowser/tst_qtextbrowser.cpp | 28 ++++++++++++++++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 tests/auto/widgets/widgets/qtextbrowser/quotesAndFractions.md diff --git a/src/widgets/widgets/qtextbrowser.cpp b/src/widgets/widgets/qtextbrowser.cpp index 2f992b1cff..7a77f86de2 100644 --- a/src/widgets/widgets/qtextbrowser.cpp +++ b/src/widgets/widgets/qtextbrowser.cpp @@ -312,13 +312,17 @@ void QTextBrowserPrivate::setSource(const QUrl &url, QTextDocument::ResourceType if (data.type() == QVariant::String) { txt = data.toString(); } else if (data.type() == QVariant::ByteArray) { + if (type == QTextDocument::HtmlResource) { #if QT_CONFIG(textcodec) - QByteArray ba = data.toByteArray(); - QTextCodec *codec = Qt::codecForHtml(ba); - txt = codec->toUnicode(ba); + QByteArray ba = data.toByteArray(); + QTextCodec *codec = Qt::codecForHtml(ba); + txt = codec->toUnicode(ba); #else - txt = data.toString(); + txt = data.toString(); #endif + } else { + txt = QString::fromUtf8(data.toByteArray()); + } } if (Q_UNLIKELY(txt.isEmpty())) qWarning("QTextBrowser: No document for %s", url.toString().toLatin1().constData()); diff --git a/tests/auto/widgets/widgets/qtextbrowser/quotesAndFractions.md b/tests/auto/widgets/widgets/qtextbrowser/quotesAndFractions.md new file mode 100644 index 0000000000..6bad9cb95b --- /dev/null +++ b/tests/auto/widgets/widgets/qtextbrowser/quotesAndFractions.md @@ 
-0,0 +1 @@ +you’ll hope to see ❝quotes❞ ﹠1½ ⅔ ¼ ⅗ ⅚ ⅝ some “vulgar” fractions (pardon my «French») diff --git a/tests/auto/widgets/widgets/qtextbrowser/tst_qtextbrowser.cpp b/tests/auto/widgets/widgets/qtextbrowser/tst_qtextbrowser.cpp index 083e297941..27bf0ce7be 100644 --- a/tests/auto/widgets/widgets/qtextbrowser/tst_qtextbrowser.cpp +++ b/tests/auto/widgets/widgets/qtextbrowser/tst_qtextbrowser.cpp @@ -94,6 +94,8 @@ private slots: void urlEncoding(); void sourceType_data(); void sourceType(); + void unicode_data(); + void unicode(); private: TestBrowser *browser; @@ -721,5 +723,31 @@ void tst_QTextBrowser::sourceType() QCOMPARE(maxHeadingLevel, expectedMaxHeadingLevel); } +void tst_QTextBrowser::unicode_data() +{ + QTest::addColumn<QString>("sourceFile"); + QTest::addColumn<QTextDocument::ResourceType>("sourceType"); + QTest::addColumn<QString>("expectedText"); + +#if QT_CONFIG(textmarkdownreader) + QTest::newRow("markdown with quotes and fractions") << "quotesAndFractions.md" << QTextDocument::MarkdownResource << + "you\u2019ll hope to see \u275Dquotes\u275E \uFE601\u00BD \u2154 \u00BC \u2157 \u215A \u215D some \u201Cvulgar\u201D fractions (pardon my \u00ABFrench\u00BB)"; +#endif +} + +void tst_QTextBrowser::unicode() +{ + QFETCH(QString, sourceFile); + QFETCH(QTextDocument::ResourceType, sourceType); + QFETCH(QString, expectedText); + browser->setSource(QUrl::fromLocalFile(QFINDTESTDATA(sourceFile)), sourceType); + QTextFrame::iterator iterator = browser->document()->rootFrame()->begin(); + while (!iterator.atEnd()) { + QString blockText = iterator++.currentBlock().text(); + if (!blockText.isEmpty()) + QCOMPARE(blockText, expectedText); + } +} + QTEST_MAIN(tst_QTextBrowser) #include "tst_qtextbrowser.moc" -- cgit v1.2.1