diff options
| -rw-r--r-- | src/corelib/serialization/qxmlutils.cpp | 15 | ||||
| -rw-r--r-- | src/corelib/serialization/qxmlutils_p.h | 2 | ||||
| -rw-r--r-- | src/xml/dom/qdom.cpp | 9 | ||||
| -rw-r--r-- | tests/auto/xml/dom/qdom/tst_qdom.cpp | 22 |
4 files changed, 37 insertions, 11 deletions
diff --git a/src/corelib/serialization/qxmlutils.cpp b/src/corelib/serialization/qxmlutils.cpp index 74a0cf0c23..778e8de72d 100644 --- a/src/corelib/serialization/qxmlutils.cpp +++ b/src/corelib/serialization/qxmlutils.cpp @@ -235,13 +235,16 @@ bool QXmlUtils::isLetter(const QChar c) \sa {http://www.w3.org/TR/REC-xml/#NT-Char}, {Extensible Markup Language (XML) 1.0 (Fourth Edition), [2] Char} */ -bool QXmlUtils::isChar(const QChar c) +bool QXmlUtils::isChar(const char32_t c) { - return (c.unicode() >= 0x0020 && c.unicode() <= 0xD7FF) - || c.unicode() == 0x0009 - || c.unicode() == 0x000A - || c.unicode() == 0x000D - || (c.unicode() >= 0xE000 && c.unicode() <= 0xFFFD); + // The valid range is defined by https://www.w3.org/TR/REC-xml/#NT-Char as following: + // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + return (c >= 0x0020 && c <= 0xD7FF) + || c == 0x0009 + || c == 0x000A + || c == 0x000D + || (c >= 0xE000 && c <= 0xFFFD) + || (c >= 0x10000 && c <= 0x10FFFF); } /*! diff --git a/src/corelib/serialization/qxmlutils_p.h b/src/corelib/serialization/qxmlutils_p.h index 2e709e8323..0ad1758979 100644 --- a/src/corelib/serialization/qxmlutils_p.h +++ b/src/corelib/serialization/qxmlutils_p.h @@ -33,7 +33,7 @@ class Q_CORE_EXPORT QXmlUtils { public: static bool isEncName(QStringView encName); - static bool isChar(const QChar c); + static bool isChar(const char32_t c); static bool isNameChar(const QChar c); static bool isLetter(const QChar c); static bool isNCName(QStringView ncName); diff --git a/src/xml/dom/qdom.cpp b/src/xml/dom/qdom.cpp index 486894019c..e0e9a465ef 100644 --- a/src/xml/dom/qdom.cpp +++ b/src/xml/dom/qdom.cpp @@ -22,7 +22,7 @@ #include <qdebug.h> #include <qxmlstream.h> #include <private/qduplicatetracker_p.h> - +#include <private/qstringiterator_p.h> #include <stdio.h> #include <limits> @@ -156,10 +156,11 @@ static QString fixedCharData(const QString &data, bool *ok) } QString result; - for (int i = 0; i < data.size(); ++i) { - QChar c = data.at(i); + QStringIterator it(data); + while (it.hasNext()) { + const char32_t c = it.next(QChar::Null); if (QXmlUtils::isChar(c)) { - result.append(c); + result.append(QChar::fromUcs4(c)); } else if (QDomImplementationPrivate::invalidDataPolicy == QDomImplementation::ReturnNullNode) { *ok = false; return QString(); diff --git a/tests/auto/xml/dom/qdom/tst_qdom.cpp b/tests/auto/xml/dom/qdom/tst_qdom.cpp index e1c2b12ab5..f05020f61c 100644 --- a/tests/auto/xml/dom/qdom/tst_qdom.cpp +++ b/tests/auto/xml/dom/qdom/tst_qdom.cpp @@ -9,6 +9,7 @@ #include <QFile> #include <QList> #include <QRegularExpression> +#include <QScopeGuard> #include <QTextStream> #include <QTest> #include <QtXml> @@ -62,6 +63,7 @@ private slots: void invalidQualifiedName(); void invalidCharData_data(); void invalidCharData(); + void nonBMPCharacters(); void roundTripAttributes() const; void roundTripCDATA() const; @@ -1342,6 +1344,10 @@ void tst_QDom::invalidCharData_data() QTest::newRow( "f<o&o" ) << QString("f<o&o") << true << true << true << QString("f<o&o"); QTest::newRow( "empty" ) << QString() << true << true << true << QString(); QTest::newRow("f\\x07o\\x02")<< QString("f\x07o\x02")<< true << true << false << QString("fo"); + + const QChar pair[2] = { QChar(0xdc00), QChar(0xe000) }; + QString invalid(pair, 2); + QTest::newRow("\\xdc00\\xe000") << invalid << true << true << false << invalid.last(1); } void tst_QDom::invalidCharData() @@ -1385,6 +1391,22 @@ void tst_QDom::invalidCharData() } } +void tst_QDom::nonBMPCharacters() +{ + const auto invalidDataPolicy = QDomImplementation::invalidDataPolicy(); + auto resetInvalidDataPolicy = qScopeGuard( + [invalidDataPolicy] { QDomImplementation::setInvalidDataPolicy(invalidDataPolicy); }); + QDomImplementation::setInvalidDataPolicy(QDomImplementation::DropInvalidChars); + + const QString input = u"<text>Supplementary Plane: 𝄞 😂 🀄 🀶 🃪 🃋</text>"_qs; + + QString errorMsg; + QDomDocument doc; + doc.setContent(input, &errorMsg); + QVERIFY(errorMsg.isEmpty()); + QCOMPARE(doc.toString(-1), input); +} + void tst_QDom::roundTripAttributes() const { /* Create an attribute via the QDom API with weird whitespace content. */ |
