summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/corelib/serialization/qxmlutils.cpp15
-rw-r--r--src/corelib/serialization/qxmlutils_p.h2
-rw-r--r--src/xml/dom/qdom.cpp9
-rw-r--r--tests/auto/xml/dom/qdom/tst_qdom.cpp22
4 files changed, 37 insertions, 11 deletions
diff --git a/src/corelib/serialization/qxmlutils.cpp b/src/corelib/serialization/qxmlutils.cpp
index 74a0cf0c23..778e8de72d 100644
--- a/src/corelib/serialization/qxmlutils.cpp
+++ b/src/corelib/serialization/qxmlutils.cpp
@@ -235,13 +235,16 @@ bool QXmlUtils::isLetter(const QChar c)
\sa {http://www.w3.org/TR/REC-xml/#NT-Char},
{Extensible Markup Language (XML) 1.0 (Fourth Edition), [2] Char}
*/
-bool QXmlUtils::isChar(const QChar c)
+bool QXmlUtils::isChar(const char32_t c)
{
- return (c.unicode() >= 0x0020 && c.unicode() <= 0xD7FF)
- || c.unicode() == 0x0009
- || c.unicode() == 0x000A
- || c.unicode() == 0x000D
- || (c.unicode() >= 0xE000 && c.unicode() <= 0xFFFD);
+ // The valid range is defined by https://www.w3.org/TR/REC-xml/#NT-Char as follows:
+ // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+ return (c >= 0x0020 && c <= 0xD7FF)
+ || c == 0x0009
+ || c == 0x000A
+ || c == 0x000D
+ || (c >= 0xE000 && c <= 0xFFFD)
+ || (c >= 0x10000 && c <= 0x10FFFF);
}
/*!
diff --git a/src/corelib/serialization/qxmlutils_p.h b/src/corelib/serialization/qxmlutils_p.h
index 2e709e8323..0ad1758979 100644
--- a/src/corelib/serialization/qxmlutils_p.h
+++ b/src/corelib/serialization/qxmlutils_p.h
@@ -33,7 +33,7 @@ class Q_CORE_EXPORT QXmlUtils
{
public:
static bool isEncName(QStringView encName);
- static bool isChar(const QChar c);
+ static bool isChar(const char32_t c);
static bool isNameChar(const QChar c);
static bool isLetter(const QChar c);
static bool isNCName(QStringView ncName);
diff --git a/src/xml/dom/qdom.cpp b/src/xml/dom/qdom.cpp
index 486894019c..e0e9a465ef 100644
--- a/src/xml/dom/qdom.cpp
+++ b/src/xml/dom/qdom.cpp
@@ -22,7 +22,7 @@
#include <qdebug.h>
#include <qxmlstream.h>
#include <private/qduplicatetracker_p.h>
-
+#include <private/qstringiterator_p.h>
#include <stdio.h>
#include <limits>
@@ -156,10 +156,11 @@ static QString fixedCharData(const QString &data, bool *ok)
}
QString result;
- for (int i = 0; i < data.size(); ++i) {
- QChar c = data.at(i);
+ QStringIterator it(data);
+ while (it.hasNext()) {
+ const char32_t c = it.next(QChar::Null);
if (QXmlUtils::isChar(c)) {
- result.append(c);
+ result.append(QChar::fromUcs4(c));
} else if (QDomImplementationPrivate::invalidDataPolicy == QDomImplementation::ReturnNullNode) {
*ok = false;
return QString();
diff --git a/tests/auto/xml/dom/qdom/tst_qdom.cpp b/tests/auto/xml/dom/qdom/tst_qdom.cpp
index e1c2b12ab5..f05020f61c 100644
--- a/tests/auto/xml/dom/qdom/tst_qdom.cpp
+++ b/tests/auto/xml/dom/qdom/tst_qdom.cpp
@@ -9,6 +9,7 @@
#include <QFile>
#include <QList>
#include <QRegularExpression>
+#include <QScopeGuard>
#include <QTextStream>
#include <QTest>
#include <QtXml>
@@ -62,6 +63,7 @@ private slots:
void invalidQualifiedName();
void invalidCharData_data();
void invalidCharData();
+ void nonBMPCharacters();
void roundTripAttributes() const;
void roundTripCDATA() const;
@@ -1342,6 +1344,10 @@ void tst_QDom::invalidCharData_data()
QTest::newRow( "f<o&o" ) << QString("f<o&o") << true << true << true << QString("f<o&o");
QTest::newRow( "empty" ) << QString() << true << true << true << QString();
QTest::newRow("f\\x07o\\x02")<< QString("f\x07o\x02")<< true << true << false << QString("fo");
+
+ const QChar pair[2] = { QChar(0xdc00), QChar(0xe000) };
+ QString invalid(pair, 2);
+ QTest::newRow("\\xdc00\\xe000") << invalid << true << true << false << invalid.last(1);
}
void tst_QDom::invalidCharData()
@@ -1385,6 +1391,22 @@ void tst_QDom::invalidCharData()
}
}
+void tst_QDom::nonBMPCharacters()
+{
+    const auto invalidDataPolicy = QDomImplementation::invalidDataPolicy(); // saved for restore
+    auto resetInvalidDataPolicy = qScopeGuard(
+            [invalidDataPolicy] { QDomImplementation::setInvalidDataPolicy(invalidDataPolicy); });
+    QDomImplementation::setInvalidDataPolicy(QDomImplementation::DropInvalidChars);
+    // With DropInvalidChars active, characters outside the BMP must round-trip unchanged.
+    const QString input = u"<text>Supplementary Plane: 𝄞 😂 🀄 🀶 🃪 🃋</text>"_qs;
+
+    QString errorMsg;
+    QDomDocument doc;
+    QVERIFY2(doc.setContent(input, &errorMsg), qPrintable(errorMsg)); // report parser message on failure
+    QVERIFY(errorMsg.isEmpty());
+    QCOMPARE(doc.toString(-1), input);
+}
+
void tst_QDom::roundTripAttributes() const
{
/* Create an attribute via the QDom API with weird whitespace content. */