summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2022-11-04 02:58:51 +0100
committerGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2022-12-09 16:29:11 +0100
commit9e75a4cca3e4ef55d6d1c6b7dddd8565e3b412d0 (patch)
treedd5a5812a85bda06f4b966fe2d886b37851f09c2
parent6294ee060eeb564d7fce3eb56624b0711d30af15 (diff)
downloadqtbase-9e75a4cca3e4ef55d6d1c6b7dddd8565e3b412d0.tar.gz
QDomDocument: ensure a defined order of attributes when saving
XML does not impose any semantics based on the order of the attributes; they're an unordered set. Quoting [1]: > Note that the order of attribute specifications in a start-tag or empty-element tag is not significant and [2] 2.2.5: > An unordered set of attribute information items Still, using a QHash-based implementation to store attributes is annoying, because one cannot serialize the document in a stable way. The order of attributes is going to depend on the QString hash function (which we can change at any time) and the QHash seed (which is random and changes at every run). In other words, saving the same DOM will yield non deterministic outputs. That's annoying for testing, reproducible builds, and so on. Switching to an _ordered_ associative container for storing attributes won't, on its own, ensure any specific ordering. That's because: * attributes are currently kept associated using their name as the key, ignoring an eventual namespace prefix; * there's some convoluted logic that sometimes emits attributes in the xmlns namespace (to qualify a prefix). Hence, just go for the straightforward implementation and sort the attributes before streaming them. In the main loop I could have used a range-based for loop over the associative container used for attributes; since it's a Qt container, it would have yielded just the values in the map, and we are not interested into the keys. However I'm preparing for further changes down the road, so I'm opting for key/value iteration. I'm deliberately not offering an opt-out because: * I don't think this is so expensive to justify an opt-out; * I'm going to remove QHash anyways in a follow up commit. [1] https://www.w3.org/TR/xml11/#sec-starttags [2] https://www.w3.org/TR/xml-infoset/#infoitem.element Task-number: QTBUG-76800 Task-number: QTBUG-25071 Change-Id: I6282ae2ccbee9c0099f138de48b94bb7c40b3680 Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
-rw-r--r--src/xml/dom/qdom.cpp97
-rw-r--r--tests/auto/xml/dom/qdom/tst_qdom.cpp9
2 files changed, 75 insertions, 31 deletions
diff --git a/src/xml/dom/qdom.cpp b/src/xml/dom/qdom.cpp
index e8e2e5902a..557f5c10b4 100644
--- a/src/xml/dom/qdom.cpp
+++ b/src/xml/dom/qdom.cpp
@@ -23,6 +23,7 @@
#include <qxmlstream.h>
#include <private/qduplicatetracker_p.h>
#include <private/qstringiterator_p.h>
+#include <qvarlengtharray.h>
#include <stdio.h>
#include <limits>
@@ -4026,32 +4027,82 @@ void QDomElementPrivate::save(QTextStream& s, int depth, int indent) const
/* Write out attributes. */
if (!m_attr->map.isEmpty()) {
+ /*
+ * To ensure that we always output attributes in a consistent
+ * order, sort the attributes before writing them into the
+ * stream. (Note that the order may be different than the one
+ * that e.g. we've read from a file, or the program order in
+ * which these attributes have been populated. We just want to
+ * guarantee reproducibile outputs.)
+ */
+ struct SavedAttribute {
+ QString prefix;
+ QString name;
+ QString encodedValue;
+ };
+
+ /* Gather all the attributes to save. */
+ QVarLengthArray<SavedAttribute, 8> attributesToSave;
+ attributesToSave.reserve(m_attr->map.size());
+
QDuplicateTracker<QString> outputtedPrefixes;
- auto it = m_attr->map.constBegin();
- for (; it != m_attr->map.constEnd(); ++it) {
- s << ' ';
- if (it.value()->namespaceURI.isNull()) {
- s << it.value()->name << "=\"" << encodeText(it.value()->value, true, true) << '\"';
- } else {
- s << it.value()->prefix << ':' << it.value()->name << "=\"" << encodeText(it.value()->value, true, true) << '\"';
- /* This is a fix for 138243, as good as it gets.
- *
- * QDomElementPrivate::save() output a namespace declaration if
- * the element is in a namespace, no matter what. This function do as well, meaning
- * that we get two identical namespace declaration if we don't have the if-
- * statement below.
- *
- * This doesn't work when the parent element has the same prefix as us but
- * a different namespace. However, this can only occur by the user modifying the element,
- * and we don't do fixups by that anyway, and hence it's the user responsibility to not
- * arrive in those situations. */
- if ((!it.value()->ownerNode ||
- it.value()->ownerNode->prefix != it.value()->prefix) &&
- !outputtedPrefixes.hasSeen(it.value()->prefix)) {
- s << " xmlns:" << it.value()->prefix << "=\"" << encodeText(it.value()->namespaceURI, true, true) << '\"';
- }
+ for (const auto &[key, value] : std::as_const(m_attr->map).asKeyValueRange()) {
+ Q_UNUSED(key); /* We extract the attribute name from the value. */
+ bool mayNeedXmlNS = false;
+
+ SavedAttribute attr;
+ attr.name = value->name;
+ attr.encodedValue = encodeText(value->value, true, true);
+ if (!value->namespaceURI.isNull()) {
+ attr.prefix = value->prefix;
+ mayNeedXmlNS = true;
+ }
+
+ attributesToSave.push_back(std::move(attr));
+
+ /*
+ * This is a fix for 138243, as good as it gets.
+ *
+ * QDomElementPrivate::save() output a namespace
+ * declaration if the element is in a namespace, no matter
+ * what. This function do as well, meaning that we get two
+ * identical namespace declaration if we don't have the if-
+ * statement below.
+ *
+ * This doesn't work when the parent element has the same
+ * prefix as us but a different namespace. However, this
+ * can only occur by the user modifying the element, and we
+ * don't do fixups by that anyway, and hence it's the user
+ * responsibility to avoid those situations.
+ */
+
+ if (mayNeedXmlNS
+ && ((!value->ownerNode || value->ownerNode->prefix != value->prefix)
+ && !outputtedPrefixes.hasSeen(value->prefix)))
+ {
+ attr.prefix = QStringLiteral("xmlns");
+ attr.name = value->prefix;
+ attr.encodedValue = encodeText(value->namespaceURI, true, true);
+ attributesToSave.push_back(std::move(attr));
}
}
+
+ /* Sort the attributes by prefix and name. */
+ const auto savedAttributeComparator = [](const SavedAttribute &lhs, const SavedAttribute &rhs)
+ {
+ const int cmp = QString::compare(lhs.prefix, rhs.prefix);
+ return (cmp < 0) || ((cmp == 0) && (lhs.name < rhs.name));
+ };
+
+ std::sort(attributesToSave.begin(), attributesToSave.end(), savedAttributeComparator);
+
+ /* Actually stream the sorted attributes. */
+ for (const auto &attr : attributesToSave) {
+ s << ' ';
+ if (!attr.prefix.isEmpty())
+ s << attr.prefix << ':';
+ s << attr.name << "=\"" << attr.encodedValue << '\"';
+ }
}
if (last) {
diff --git a/tests/auto/xml/dom/qdom/tst_qdom.cpp b/tests/auto/xml/dom/qdom/tst_qdom.cpp
index 9ea09215a8..cfc869949f 100644
--- a/tests/auto/xml/dom/qdom/tst_qdom.cpp
+++ b/tests/auto/xml/dom/qdom/tst_qdom.cpp
@@ -2011,15 +2011,8 @@ void tst_QDom::doubleNamespaceDeclarations() const
QXmlStreamReader streamReader(&file);
QVERIFY(doc.setContent(&streamReader, QDomDocument::ParseOption::UseNamespaceProcessing));
- // tst_QDom relies on a specific QHash ordering, see QTBUG-25071
QString docAsString = doc.toString(0);
- QVERIFY(docAsString == QString::fromLatin1("<a>\n<b p:c=\"\" xmlns:p=\"NS\" p:d=\"\"/>\n</a>\n") ||
- docAsString == QString::fromLatin1("<a>\n<b p:c=\"\" p:d=\"\" xmlns:p=\"NS\"/>\n</a>\n") ||
- docAsString == QString::fromLatin1("<a>\n<b p:d=\"\" p:c=\"\" xmlns:p=\"NS\"/>\n</a>\n") ||
- docAsString == QString::fromLatin1("<a>\n<b p:d=\"\" xmlns:p=\"NS\" p:c=\"\"/>\n</a>\n") ||
- docAsString == QString::fromLatin1("<a>\n<b xmlns:p=\"NS\" p:c=\"\" p:d=\"\"/>\n</a>\n") ||
- docAsString == QString::fromLatin1("<a>\n<b xmlns:p=\"NS\" p:d=\"\" p:c=\"\"/>\n</a>\n")
- );
+ QCOMPARE(docAsString, "<a>\n<b p:c=\"\" p:d=\"\" xmlns:p=\"NS\"/>\n</a>\n");
}
void tst_QDom::setContentQXmlReaderOverload() const