summaryrefslogtreecommitdiff
path: root/xpath.c
diff options
context:
space:
mode:
authorKasimier T. Buchcik <kbuchcik@src.gnome.org>2006-05-19 11:26:15 +0000
committerKasimier T. Buchcik <kbuchcik@src.gnome.org>2006-05-19 11:26:15 +0000
commit2bdabbd711356e534940431053523f1538d1a93e (patch)
tree4e33fb15e1ccb1b1d0669d1e8b91771e066c55f6 /xpath.c
parent6ed2eb47fcf485283c6dc2dbd0ba0270f22a23b6 (diff)
downloadlibxml2-2bdabbd711356e534940431053523f1538d1a93e.tar.gz
Optimized the comparison for non-element nodes in xmlXPathCmpNodesExt();
* xpath.c: Optimized the comparison for non-element nodes in xmlXPathCmpNodesExt(); the comparison is used for sorting of node-sets. This enhancement is related to bug #165547. There are other places where the old comparison function xmlXPathCmpNodes() is still called, but I currently don't know exactly what those calls are for; thus if they can be substituted (if it makes sense) for the new function.
Diffstat (limited to 'xpath.c')
-rw-r--r--xpath.c297
1 files changed, 296 insertions, 1 deletions
diff --git a/xpath.c b/xpath.c
index 0e42893d..43a8070b 100644
--- a/xpath.c
+++ b/xpath.c
@@ -75,6 +75,17 @@
* only element-nodes and the document node.
*/
#define XP_PATTERN_TO_ANY_NODE_ENABLED
+
+/*
+* XP_FAST_NON_ELEM_COMPARISON:
+* If defined, this will use xmlXPathCmpNodesExt() instead of
+* xmlXPathCmpNodes(). The new function is optimized comparison of
+* non-element nodes; actually it will speed up comparison only if
+* xmlXPathOrderDocElems() was called in order to index the elements of
+* a tree in document order; Libxslt does such an indexing, thus it will
+* benefit from this optimization.
+*/
+#define XP_FAST_NON_ELEM_COMPARISON
/*
* TODO:
* There are a few spots where some tests are done which depend upon ascii
@@ -1691,6 +1702,284 @@ xmlXPathCmpNodes(xmlNodePtr node1, xmlNodePtr node2) {
return(-1); /* assume there is no sibling list corruption */
}
+#ifdef XP_FAST_NON_ELEM_COMPARISON
+/**
+ * xmlXPathCmpNodesExt:
+ * @node1: the first node
+ * @node2: the second node
+ *
+ * Compare two nodes w.r.t document order.
+ * This one is optimized for handling of non-element nodes.
+ *
+ * Returns -2 in case of error 1 if first point < second point, 0 if
+ * it's the same node, -1 otherwise
+ */
+static int
+xmlXPathCmpNodesExt(xmlNodePtr node1, xmlNodePtr node2) {
+ int depth1, depth2;
+ int misc = 0, precedence1 = 0, precedence2 = 0;
+ xmlNodePtr miscNode1 = NULL, miscNode2 = NULL;
+ xmlNodePtr cur, root;
+
+ if ((node1 == NULL) || (node2 == NULL))
+ return(-2);
+
+ if (node1 == node2)
+ return(0);
+
+ /*
+ * a couple of optimizations which will avoid computations in most cases
+ */
+
+ switch (node1->type) {
+ case XML_ELEMENT_NODE:
+ break;
+ case XML_ATTRIBUTE_NODE:
+ precedence1 = 1; /* element is owner */
+ miscNode1 = node1;
+ node1 = node1->parent;
+ misc = 1;
+ break;
+ case XML_TEXT_NODE:
+ case XML_CDATA_SECTION_NODE:
+ case XML_COMMENT_NODE:
+ case XML_PI_NODE: {
+ miscNode1 = node1;
+ /*
+ * Find nearest element node.
+ */
+ if (node1->prev != NULL) {
+ do {
+ node1 = node1->prev;
+ if (node1->type == XML_ELEMENT_NODE) {
+ precedence1 = 3; /* element in prev-sibl axis */
+ break;
+ }
+ if (node1->prev == NULL) {
+ precedence1 = 2; /* element is parent */
+ /*
+ * URGENT TODO: Are there any cases, where the
+ * parent of such a node is not an element node?
+ */
+ node1 = node1->parent;
+ break;
+ }
+ } while (1);
+ } else {
+ precedence1 = 2; /* element is parent */
+ node1 = node1->parent;
+ }
+ if ((node1 == NULL) || (node1->type != XML_ELEMENT_NODE)) {
+ /*
+ * Fallback for whatever case.
+ */
+ node1 = miscNode1;
+ precedence1 = 0;
+ } else
+ misc = 1;
+ }
+ break;
+ case XML_NAMESPACE_DECL:
+ /*
+ * TODO: why do we return 1 for namespace nodes?
+ */
+ return(1);
+ default:
+ break;
+ }
+ switch (node2->type) {
+ case XML_ELEMENT_NODE:
+ break;
+ case XML_ATTRIBUTE_NODE:
+ precedence2 = 1; /* element is owner */
+ miscNode2 = node2;
+ node2 = node2->parent;
+ misc = 1;
+ break;
+ case XML_TEXT_NODE:
+ case XML_CDATA_SECTION_NODE:
+ case XML_COMMENT_NODE:
+ case XML_PI_NODE: {
+ miscNode2 = node2;
+ if (node2->prev != NULL) {
+ do {
+ node2 = node2->prev;
+ if (node2->type == XML_ELEMENT_NODE) {
+ precedence2 = 3; /* element in prev-sibl axis */
+ break;
+ }
+ if (node2->prev == NULL) {
+ precedence2 = 2; /* element is parent */
+ node2 = node2->parent;
+ break;
+ }
+ } while (1);
+ } else {
+ precedence2 = 2; /* element is parent */
+ node2 = node2->parent;
+ }
+ if ((node2 == NULL) || (node2->type != XML_ELEMENT_NODE) ||
+ (0 <= (long) node1->content))
+ {
+ node2 = miscNode2;
+ precedence2 = 0;
+ } else
+ misc = 1;
+ }
+ break;
+ case XML_NAMESPACE_DECL:
+ return(1);
+ default:
+ break;
+ }
+ if (misc) {
+ if (node1 == node2) {
+ if (precedence1 == precedence2) {
+ /*
+ * The ugly case; but normally there aren't many
+ * adjacent non-element nodes around.
+ */
+ cur = miscNode2->prev;
+ while (cur != NULL) {
+ if (cur == miscNode1)
+ return(1);
+ if (cur->type == XML_ELEMENT_NODE)
+ return(-1);
+ cur = cur->prev;
+ }
+ return (-1);
+ } else {
+ /*
+ * Evaluate based on higher precedence wrt to the element.
+ * TODO: This assumes attributes are sorted before content.
+ * Is this 100% correct?
+ */
+ if (precedence1 < precedence2)
+ return(1);
+ else
+ return(-1);
+ }
+ }
+ /*
+ * Special case: One of the helper-elements is contained by the other.
+ * <foo>
+ * <node2>
+ * <node1>Text-1(precedence1 == 2)</node1>
+ * </node2>
+ * Text-6(precedence2 == 3)
+ * </foo>
+ */
+ if ((precedence2 == 3) && (precedence1 > 1)) {
+ cur = node1->parent;
+ while (cur) {
+ if (cur == node2)
+ return(1);
+ cur = cur->parent;
+ }
+ }
+ if ((precedence1 == 3) && (precedence2 > 1)) {
+ cur = node2->parent;
+ while (cur) {
+ if (cur == node1)
+ return(-1);
+ cur = cur->parent;
+ }
+ }
+ }
+
+ if (node1 == node2->prev)
+ return(1);
+ if (node1 == node2->next)
+ return(-1);
+
+ /*
+ * Speedup using document order if availble.
+ */
+ if ((node1->type == XML_ELEMENT_NODE) &&
+ (node2->type == XML_ELEMENT_NODE) &&
+ (0 > (long) node1->content) &&
+ (0 > (long) node2->content) &&
+ (node1->doc == node2->doc)) {
+ long l1, l2;
+
+ l1 = -((long) node1->content);
+ l2 = -((long) node2->content);
+ if (l1 < l2)
+ return(1);
+ if (l1 > l2)
+ return(-1);
+ }
+
+ /*
+ * compute depth to root
+ */
+ for (depth2 = 0, cur = node2;cur->parent != NULL;cur = cur->parent) {
+ if (cur == node1)
+ return(1);
+ depth2++;
+ }
+ root = cur;
+ for (depth1 = 0, cur = node1;cur->parent != NULL;cur = cur->parent) {
+ if (cur == node2)
+ return(-1);
+ depth1++;
+ }
+ /*
+ * Distinct document (or distinct entities :-( ) case.
+ */
+ if (root != cur) {
+ return(-2);
+ }
+ /*
+ * get the nearest common ancestor.
+ */
+ while (depth1 > depth2) {
+ depth1--;
+ node1 = node1->parent;
+ }
+ while (depth2 > depth1) {
+ depth2--;
+ node2 = node2->parent;
+ }
+ while (node1->parent != node2->parent) {
+ node1 = node1->parent;
+ node2 = node2->parent;
+ /* should not happen but just in case ... */
+ if ((node1 == NULL) || (node2 == NULL))
+ return(-2);
+ }
+ /*
+ * Find who's first.
+ */
+ if (node1 == node2->prev)
+ return(1);
+ if (node1 == node2->next)
+ return(-1);
+ /*
+ * Speedup using document order if availble.
+ */
+ if ((node1->type == XML_ELEMENT_NODE) &&
+ (node2->type == XML_ELEMENT_NODE) &&
+ (0 > (long) node1->content) &&
+ (0 > (long) node2->content) &&
+ (node1->doc == node2->doc)) {
+ long l1, l2;
+
+ l1 = -((long) node1->content);
+ l2 = -((long) node2->content);
+ if (l1 < l2)
+ return(1);
+ if (l1 > l2)
+ return(-1);
+ }
+
+ for (cur = node1->next;cur != NULL;cur = cur->next)
+ if (cur == node2)
+ return(1);
+ return(-1); /* assume there is no sibling list corruption */
+}
+#endif /* XP_FAST_NON_ELEM_COMPARISON */
+
/**
* xmlXPathNodeSetSort:
* @set: the node set
@@ -1711,8 +2000,14 @@ xmlXPathNodeSetSort(xmlNodeSetPtr set) {
for (i = incr; i < len; i++) {
j = i - incr;
while (j >= 0) {
+#ifdef XP_FAST_NON_ELEM_COMPARISON
+ if (xmlXPathCmpNodesExt(set->nodeTab[j],
+ set->nodeTab[j + incr]) == -1)
+#else
if (xmlXPathCmpNodes(set->nodeTab[j],
- set->nodeTab[j + incr]) == -1) {
+ set->nodeTab[j + incr]) == -1)
+#endif
+ {
tmp = set->nodeTab[j];
set->nodeTab[j] = set->nodeTab[j + incr];
set->nodeTab[j + incr] = tmp;