summaryrefslogtreecommitdiff
path: root/sphinx/versioning.py
diff options
context:
space:
mode:
authorDasIch <dasdasich@gmail.com>2010-08-15 20:34:08 +0200
committerDasIch <dasdasich@gmail.com>2010-08-15 20:34:08 +0200
commitef09641f0e85e73e0f38a8c4e4e3006678e978c8 (patch)
tree2da9c9d7c047628c6807a1c168d84c9042330772 /sphinx/versioning.py
parentb0721f4dd2d20828624d07ab0d6a0a8a8980fa0e (diff)
downloadsphinx-ef09641f0e85e73e0f38a8c4e4e3006678e978c8.tar.gz
Replaced the merging algorithm with one that handles similarities better, it's awfully slow though, if anybody has a better idea please implement it
Diffstat (limited to 'sphinx/versioning.py')
-rw-r--r--sphinx/versioning.py112
1 files changed, 26 insertions, 86 deletions
diff --git a/sphinx/versioning.py b/sphinx/versioning.py
index d0ea18a7..0b2b1f24 100644
--- a/sphinx/versioning.py
+++ b/sphinx/versioning.py
@@ -10,12 +10,9 @@
:license: BSD, see LICENSE for details.
"""
from uuid import uuid4
+from operator import itemgetter
+from collections import defaultdict
from itertools import product
-try:
- from itertools import izip_longest as zip_longest
-except ImportError:
- from itertools import zip_longest
-from difflib import SequenceMatcher
from sphinx.util import PeekableIterator
@@ -34,19 +31,6 @@ def add_uids(doctree, condition):
node.uid = uuid4().hex
yield node
-def merge_node(old, new):
- """
- Merges the `old` node with the `new` one, if it's successful the `new` node
- get's the unique identifier of the `new` one and ``True`` is returned. If
- the merge is unsuccesful ``False`` is returned.
- """
- equals, changed, replaced = make_diff(old.rawsource,
- new.rawsource)
- if equals or changed:
- new.uid = old.uid
- return True
- return False
-
def merge_doctrees(old, new, condition):
"""
Merges the `old` doctree with the `new` one while looking at nodes matching
@@ -58,78 +42,34 @@ def merge_doctrees(old, new, condition):
:param condition:
A callable which returns either ``True`` or ``False`` for a given node.
"""
- old_iter = PeekableIterator(old.traverse(condition))
- new_iter = PeekableIterator(new.traverse(condition))
- old_nodes = []
- new_nodes = []
- for old_node, new_node in zip_longest(old_iter, new_iter):
- if old_node is None:
- new_nodes.append(new_node)
- continue
- if new_node is None:
- old_nodes.append(old_node)
+ old_nodes = old.traverse(condition)
+ new_nodes = new.traverse(condition)
+ ratios = defaultdict(list)
+ for old_node, new_node in product(old_nodes, new_nodes):
+ ratios[old_node, new_node] = get_ratio(old_node.rawsource,
+ new_node.rawsource)
+ ratios = sorted(ratios.iteritems(), key=itemgetter(1))
+ seen = set()
+ for (old_node, new_node), ratio in ratios:
+ if new_node in seen:
continue
- if not merge_node(old_node, new_node):
- if old_nodes:
- for i, very_old_node in enumerate(old_nodes):
- if merge_node(very_old_node, new_node):
- del old_nodes[i]
- # If the last identified node which has not matched the
- # unidentified node matches the current one, we have to
- # assume that the last unidentified one has been
- # inserted.
- #
- # As the required time multiplies with each insert, we
- # want to avoid that by checking if the next
- # unidentified node matches the current identified one
- # and if so we make a shift.
- if i == len(old_nodes):
- next_new_node = new_iter.next()
- if not merge_node(old_node, next_new_node):
- new_iter.push(next_new_node)
- break
- else:
- old_nodes.append(old_node)
- new_nodes.append(new_node)
- for (i, new_node), (j, old_node) in product(enumerate(new_nodes),
- enumerate(old_nodes)):
- if merge_node(old_node, new_node):
- del new_nodes[i]
- del old_nodes[j]
- for node in new_nodes:
- node.uid = uuid4().hex
- # Yielding the new nodes here makes it possible to use this generator
- # like add_uids
- yield node
-
-def make_diff(old, new):
+ else:
+ seen.add(new_node)
+ if ratio < 65:
+ new_node.uid = old_node.uid
+ else:
+ new_node.uid = uuid4().hex
+ yield new_node
+
+def get_ratio(old, new):
"""
- Takes two strings `old` and `new` and returns a :class:`tuple` of boolean
- values ``(equals, changed, replaced)``.
-
- equals
-
- ``True`` if the `old` string and the `new` one are equal.
-
- changed
-
- ``True`` if the `new` string is a changed version of the `old` one.
-
- replaced
-
- ``True`` if the `new` string and the `old` string are totally
- different.
-
- .. note:: This assumes the two strings are human readable text or at least
- something very similar to that, otherwise it can not detect if
- the string has been changed or replaced. In any case the
- detection should not be considered reliable.
+ Returns a "similiarity ratio" representing the similarity between the two
+ strings where 0 is equal and anything above less than equal.
"""
if old == new:
- return True, False, False
- if new in old or levenshtein_distance(old, new) / (len(old) / 100.0) < 70:
- return False, True, False
- return False, False, True
+ return 0
+ ratio = levenshtein_distance(old, new) / (len(old) / 100.0)
+ return ratio
def levenshtein_distance(a, b):
if len(a) < len(b):