diff options
author | Jean Abou Samra <jean@abou-samra.fr> | 2023-02-12 02:46:30 +0100 |
---|---|---|
committer | Jean Abou Samra <jean@abou-samra.fr> | 2023-02-12 02:54:21 +0100 |
commit | 63bb71a5acd0d49a2ceee15098485bc34b0e8864 (patch) | |
tree | 4a4dbdc4f96d5b8a89a6cd0e45231e7f8f1473dc | |
parent | 08af5e2bab184c1b5d357ebde8c0efdbe6288e2c (diff) | |
download | babel-63bb71a5acd0d49a2ceee15098485bc34b0e8864.tar.gz |
In fuzzy matching, also .lower().strip() fuzzy candidates
This seems intended to ease fuzzy matching with trivial edits in the
msgstr (changing case and adding whitespace), but it was only done on
the new msgstr, not on the old msgstr candidates, so it was possible for
merging catalogs to miss messages.
-rw-r--r-- | babel/messages/catalog.py | 15 | ||||
-rw-r--r-- | tests/messages/test_catalog.py | 10 |
2 files changed, 14 insertions, 11 deletions
diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index dead4aa..1902643 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -803,10 +803,13 @@ class Catalog: # Prepare for fuzzy matching fuzzy_candidates = [] if not no_fuzzy_matching: - fuzzy_candidates = { - self._key_for(msgid): messages[msgid].context - for msgid in messages if msgid and messages[msgid].string - } + fuzzy_candidates = {} + for msgid in messages: + if msgid and messages[msgid].string: + key = self._key_for(msgid) + ctxt = messages[msgid].context + modified_key = key.lower().strip() + fuzzy_candidates[modified_key] = (key, ctxt) fuzzy_matches = set() def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None: @@ -861,8 +864,8 @@ class Catalog: matches = get_close_matches(matchkey.lower().strip(), fuzzy_candidates.keys(), 1) if matches: - newkey = matches[0] - newctxt = fuzzy_candidates[newkey] + modified_key = matches[0] + newkey, newctxt = fuzzy_candidates[modified_key] if newctxt is not None: newkey = newkey, newctxt _merge(message, newkey, key) diff --git a/tests/messages/test_catalog.py b/tests/messages/test_catalog.py index 273c83f..c2e7aed 100644 --- a/tests/messages/test_catalog.py +++ b/tests/messages/test_catalog.py @@ -121,16 +121,16 @@ class CatalogTestCase(unittest.TestCase): def test_update_fuzzy_matching_with_case_change(self): cat = catalog.Catalog() - cat.add('foo', 'Voh') + cat.add('FOO', 'Voh') cat.add('bar', 'Bahr') tmpl = catalog.Catalog() - tmpl.add('Foo') + tmpl.add('foo') cat.update(tmpl) assert len(cat.obsolete) == 1 - assert 'foo' not in cat + assert 'FOO' not in cat - assert cat['Foo'].string == 'Voh' - assert cat['Foo'].fuzzy is True + assert cat['foo'].string == 'Voh' + assert cat['foo'].fuzzy is True def test_update_fuzzy_matching_with_char_change(self): cat = catalog.Catalog() |