summaryrefslogtreecommitdiff
path: root/hg-fast-export
diff options
context:
space:
mode:
author Ben Hutchings <ben.hutchings@codethink.co.uk> 2020-09-29 17:37:27 +0100
committer Ben Hutchings <ben.hutchings@codethink.co.uk> 2020-10-01 14:22:30 +0100
commit 9b3e1939465593b72957b882765623ae416d7796 (patch)
tree 5664b912674f6d8d6fb200375a91fcccb212d626 /hg-fast-export
parent 641ebd75f9868753ba472e780062a7f2aed5abbb (diff)
download lorry-9b3e1939465593b72957b882765623ae416d7796.tar.gz
lorry: Install and use fudge_user_ids plugin for hg-fast-export
Mercurial allows arbitrary strings as user (committer) ids, while Git
requires a name and email address, and specific punctuation around the
address. hg-fast-export has some provision for automatically fixing up
invalid committer and author ids, but it doesn't catch everything. Its
maintainer does not want to extend this, so we use a plugin instead.

* Add a plugin (fudge_user_ids) that should fix up all invalid ids.
* In setup.py:
  - Compile it at build time
  - Install it under a private data directory (/usr/share/lorry)
  - Clean up the bytecode
* In gitify_hg, check whether hg-fast-export supports plugins, and
  where our plugins are. If this succeeds, add --plugin-path and
  --plugin options to enable fudge_user_ids.

Closes #11.
Diffstat (limited to 'hg-fast-export')
-rw-r--r--hg-fast-export/plugins/fudge_user_ids/__init__.py47
1 files changed, 47 insertions, 0 deletions
diff --git a/hg-fast-export/plugins/fudge_user_ids/__init__.py b/hg-fast-export/plugins/fudge_user_ids/__init__.py
new file mode 100644
index 0000000..9e81195
--- /dev/null
+++ b/hg-fast-export/plugins/fudge_user_ids/__init__.py
@@ -0,0 +1,47 @@
+# Fudge committer and author ids that git fast-import considers invalid
+# Copyright 2020 Codethink Ltd
+
+import re
+import sys
+
+from mercurial import templatefilters
+
+
def build_filter(args):
    """Create and return the ``Filter`` instance for this plugin.

    ``args`` is passed through to ``Filter`` unchanged.
    NOTE(review): presumably this is the factory hg-fast-export calls when
    it loads the plugin -- confirm against the plugin loader.
    """
    plugin_filter = Filter(args)
    return plugin_filter
+
+
class Filter:
    """Rewrite author/committer ids that do not look like ``name <email>``.

    Ids that already match ``_valid_id_re`` are left untouched.  Any other
    id is rebuilt from the name and email that Mercurial's ``person`` and
    ``email`` template filters extract from it.
    """

    # Shape of an id that git accepts (see parse_ident() in fast-import.c)
    _valid_id_re = re.compile(rb'^[^<>]* <[^<>]+>$')

    # Angle brackets are the characters that may still need replacing
    _id_special_re = re.compile(rb'[<>]')

    def __init__(self, args):
        """Accept the plugin arguments; ``args`` is not used."""

    def _rebuild_id(self, user_id):
        """Return a ``name <email>`` bytes id derived from ``user_id``."""
        extracted = (
            templatefilters.person(user_id),
            templatefilters.email(user_id),
        )
        # Any angle bracket left inside either part is replaced with '?'
        name, email = [
            self._id_special_re.sub(b'?', part) for part in extracted
        ]
        return b'%s <%s>' % (name, email)

    def commit_message_filter(self, commit_data):
        """Fix up the 'author' and 'committer' entries of ``commit_data``.

        ``commit_data`` is modified in place.  A key that is absent is
        skipped, and each replacement is reported on stderr.
        """
        for key in ('author', 'committer'):
            try:
                original_id = commit_data[key]
            except KeyError:
                continue

            if self._valid_id_re.match(original_id) is None:
                commit_data[key] = self._rebuild_id(original_id)

                report = 'Replaced %s id "%s" with "%s"\n' % (
                    key,
                    original_id.decode('utf-8', errors='replace'),
                    commit_data[key].decode('utf-8', errors='replace'),
                )
                sys.stderr.write(report)