From 5cbecd98c416f004928b8be21f87a1af57f0b800 Mon Sep 17 00:00:00 2001 From: Jelmer Vernooij Date: Wed, 4 Apr 2012 02:44:30 +0200 Subject: Allow less strict parsing of fastimport streams. --- NEWS | 4 ++++ fastimport/parser.py | 19 ++++++++++++++----- fastimport/tests/test_parser.py | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index 969ef75..8702378 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,10 @@ * Cope with invalid timezones like +61800 a little bit better. (Jelmer Vernooij, #959154) + * Allow non-strict parsing of fastimport streams, when + a tagger is missing an email address. + (Jelmer Vernooij, #730607) + 0.9.1 2012-02-28 * Update FSF address in headers. (Dan Callaghan, #868800) diff --git a/fastimport/parser.py b/fastimport/parser.py index 88eecfa..cff8614 100644 --- a/fastimport/parser.py +++ b/fastimport/parser.py @@ -259,7 +259,7 @@ _WHO_RE = re.compile(r'([^<]*)<(.*)>') class ImportParser(LineBasedParser): def __init__(self, input, verbose=False, output=sys.stdout, - user_mapper=None): + user_mapper=None, strict=True): """A Parser of import commands. :param input: the file-like object to read from @@ -267,11 +267,13 @@ class ImportParser(LineBasedParser): :param output: the file-like object to write messages to (YAGNI?) :param user_mapper: if not None, the UserMapper used to adjust user-ids for authors, committers and taggers. 
+ :param strict: Raise errors on strictly invalid data """ LineBasedParser.__init__(self, input) self.verbose = verbose self.output = output self.user_mapper = user_mapper + self.strict = strict # We auto-detect the date format when a date is first encountered self.date_parser = None self.features = {} @@ -421,7 +423,8 @@ class ImportParser(LineBasedParser): def _parse_tag(self, name): """Parse a tag command.""" from_ = self._get_from('tag') - tagger = self._get_user_info('tag', 'tagger', accept_just_who=True) + tagger = self._get_user_info('tag', 'tagger', + accept_just_who=True) message = self._get_data('tag', 'message') return commands.TagCommand(name, from_, tagger, message) @@ -524,19 +527,25 @@ class ImportParser(LineBasedParser): except ValueError: print "failed to parse datestr '%s'" % (datestr,) raise + name = match.group(1) + email = match.group(2) else: match = _WHO_RE.search(s) if accept_just_who and match: # HACK around missing time # TODO: output a warning here when = dates.DATE_PARSERS_BY_NAME['now']('now') - else: + name = match.group(1) + email = match.group(2) + elif self.strict: self.abort(errors.BadFormat, cmd, section, s) - name = match.group(1) + else: + name = s + email = None + when = dates.DATE_PARSERS_BY_NAME['now']('now') if len(name) > 0: if name[-1] == " ": name = name[:-1] - email = match.group(2) # While it shouldn't happen, some datasets have email addresses # which contain unicode characters. See bug 338186. We sanitize # the data at this level just in case. 
diff --git a/fastimport/tests/test_parser.py b/fastimport/tests/test_parser.py index 4bf11c7..97d062b 100644 --- a/fastimport/tests/test_parser.py +++ b/fastimport/tests/test_parser.py @@ -20,6 +20,7 @@ import StringIO import testtools from fastimport import ( + commands, errors, parser, ) @@ -304,3 +305,40 @@ class TestPathPairParsing(testtools.TestCase): p = parser.ImportParser("") self.assertEqual(['foo bar', 'baz'], p._path_pair('"foo bar" baz')) + + +class TestTagParsing(testtools.TestCase): + + def test_tagger_with_email(self): + p = parser.ImportParser(StringIO.StringIO( + "tag refs/tags/v1.0\n" + "from :xxx\n" + "tagger Joe Wong <joe@example.com> 1234567890 -0600\n" + "data 11\n" + "create v1.0")) + cmds = list(p.iter_commands()) + self.assertEquals(1, len(cmds)) + self.assertIsInstance(cmds[0], commands.TagCommand) + self.assertEquals(cmds[0].tagger, + ('Joe Wong', 'joe@example.com', 1234567890.0, -21600)) + + def test_tagger_no_email_strict(self): + p = parser.ImportParser(StringIO.StringIO( + "tag refs/tags/v1.0\n" + "from :xxx\n" + "tagger Joe Wong\n" + "data 11\n" + "create v1.0")) + self.assertRaises(errors.BadFormat, list, p.iter_commands()) + + def test_tagger_no_email_not_strict(self): + p = parser.ImportParser(StringIO.StringIO( + "tag refs/tags/v1.0\n" + "from :xxx\n" + "tagger Joe Wong\n" + "data 11\n" + "create v1.0"), strict=False) + cmds = list(p.iter_commands()) + self.assertEquals(1, len(cmds)) + self.assertIsInstance(cmds[0], commands.TagCommand) + self.assertEquals(cmds[0].tagger[:2], ('Joe Wong', None)) -- cgit v1.2.1