From f4f6653e6aa053724d2c6dc0ee71dcb928013352 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20Hamb=C3=BCchen?= Date: Tue, 28 Jan 2020 02:46:25 +0100 Subject: Introduce invariant that _invalid_pofile() takes unicode line. This makes debugging and reasoning about the code easier; otherwise it is surprising that sometimes `line` is a unicode and sometimes not. So far, when it was not, it could either be only `""` or `'Algo esta mal'`; thus this commit makes those two u"" strings. In all other cases, it was guaranteed that it's unicode, because all code paths leading to `_invalid_pofile()` went through if not isinstance(line, text_type): line = line.decode(self.catalog.charset) before. --- babel/messages/pofile.py | 3 ++- tests/messages/test_pofile.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index 93b0697..f6771be 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -178,7 +178,7 @@ class PoFileParser(object): string = ['' for _ in range(self.catalog.num_plurals)] for idx, translation in self.translations: if idx >= self.catalog.num_plurals: - self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog") + self._invalid_pofile(u"", self.offset, "msg has more translations than num_plurals of catalog") continue string[idx] = translation.denormalize() string = tuple(string) @@ -319,6 +319,7 @@ class PoFileParser(object): self._add_message() def _invalid_pofile(self, line, lineno, msg): + assert isinstance(line, text_type) if self.abort_invalid: raise PoFileError(msg, self.catalog, line, lineno) print("WARNING:", msg) diff --git a/tests/messages/test_pofile.py b/tests/messages/test_pofile.py index e77fa6e..214ddf5 100644 --- a/tests/messages/test_pofile.py +++ b/tests/messages/test_pofile.py @@ -480,7 +480,7 @@ msgstr[2] "Vohs [text]" def test_invalid_pofile_with_abort_flag(self): parser = pofile.PoFileParser(None, abort_invalid=True) lineno 
= 10 - line = 'Algo esta mal' + line = u'Algo esta mal' msg = 'invalid file' with self.assertRaises(pofile.PoFileError) as e: parser._invalid_pofile(line, lineno, msg) -- cgit v1.2.1 From da7f31143847659b6b74d802618b03438aceb350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20Hamb=C3=BCchen?= Date: Tue, 28 Jan 2020 00:37:22 +0100 Subject: Fix unicode printing error on Python 2 without TTY. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now, on Python 2.7, `python setup.py test | cat` crashed in the test runner with ====================================================================== ERROR: test_abort_invalid_po_file (tests.messages.test_pofile.ReadPoTestCase) ---------------------------------------------------------------------- Traceback (most recent call last): File "src/babel/tests/messages/test_pofile.py", line 458, in test_abort_invalid_po_file output = pofile.read_po(buf, locale='fr', abort_invalid=False) File "src/babel/babel/messages/pofile.py", line 377, in read_po parser.parse(fileobj) File "src/babel/babel/messages/pofile.py", line 310, in parse self._process_message_line(lineno, line) File "src/babel/babel/messages/pofile.py", line 210, in _process_message_line self._process_keyword_line(lineno, line, obsolete) File "src/babel/babel/messages/pofile.py", line 222, in _process_keyword_line self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.") File "src/babel/babel/messages/pofile.py", line 325, in _invalid_pofile print(u"WARNING: Problem on line {0}: {1}".format(lineno + 1, line)) UnicodeEncodeError: 'ascii' codec can't encode character u'\xe0' in position 84: ordinal not in range(128) The test suite would show this when printing the `à` in the test pofile contents Pour toute question, veuillez communiquer avec Fulano à nadie@blah.com But this bug is not confined to the test suite only. 
Any call to `read_po()` with an invalid .po file could trigger it in non-test code when `sys.stdout.encoding` is `None`, which is the default for Python 2 when `sys.stdout.isatty()` is false (as induced e.g. by `| cat`). The fix is to `repr()` the line when printing the WARNING. --- babel/messages/pofile.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index f6771be..b86dd40 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -323,7 +323,10 @@ class PoFileParser(object): if self.abort_invalid: raise PoFileError(msg, self.catalog, line, lineno) print("WARNING:", msg) - print(u"WARNING: Problem on line {0}: {1}".format(lineno + 1, line)) + # `line` is guaranteed to be unicode so u"{}"-interpolating would always + # succeed, but on Python 2 if not in a TTY, `sys.stdout.encoding` + # is `None`, unicode may not be printable so we `repr()` to ASCII. + print(u"WARNING: Problem on line {0}: {1}".format(lineno + 1, repr(line))) def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None, abort_invalid=False): -- cgit v1.2.1