diff options
author | Aarni Koskela <akx@iki.fi> | 2016-07-08 20:39:56 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-07-08 20:39:56 +0300 |
commit | 30d4897e6bc03a63eee672afa7b9080f66a13b2b (patch) | |
tree | 935fd65a4d6c1899fa057f3f4da0003b12e14413 | |
parent | 86170f409c195394753b603fdbde2ffa8583b1e7 (diff) | |
parent | 38a67790931ba3272fc395b8470e2d062d02961b (diff) | |
download | babel-30d4897e6bc03a63eee672afa7b9080f66a13b2b.tar.gz |
Merge pull request #429 from mbirtwell/multi_line_obsolete
Support reading multi line obsolete units in po files
-rw-r--r-- | .travis.yml | 6 | ||||
-rw-r--r-- | babel/messages/pofile.py | 293 | ||||
-rw-r--r-- | tests/messages/test_pofile.py | 42 |
3 files changed, 202 insertions, 139 deletions
diff --git a/.travis.yml b/.travis.yml index 978d377..08ac545 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,12 +33,6 @@ matrix: python: 3.4 - os: linux python: 3.5 - - os: osx - language: generic - env: - - PYTHON_VERSION=3.5.1 - - PYENV_ROOT=~/.pyenv - - PATH=$PYENV_ROOT/shims:$PATH:$PYENV_ROOT/bin install: - bash .ci/deps.${TRAVIS_OS_NAME}.sh diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index a775ec0..f3ba2fa 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -73,6 +73,164 @@ def denormalize(string): return unescape(string) +class PoFileParser(object): + """Support class to read messages from a ``gettext`` PO (portable object) file + and add them to a `Catalog` + + See `read_po` for simple cases. + """ + + def __init__(self, catalog, ignore_obsolete=False): + self.catalog = catalog + self.ignore_obsolete = ignore_obsolete + self.counter = 0 + self.offset = 0 + self.messages = [] + self.translations = [] + self.locations = [] + self.flags = [] + self.user_comments = [] + self.auto_comments = [] + self.obsolete = False + self.context = [] + self.in_msgid = False + self.in_msgstr = False + self.in_msgctxt = False + + def _add_message(self): + """ + Add a message to the catalog based on the current parser state and + clear the state ready to process the next message. + """ + self.translations.sort() + if len(self.messages) > 1: + msgid = tuple([denormalize(m) for m in self.messages]) + else: + msgid = denormalize(self.messages[0]) + if isinstance(msgid, (list, tuple)): + string = [] + for idx in range(self.catalog.num_plurals): + try: + string.append(self.translations[idx]) + except IndexError: + string.append((idx, '')) + string = tuple([denormalize(t[1]) for t in string]) + else: + string = denormalize(self.translations[0][1]) + if self.context: + msgctxt = denormalize('\n'.join(self.context)) + else: + msgctxt = None + message = Message(msgid, string, list(self.locations), set(self.flags), + self.auto_comments, self.user_comments, lineno=self.offset + 1, + context=msgctxt) + if self.obsolete: + if not self.ignore_obsolete: + self.catalog.obsolete[msgid] = message + else: + self.catalog[msgid] = message + del self.messages[:] + del self.translations[:] + del self.context[:] + del self.locations[:] + del self.flags[:] + del self.auto_comments[:] + del self.user_comments[:] + self.obsolete = False + self.counter += 1 + + def _process_message_line(self, lineno, line): + if line.startswith('msgid_plural'): + self.in_msgid = True + msg = line[12:].lstrip() + self.messages.append(msg) + elif line.startswith('msgid'): + self.in_msgid = True + self.offset = lineno + txt = line[5:].lstrip() + if self.messages: + self._add_message() + self.messages.append(txt) + elif line.startswith('msgstr'): + self.in_msgid = False + self.in_msgstr = True + msg = line[6:].lstrip() + if msg.startswith('['): + idx, msg = msg[1:].split(']', 1) + self.translations.append([int(idx), msg.lstrip()]) + else: + self.translations.append([0, msg]) + elif line.startswith('msgctxt'): + if self.messages: + self._add_message() + self.in_msgid = self.in_msgstr = False + self.context.append(line[7:].lstrip()) + elif line.startswith('"'): + if self.in_msgid: + self.messages[-1] += u'\n' + line.rstrip() + elif self.in_msgstr: + self.translations[-1][1] += u'\n' + line.rstrip() + elif self.in_msgctxt: + self.context.append(line.rstrip()) + + def _process_comment(self, line): + + self.in_msgid = self.in_msgstr = False + if self.messages and self.translations: + self._add_message() + if line[1:].startswith(':'): + for location in line[2:].lstrip().split(): + pos = location.rfind(':') + if pos >= 0: + try: + lineno = int(location[pos + 1:]) + except ValueError: + continue + self.locations.append((location[:pos], lineno)) + else: + self.locations.append((location, None)) + elif line[1:].startswith(','): + for flag in line[2:].lstrip().split(','): + self.flags.append(flag.strip()) + elif line[1:].startswith('.'): + # These are called auto-comments + comment = line[2:].strip() + if comment: # Just check that we're not adding empty comments + self.auto_comments.append(comment) + else: + # These are called user comments + self.user_comments.append(line[1:].strip()) + + def parse(self, fileobj): + """ + Reads from the file-like object `fileobj` and adds any po file + units found in it to the `Catalog` supplied to the constructor. + """ + + for lineno, line in enumerate(fileobj.readlines()): + line = line.strip() + if not isinstance(line, text_type): + line = line.decode(self.catalog.charset) + if line.startswith('#'): + if line[1:].startswith('~'): + self.obsolete = True + self._process_message_line(lineno, line[2:].lstrip()) + else: + self._process_comment(line) + else: + self._process_message_line(lineno, line) + + if self.messages: + self._add_message() + + # No actual messages found, but there was some info in comments, from which + # we'll construct an empty header message + elif not self.counter and (self.flags or self.user_comments or self.auto_comments): + self.messages.append(u'') + self.translations.append([0, u'']) + self._add_message() + + def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None): """Read messages from a ``gettext`` PO (portable object) file from the given file-like object and return a `Catalog`. @@ -120,139 +278,8 @@ def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=No :param charset: the character set of the catalog. """ catalog = Catalog(locale=locale, domain=domain, charset=charset) - - counter = [0] - offset = [0] - messages = [] - translations = [] - locations = [] - flags = [] - user_comments = [] - auto_comments = [] - obsolete = [False] - context = [] - in_msgid = [False] - in_msgstr = [False] - in_msgctxt = [False] - - def _add_message(): - translations.sort() - if len(messages) > 1: - msgid = tuple([denormalize(m) for m in messages]) - else: - msgid = denormalize(messages[0]) - if isinstance(msgid, (list, tuple)): - string = [] - for idx in range(catalog.num_plurals): - try: - string.append(translations[idx]) - except IndexError: - string.append((idx, '')) - string = tuple([denormalize(t[1]) for t in string]) - else: - string = denormalize(translations[0][1]) - if context: - msgctxt = denormalize('\n'.join(context)) - else: - msgctxt = None - message = Message(msgid, string, list(locations), set(flags), - auto_comments, user_comments, lineno=offset[0] + 1, - context=msgctxt) - if obsolete[0]: - if not ignore_obsolete: - catalog.obsolete[msgid] = message - else: - catalog[msgid] = message - del messages[:] - del translations[:] - del context[:] - del locations[:] - del flags[:] - del auto_comments[:] - del user_comments[:] - obsolete[0] = False - counter[0] += 1 - - def _process_message_line(lineno, line): - if line.startswith('msgid_plural'): - in_msgid[0] = True - msg = line[12:].lstrip() - messages.append(msg) - elif line.startswith('msgid'): - in_msgid[0] = True - offset[0] = lineno - txt = line[5:].lstrip() - if messages: - _add_message() - messages.append(txt) - elif line.startswith('msgstr'): - in_msgid[0] = False - in_msgstr[0] = True - msg = line[6:].lstrip() - if msg.startswith('['): - idx, msg = msg[1:].split(']', 1) - translations.append([int(idx), msg.lstrip()]) - else: - translations.append([0, msg]) - elif line.startswith('msgctxt'): - if messages: - _add_message() - in_msgid[0] = in_msgstr[0] = False - context.append(line[7:].lstrip()) - elif line.startswith('"'): - if in_msgid[0]: - messages[-1] += u'\n' + line.rstrip() - elif in_msgstr[0]: - translations[-1][1] += u'\n' + line.rstrip() - elif in_msgctxt[0]: - context.append(line.rstrip()) - - for lineno, line in enumerate(fileobj.readlines()): - line = line.strip() - if not isinstance(line, text_type): - line = line.decode(catalog.charset) - if line.startswith('#'): - in_msgid[0] = in_msgstr[0] = False - if messages and translations: - _add_message() - if line[1:].startswith(':'): - for location in line[2:].lstrip().split(): - pos = location.rfind(':') - if pos >= 0: - try: - lineno = int(location[pos + 1:]) - except ValueError: - continue - locations.append((location[:pos], lineno)) - else: - locations.append((location, None)) - elif line[1:].startswith(','): - for flag in line[2:].lstrip().split(','): - flags.append(flag.strip()) - elif line[1:].startswith('~'): - obsolete[0] = True - _process_message_line(lineno, line[2:].lstrip()) - elif line[1:].startswith('.'): - # These are called auto-comments - comment = line[2:].strip() - if comment: # Just check that we're not adding empty comments - auto_comments.append(comment) - else: - # These are called user comments - user_comments.append(line[1:].strip()) - else: - _process_message_line(lineno, line) - - if messages: - _add_message() - - # No actual messages found, but there was some info in comments, from which - # we'll construct an empty header message - elif not counter[0] and (flags or user_comments or auto_comments): - messages.append(u'') - translations.append([0, u'']) - _add_message() - + parser = PoFileParser(catalog, ignore_obsolete) + parser.parse(fileobj) return catalog diff --git a/tests/messages/test_pofile.py b/tests/messages/test_pofile.py index 1bac360..a271a84 100644 --- a/tests/messages/test_pofile.py +++ b/tests/messages/test_pofile.py @@ -164,6 +164,48 @@ msgstr "Bahr" self.assertEqual(1, len(catalog)) self.assertEqual(0, len(catalog.obsolete)) + def test_multi_line_obsolete_message(self): + buf = StringIO(r'''# This is an obsolete message +#~ msgid "" +#~ "foo" +#~ "foo" +#~ msgstr "" +#~ "Voh" +#~ "Vooooh" + +# This message is not obsolete +#: main.py:1 +msgid "bar" +msgstr "Bahr" +''') + catalog = pofile.read_po(buf) + self.assertEqual(1, len(catalog.obsolete)) + message = catalog.obsolete[u'foofoo'] + self.assertEqual(u'foofoo', message.id) + self.assertEqual(u'VohVooooh', message.string) + self.assertEqual(['This is an obsolete message'], message.user_comments) + + def test_unit_following_multi_line_obsolete_message(self): + buf = StringIO(r'''# This is an obsolete message +#~ msgid "" +#~ "foo" +#~ "fooooooo" +#~ msgstr "" +#~ "Voh" +#~ "Vooooh" + +# This message is not obsolete +#: main.py:1 +msgid "bar" +msgstr "Bahr" +''') + catalog = pofile.read_po(buf) + self.assertEqual(1, len(catalog)) + message = catalog[u'bar'] + self.assertEqual(u'bar', message.id) + self.assertEqual(u'Bahr', message.string) + self.assertEqual(['This message is not obsolete'], message.user_comments) + def test_with_context(self): buf = BytesIO(b'''# Some string in the menu #: main.py:1 |