Merge pull request #429 from mbirtwell/multi_line_obsolete

Support reading multi line obsolete units in po files
author: Aarni Koskela <akx@iki.fi> 2016-07-08 20:39:56 +0300
committer: GitHub <noreply@github.com> 2016-07-08 20:39:56 +0300
commit: 30d4897e6bc03a63eee672afa7b9080f66a13b2b (patch)
tree: 935fd65a4d6c1899fa057f3f4da0003b12e14413
parent: 86170f409c195394753b603fdbde2ffa8583b1e7 (diff)
parent: 38a67790931ba3272fc395b8470e2d062d02961b (diff)
download: babel-30d4897e6bc03a63eee672afa7b9080f66a13b2b.tar.gz
3 files changed, 202 insertions, 139 deletions
diff --git a/.travis.yml b/.travis.yml
index 978d377..08ac545 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -33,12 +33,6 @@ matrix:
       python: 3.4
     - os: linux
       python: 3.5
-    - os: osx
-      language: generic
-      env:
-      - PYTHON_VERSION=3.5.1
-      - PYENV_ROOT=~/.pyenv
-      - PATH=$PYENV_ROOT/shims:$PATH:$PYENV_ROOT/bin
 
 install:
   - bash .ci/deps.${TRAVIS_OS_NAME}.sh
diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py
index a775ec0..f3ba2fa 100644
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -73,6 +73,164 @@ def denormalize(string):
         return unescape(string)
 
 
+class PoFileParser(object):
+    """Support class to read messages from a ``gettext`` PO (portable object) file
+    and add them to a `Catalog`
+
+    See `read_po` for simple cases.
+    """
+
+    def __init__(self, catalog, ignore_obsolete=False):
+        self.catalog = catalog
+        self.ignore_obsolete = ignore_obsolete
+        self.counter = 0
+        self.offset = 0
+        self.messages = []
+        self.translations = []
+        self.locations = []
+        self.flags = []
+        self.user_comments = []
+        self.auto_comments = []
+        self.obsolete = False
+        self.context = []
+        self.in_msgid = False
+        self.in_msgstr = False
+        self.in_msgctxt = False
+
+    def _add_message(self):
+        """
+        Add a message to the catalog based on the current parser state and
+        clear the state ready to process the next message.
+        """
+        self.translations.sort()
+        if len(self.messages) > 1:
+            msgid = tuple([denormalize(m) for m in self.messages])
+        else:
+            msgid = denormalize(self.messages[0])
+        if isinstance(msgid, (list, tuple)):
+            string = []
+            for idx in range(self.catalog.num_plurals):
+                try:
+                    string.append(self.translations[idx])
+                except IndexError:
+                    string.append((idx, ''))
+            string = tuple([denormalize(t[1]) for t in string])
+        else:
+            string = denormalize(self.translations[0][1])
+        if self.context:
+            msgctxt = denormalize('\n'.join(self.context))
+        else:
+            msgctxt = None
+        message = Message(msgid, string, list(self.locations), set(self.flags),
+                          self.auto_comments, self.user_comments, lineno=self.offset + 1,
+                          context=msgctxt)
+        if self.obsolete:
+            if not self.ignore_obsolete:
+                self.catalog.obsolete[msgid] = message
+        else:
+            self.catalog[msgid] = message
+        del self.messages[:]
+        del self.translations[:]
+        del self.context[:]
+        del self.locations[:]
+        del self.flags[:]
+        del self.auto_comments[:]
+        del self.user_comments[:]
+        self.obsolete = False
+        self.counter += 1
+
+    def _process_message_line(self, lineno, line):
+        if line.startswith('msgid_plural'):
+            self.in_msgid = True
+            msg = line[12:].lstrip()
+            self.messages.append(msg)
+        elif line.startswith('msgid'):
+            self.in_msgid = True
+            self.offset = lineno
+            txt = line[5:].lstrip()
+            if self.messages:
+                self._add_message()
+            self.messages.append(txt)
+        elif line.startswith('msgstr'):
+            self.in_msgid = False
+            self.in_msgstr = True
+            msg = line[6:].lstrip()
+            if msg.startswith('['):
+                idx, msg = msg[1:].split(']', 1)
+                self.translations.append([int(idx), msg.lstrip()])
+            else:
+                self.translations.append([0, msg])
+        elif line.startswith('msgctxt'):
+            if self.messages:
+                self._add_message()
+            self.in_msgid = self.in_msgstr = False
+            self.context.append(line[7:].lstrip())
+        elif line.startswith('"'):
+            if self.in_msgid:
+                self.messages[-1] += u'\n' + line.rstrip()
+            elif self.in_msgstr:
+                self.translations[-1][1] += u'\n' + line.rstrip()
+            elif self.in_msgctxt:
+                self.context.append(line.rstrip())
+
+    def _process_comment(self, line):
+
+        self.in_msgid = self.in_msgstr = False
+        if self.messages and self.translations:
+            self._add_message()
+        if line[1:].startswith(':'):
+            for location in line[2:].lstrip().split():
+                pos = location.rfind(':')
+                if pos >= 0:
+                    try:
+                        lineno = int(location[pos + 1:])
+                    except ValueError:
+                        continue
+                    self.locations.append((location[:pos], lineno))
+                else:
+                    self.locations.append((location, None))
+        elif line[1:].startswith(','):
+            for flag in line[2:].lstrip().split(','):
+                self.flags.append(flag.strip())
+        elif line[1:].startswith('.'):
+            # These are called auto-comments
+            comment = line[2:].strip()
+            if comment:  # Just check that we're not adding empty comments
+                self.auto_comments.append(comment)
+        else:
+            # These are called user comments
+            self.user_comments.append(line[1:].strip())
+
+    def parse(self, fileobj):
+        """
+        Reads from the file-like object `fileobj` and adds any po file
+        units found in it to the `Catalog` supplied to the constructor.
+        """
+
+        for lineno, line in enumerate(fileobj.readlines()):
+            line = line.strip()
+            if not isinstance(line, text_type):
+                line = line.decode(self.catalog.charset)
+            if line.startswith('#'):
+                if line[1:].startswith('~'):
+                    self.obsolete = True
+                    self._process_message_line(lineno, line[2:].lstrip())
+                else:
+                    self._process_comment(line)
+            else:
+                self._process_message_line(lineno, line)
+
+        if self.messages:
+            self._add_message()
+
+        # No actual messages found, but there was some info in comments, from which
+        # we'll construct an empty header message
+        elif not self.counter and (self.flags or self.user_comments or self.auto_comments):
+            self.messages.append(u'')
+            self.translations.append([0, u''])
+            self._add_message()
+
+
 def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None):
     """Read messages from a ``gettext`` PO (portable object) file from the given
     file-like object and return a `Catalog`.
@@ -120,139 +278,8 @@ def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=No
     :param charset: the character set of the catalog.
     """
     catalog = Catalog(locale=locale, domain=domain, charset=charset)
-
-    counter = [0]
-    offset = [0]
-    messages = []
-    translations = []
-    locations = []
-    flags = []
-    user_comments = []
-    auto_comments = []
-    obsolete = [False]
-    context = []
-    in_msgid = [False]
-    in_msgstr = [False]
-    in_msgctxt = [False]
-
-    def _add_message():
-        translations.sort()
-        if len(messages) > 1:
-            msgid = tuple([denormalize(m) for m in messages])
-        else:
-            msgid = denormalize(messages[0])
-        if isinstance(msgid, (list, tuple)):
-            string = []
-            for idx in range(catalog.num_plurals):
-                try:
-                    string.append(translations[idx])
-                except IndexError:
-                    string.append((idx, ''))
-            string = tuple([denormalize(t[1]) for t in string])
-        else:
-            string = denormalize(translations[0][1])
-        if context:
-            msgctxt = denormalize('\n'.join(context))
-        else:
-            msgctxt = None
-        message = Message(msgid, string, list(locations), set(flags),
-                          auto_comments, user_comments, lineno=offset[0] + 1,
-                          context=msgctxt)
-        if obsolete[0]:
-            if not ignore_obsolete:
-                catalog.obsolete[msgid] = message
-        else:
-            catalog[msgid] = message
-        del messages[:]
-        del translations[:]
-        del context[:]
-        del locations[:]
-        del flags[:]
-        del auto_comments[:]
-        del user_comments[:]
-        obsolete[0] = False
-        counter[0] += 1
-
-    def _process_message_line(lineno, line):
-        if line.startswith('msgid_plural'):
-            in_msgid[0] = True
-            msg = line[12:].lstrip()
-            messages.append(msg)
-        elif line.startswith('msgid'):
-            in_msgid[0] = True
-            offset[0] = lineno
-            txt = line[5:].lstrip()
-            if messages:
-                _add_message()
-            messages.append(txt)
-        elif line.startswith('msgstr'):
-            in_msgid[0] = False
-            in_msgstr[0] = True
-            msg = line[6:].lstrip()
-            if msg.startswith('['):
-                idx, msg = msg[1:].split(']', 1)
-                translations.append([int(idx), msg.lstrip()])
-            else:
-                translations.append([0, msg])
-        elif line.startswith('msgctxt'):
-            if messages:
-                _add_message()
-            in_msgid[0] = in_msgstr[0] = False
-            context.append(line[7:].lstrip())
-        elif line.startswith('"'):
-            if in_msgid[0]:
-                messages[-1] += u'\n' + line.rstrip()
-            elif in_msgstr[0]:
-                translations[-1][1] += u'\n' + line.rstrip()
-            elif in_msgctxt[0]:
-                context.append(line.rstrip())
-
-    for lineno, line in enumerate(fileobj.readlines()):
-        line = line.strip()
-        if not isinstance(line, text_type):
-            line = line.decode(catalog.charset)
-        if line.startswith('#'):
-            in_msgid[0] = in_msgstr[0] = False
-            if messages and translations:
-                _add_message()
-            if line[1:].startswith(':'):
-                for location in line[2:].lstrip().split():
-                    pos = location.rfind(':')
-                    if pos >= 0:
-                        try:
-                            lineno = int(location[pos + 1:])
-                        except ValueError:
-                            continue
-                        locations.append((location[:pos], lineno))
-                    else:
-                        locations.append((location, None))
-            elif line[1:].startswith(','):
-                for flag in line[2:].lstrip().split(','):
-                    flags.append(flag.strip())
-            elif line[1:].startswith('~'):
-                obsolete[0] = True
-                _process_message_line(lineno, line[2:].lstrip())
-            elif line[1:].startswith('.'):
-                # These are called auto-comments
-                comment = line[2:].strip()
-                if comment:  # Just check that we're not adding empty comments
-                    auto_comments.append(comment)
-            else:
-                # These are called user comments
-                user_comments.append(line[1:].strip())
-        else:
-            _process_message_line(lineno, line)
-
-    if messages:
-        _add_message()
-
-    # No actual messages found, but there was some info in comments, from which
-    # we'll construct an empty header message
-    elif not counter[0] and (flags or user_comments or auto_comments):
-        messages.append(u'')
-        translations.append([0, u''])
-        _add_message()
-
+    parser = PoFileParser(catalog, ignore_obsolete)
+    parser.parse(fileobj)
     return catalog
 
 
diff --git a/tests/messages/test_pofile.py b/tests/messages/test_pofile.py
index 1bac360..a271a84 100644
--- a/tests/messages/test_pofile.py
+++ b/tests/messages/test_pofile.py
@@ -164,6 +164,48 @@ msgstr "Bahr"
         self.assertEqual(1, len(catalog))
         self.assertEqual(0, len(catalog.obsolete))
 
+    def test_multi_line_obsolete_message(self):
+        buf = StringIO(r'''# This is an obsolete message
+#~ msgid ""
+#~ "foo"
+#~ "foo"
+#~ msgstr ""
+#~ "Voh"
+#~ "Vooooh"
+
+# This message is not obsolete
+#: main.py:1
+msgid "bar"
+msgstr "Bahr"
+''')
+        catalog = pofile.read_po(buf)
+        self.assertEqual(1, len(catalog.obsolete))
+        message = catalog.obsolete[u'foofoo']
+        self.assertEqual(u'foofoo', message.id)
+        self.assertEqual(u'VohVooooh', message.string)
+        self.assertEqual(['This is an obsolete message'], message.user_comments)
+
+    def test_unit_following_multi_line_obsolete_message(self):
+        buf = StringIO(r'''# This is an obsolete message
+#~ msgid ""
+#~ "foo"
+#~ "fooooooo"
+#~ msgstr ""
+#~ "Voh"
+#~ "Vooooh"
+
+# This message is not obsolete
+#: main.py:1
+msgid "bar"
+msgstr "Bahr"
+''')
+        catalog = pofile.read_po(buf)
+        self.assertEqual(1, len(catalog))
+        message = catalog[u'bar']
+        self.assertEqual(u'bar', message.id)
+        self.assertEqual(u'Bahr', message.string)
+        self.assertEqual(['This message is not obsolete'], message.user_comments)
+
     def test_with_context(self):
         buf = BytesIO(b'''# Some string in the menu
 #: main.py:1
author	Aarni Koskela <akx@iki.fi>	2016-07-08 20:39:56 +0300
committer	GitHub <noreply@github.com>	2016-07-08 20:39:56 +0300
commit	30d4897e6bc03a63eee672afa7b9080f66a13b2b (patch)
tree	935fd65a4d6c1899fa057f3f4da0003b12e14413
parent	86170f409c195394753b603fdbde2ffa8583b1e7 (diff)
parent	38a67790931ba3272fc395b8470e2d062d02961b (diff)
download	babel-30d4897e6bc03a63eee672afa7b9080f66a13b2b.tar.gz