summary refs log tree commit diff
diff options
context:
space:
mode:
author Aarni Koskela <akx@iki.fi> 2016-07-08 20:39:56 +0300
committer GitHub <noreply@github.com> 2016-07-08 20:39:56 +0300
commit 30d4897e6bc03a63eee672afa7b9080f66a13b2b (patch)
tree 935fd65a4d6c1899fa057f3f4da0003b12e14413
parent 86170f409c195394753b603fdbde2ffa8583b1e7 (diff)
parent 38a67790931ba3272fc395b8470e2d062d02961b (diff)
download babel-30d4897e6bc03a63eee672afa7b9080f66a13b2b.tar.gz
Merge pull request #429 from mbirtwell/multi_line_obsolete
Support reading multi line obsolete units in po files
-rw-r--r-- .travis.yml 6
-rw-r--r-- babel/messages/pofile.py 293
-rw-r--r-- tests/messages/test_pofile.py 42
3 files changed, 202 insertions, 139 deletions
diff --git a/.travis.yml b/.travis.yml
index 978d377..08ac545 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -33,12 +33,6 @@ matrix:
python: 3.4
- os: linux
python: 3.5
- - os: osx
- language: generic
- env:
- - PYTHON_VERSION=3.5.1
- - PYENV_ROOT=~/.pyenv
- - PATH=$PYENV_ROOT/shims:$PATH:$PYENV_ROOT/bin
install:
- bash .ci/deps.${TRAVIS_OS_NAME}.sh
diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py
index a775ec0..f3ba2fa 100644
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -73,6 +73,164 @@ def denormalize(string):
return unescape(string)
+class PoFileParser(object):
+ """Support class to read messages from a ``gettext`` PO (portable object) file
+ and add them to a `Catalog`
+
+ See `read_po` for simple cases.
+ """
+
+ def __init__(self, catalog, ignore_obsolete=False):
+ self.catalog = catalog
+ self.ignore_obsolete = ignore_obsolete
+ self.counter = 0
+ self.offset = 0
+ self.messages = []
+ self.translations = []
+ self.locations = []
+ self.flags = []
+ self.user_comments = []
+ self.auto_comments = []
+ self.obsolete = False
+ self.context = []
+ self.in_msgid = False
+ self.in_msgstr = False
+ self.in_msgctxt = False
+
+ def _add_message(self):
+ """
+ Add a message to the catalog based on the current parser state and
+ clear the state ready to process the next message.
+ """
+ self.translations.sort()
+ if len(self.messages) > 1:
+ msgid = tuple([denormalize(m) for m in self.messages])
+ else:
+ msgid = denormalize(self.messages[0])
+ if isinstance(msgid, (list, tuple)):
+ string = []
+ for idx in range(self.catalog.num_plurals):
+ try:
+ string.append(self.translations[idx])
+ except IndexError:
+ string.append((idx, ''))
+ string = tuple([denormalize(t[1]) for t in string])
+ else:
+ string = denormalize(self.translations[0][1])
+ if self.context:
+ msgctxt = denormalize('\n'.join(self.context))
+ else:
+ msgctxt = None
+ message = Message(msgid, string, list(self.locations), set(self.flags),
+ self.auto_comments, self.user_comments, lineno=self.offset + 1,
+ context=msgctxt)
+ if self.obsolete:
+ if not self.ignore_obsolete:
+ self.catalog.obsolete[msgid] = message
+ else:
+ self.catalog[msgid] = message
+ del self.messages[:]
+ del self.translations[:]
+ del self.context[:]
+ del self.locations[:]
+ del self.flags[:]
+ del self.auto_comments[:]
+ del self.user_comments[:]
+ self.obsolete = False
+ self.counter += 1
+
+ def _process_message_line(self, lineno, line):
+ if line.startswith('msgid_plural'):
+ self.in_msgid = True
+ msg = line[12:].lstrip()
+ self.messages.append(msg)
+ elif line.startswith('msgid'):
+ self.in_msgid = True
+ self.offset = lineno
+ txt = line[5:].lstrip()
+ if self.messages:
+ self._add_message()
+ self.messages.append(txt)
+ elif line.startswith('msgstr'):
+ self.in_msgid = False
+ self.in_msgstr = True
+ msg = line[6:].lstrip()
+ if msg.startswith('['):
+ idx, msg = msg[1:].split(']', 1)
+ self.translations.append([int(idx), msg.lstrip()])
+ else:
+ self.translations.append([0, msg])
+ elif line.startswith('msgctxt'):
+ if self.messages:
+ self._add_message()
+ self.in_msgid = self.in_msgstr = False
+ self.context.append(line[7:].lstrip())
+ elif line.startswith('"'):
+ if self.in_msgid:
+ self.messages[-1] += u'\n' + line.rstrip()
+ elif self.in_msgstr:
+ self.translations[-1][1] += u'\n' + line.rstrip()
+ elif self.in_msgctxt:
+ self.context.append(line.rstrip())
+
+ def _process_comment(self, line):
+
+ self.in_msgid = self.in_msgstr = False
+ if self.messages and self.translations:
+ self._add_message()
+ if line[1:].startswith(':'):
+ for location in line[2:].lstrip().split():
+ pos = location.rfind(':')
+ if pos >= 0:
+ try:
+ lineno = int(location[pos + 1:])
+ except ValueError:
+ continue
+ self.locations.append((location[:pos], lineno))
+ else:
+ self.locations.append((location, None))
+ elif line[1:].startswith(','):
+ for flag in line[2:].lstrip().split(','):
+ self.flags.append(flag.strip())
+ elif line[1:].startswith('.'):
+ # These are called auto-comments
+ comment = line[2:].strip()
+ if comment: # Just check that we're not adding empty comments
+ self.auto_comments.append(comment)
+ else:
+ # These are called user comments
+ self.user_comments.append(line[1:].strip())
+
+ def parse(self, fileobj):
+ """
+ Reads from the file-like object `fileobj` and adds any po file
+ units found in it to the `Catalog` supplied to the constructor.
+ """
+
+ for lineno, line in enumerate(fileobj.readlines()):
+ line = line.strip()
+ if not isinstance(line, text_type):
+ line = line.decode(self.catalog.charset)
+ if line.startswith('#'):
+ if line[1:].startswith('~'):
+ self.obsolete = True
+ self._process_message_line(lineno, line[2:].lstrip())
+ else:
+ self._process_comment(line)
+ else:
+ self._process_message_line(lineno, line)
+
+ if self.messages:
+ self._add_message()
+
+ # No actual messages found, but there was some info in comments, from which
+ # we'll construct an empty header message
+ elif not self.counter and (self.flags or self.user_comments or self.auto_comments):
+ self.messages.append(u'')
+ self.translations.append([0, u''])
+ self._add_message()
+
+
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None):
"""Read messages from a ``gettext`` PO (portable object) file from the given
file-like object and return a `Catalog`.
@@ -120,139 +278,8 @@ def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=No
:param charset: the character set of the catalog.
"""
catalog = Catalog(locale=locale, domain=domain, charset=charset)
-
- counter = [0]
- offset = [0]
- messages = []
- translations = []
- locations = []
- flags = []
- user_comments = []
- auto_comments = []
- obsolete = [False]
- context = []
- in_msgid = [False]
- in_msgstr = [False]
- in_msgctxt = [False]
-
- def _add_message():
- translations.sort()
- if len(messages) > 1:
- msgid = tuple([denormalize(m) for m in messages])
- else:
- msgid = denormalize(messages[0])
- if isinstance(msgid, (list, tuple)):
- string = []
- for idx in range(catalog.num_plurals):
- try:
- string.append(translations[idx])
- except IndexError:
- string.append((idx, ''))
- string = tuple([denormalize(t[1]) for t in string])
- else:
- string = denormalize(translations[0][1])
- if context:
- msgctxt = denormalize('\n'.join(context))
- else:
- msgctxt = None
- message = Message(msgid, string, list(locations), set(flags),
- auto_comments, user_comments, lineno=offset[0] + 1,
- context=msgctxt)
- if obsolete[0]:
- if not ignore_obsolete:
- catalog.obsolete[msgid] = message
- else:
- catalog[msgid] = message
- del messages[:]
- del translations[:]
- del context[:]
- del locations[:]
- del flags[:]
- del auto_comments[:]
- del user_comments[:]
- obsolete[0] = False
- counter[0] += 1
-
- def _process_message_line(lineno, line):
- if line.startswith('msgid_plural'):
- in_msgid[0] = True
- msg = line[12:].lstrip()
- messages.append(msg)
- elif line.startswith('msgid'):
- in_msgid[0] = True
- offset[0] = lineno
- txt = line[5:].lstrip()
- if messages:
- _add_message()
- messages.append(txt)
- elif line.startswith('msgstr'):
- in_msgid[0] = False
- in_msgstr[0] = True
- msg = line[6:].lstrip()
- if msg.startswith('['):
- idx, msg = msg[1:].split(']', 1)
- translations.append([int(idx), msg.lstrip()])
- else:
- translations.append([0, msg])
- elif line.startswith('msgctxt'):
- if messages:
- _add_message()
- in_msgid[0] = in_msgstr[0] = False
- context.append(line[7:].lstrip())
- elif line.startswith('"'):
- if in_msgid[0]:
- messages[-1] += u'\n' + line.rstrip()
- elif in_msgstr[0]:
- translations[-1][1] += u'\n' + line.rstrip()
- elif in_msgctxt[0]:
- context.append(line.rstrip())
-
- for lineno, line in enumerate(fileobj.readlines()):
- line = line.strip()
- if not isinstance(line, text_type):
- line = line.decode(catalog.charset)
- if line.startswith('#'):
- in_msgid[0] = in_msgstr[0] = False
- if messages and translations:
- _add_message()
- if line[1:].startswith(':'):
- for location in line[2:].lstrip().split():
- pos = location.rfind(':')
- if pos >= 0:
- try:
- lineno = int(location[pos + 1:])
- except ValueError:
- continue
- locations.append((location[:pos], lineno))
- else:
- locations.append((location, None))
- elif line[1:].startswith(','):
- for flag in line[2:].lstrip().split(','):
- flags.append(flag.strip())
- elif line[1:].startswith('~'):
- obsolete[0] = True
- _process_message_line(lineno, line[2:].lstrip())
- elif line[1:].startswith('.'):
- # These are called auto-comments
- comment = line[2:].strip()
- if comment: # Just check that we're not adding empty comments
- auto_comments.append(comment)
- else:
- # These are called user comments
- user_comments.append(line[1:].strip())
- else:
- _process_message_line(lineno, line)
-
- if messages:
- _add_message()
-
- # No actual messages found, but there was some info in comments, from which
- # we'll construct an empty header message
- elif not counter[0] and (flags or user_comments or auto_comments):
- messages.append(u'')
- translations.append([0, u''])
- _add_message()
-
+ parser = PoFileParser(catalog, ignore_obsolete)
+ parser.parse(fileobj)
return catalog
diff --git a/tests/messages/test_pofile.py b/tests/messages/test_pofile.py
index 1bac360..a271a84 100644
--- a/tests/messages/test_pofile.py
+++ b/tests/messages/test_pofile.py
@@ -164,6 +164,48 @@ msgstr "Bahr"
self.assertEqual(1, len(catalog))
self.assertEqual(0, len(catalog.obsolete))
+ def test_multi_line_obsolete_message(self):
+ buf = StringIO(r'''# This is an obsolete message
+#~ msgid ""
+#~ "foo"
+#~ "foo"
+#~ msgstr ""
+#~ "Voh"
+#~ "Vooooh"
+
+# This message is not obsolete
+#: main.py:1
+msgid "bar"
+msgstr "Bahr"
+''')
+ catalog = pofile.read_po(buf)
+ self.assertEqual(1, len(catalog.obsolete))
+ message = catalog.obsolete[u'foofoo']
+ self.assertEqual(u'foofoo', message.id)
+ self.assertEqual(u'VohVooooh', message.string)
+ self.assertEqual(['This is an obsolete message'], message.user_comments)
+
+ def test_unit_following_multi_line_obsolete_message(self):
+ buf = StringIO(r'''# This is an obsolete message
+#~ msgid ""
+#~ "foo"
+#~ "fooooooo"
+#~ msgstr ""
+#~ "Voh"
+#~ "Vooooh"
+
+# This message is not obsolete
+#: main.py:1
+msgid "bar"
+msgstr "Bahr"
+''')
+ catalog = pofile.read_po(buf)
+ self.assertEqual(1, len(catalog))
+ message = catalog[u'bar']
+ self.assertEqual(u'bar', message.id)
+ self.assertEqual(u'Bahr', message.string)
+ self.assertEqual(['This message is not obsolete'], message.user_comments)
+
def test_with_context(self):
buf = BytesIO(b'''# Some string in the menu
#: main.py:1