summaryrefslogtreecommitdiff
path: root/babel
diff options
context:
space:
mode:
authorMichael Birtwell <michael.birtwell@starleaf.com>2016-10-12 16:13:50 +0100
committerMichael Birtwell <michael.birtwell@starleaf.com>2016-11-21 14:32:49 +0000
commit467bf955711e9224bb9f51a9620282d98e61f6c6 (patch)
tree8ba1d2f6e25169f52745ae38ef9ac061d44e38f0 /babel
parent15365e2cc99d0fe05b90aeb2fddec44c6ffef762 (diff)
downloadbabel-467bf955711e9224bb9f51a9620282d98e61f6c6.tar.gz
pofile parsing. Fix handling of obsolete + refactor
Fixes the handling of the unit before the obsolete unit. Previously it would mark the unit before an obsolete unit as obsolete also. Some refactoring: * the transition between finishing one unit and starting the next is clearer * separate the processing of keywords and continuation lines * combine the reset and initialisation code * Make the handling of strings consistent. * Add some nascent error handling, removed some errors in test inputs
Diffstat (limited to 'babel')
-rw-r--r--babel/messages/pofile.py152
1 files changed, 99 insertions, 53 deletions
diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py
index 741e25f..2d1854b 100644
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -73,6 +73,23 @@ def denormalize(string):
return unescape(string)
+class _NormalizedString(object):
+
+ def __init__(self, *args):
+ self._strs = []
+ for arg in args:
+ self.append(arg)
+
+ def append(self, s):
+ self._strs.append(s.strip())
+
+ def denormalize(self):
+ return ''.join(map(unescape, self._strs))
+
+ def __nonzero__(self):
+ return bool(self._strs)
+
+
class PoFileParser(object):
"""Support class to read messages from a ``gettext`` PO (portable object) file
and add them to a `Catalog`
@@ -80,19 +97,29 @@ class PoFileParser(object):
See `read_po` for simple cases.
"""
+ _keywords = [
+ 'msgid',
+ 'msgstr',
+ 'msgctxt',
+ 'msgid_plural',
+ ]
+
def __init__(self, catalog, ignore_obsolete=False):
self.catalog = catalog
self.ignore_obsolete = ignore_obsolete
self.counter = 0
self.offset = 0
+ self._reset_message_state()
+
+ def _reset_message_state(self):
self.messages = []
self.translations = []
self.locations = []
self.flags = []
self.user_comments = []
self.auto_comments = []
+ self.context = None
self.obsolete = False
- self.context = []
self.in_msgid = False
self.in_msgstr = False
self.in_msgctxt = False
@@ -104,9 +131,9 @@ class PoFileParser(object):
"""
self.translations.sort()
if len(self.messages) > 1:
- msgid = tuple([denormalize(m) for m in self.messages])
+ msgid = tuple([m.denormalize() for m in self.messages])
else:
- msgid = denormalize(self.messages[0])
+ msgid = self.messages[0].denormalize()
if isinstance(msgid, (list, tuple)):
string = []
for idx in range(self.catalog.num_plurals):
@@ -114,11 +141,11 @@ class PoFileParser(object):
string.append(self.translations[idx])
except IndexError:
string.append((idx, ''))
- string = tuple([denormalize(t[1]) for t in string])
+ string = tuple([t[1].denormalize() for t in string])
else:
- string = denormalize(self.translations[0][1])
+ string = self.translations[0][1].denormalize()
if self.context:
- msgctxt = denormalize('\n'.join(self.context))
+ msgctxt = self.context.denormalize()
else:
msgctxt = None
message = Message(msgid, string, list(self.locations), set(self.flags),
@@ -129,55 +156,70 @@ class PoFileParser(object):
self.catalog.obsolete[msgid] = message
else:
self.catalog[msgid] = message
- del self.messages[:]
- del self.translations[:]
- del self.context[:]
- del self.locations[:]
- del self.flags[:]
- del self.auto_comments[:]
- del self.user_comments[:]
- self.obsolete = False
self.counter += 1
+ self._reset_message_state()
- def _process_message_line(self, lineno, line):
- if line.startswith('msgid_plural'):
- self.in_msgid = True
- msg = line[12:].lstrip()
- self.messages.append(msg)
- elif line.startswith('msgid'):
- self.in_msgid = True
+ def _finish_current_message(self):
+ if self.messages:
+ self._add_message()
+
+ def _process_message_line(self, lineno, line, obsolete=False):
+ if line.startswith('"'):
+ self._process_string_continuation_line(line, lineno)
+ else:
+ self._process_keyword_line(lineno, line, obsolete)
+
+ def _process_keyword_line(self, lineno, line, obsolete=False):
+
+ for keyword in self._keywords:
+ if line.startswith(keyword) and line[len(keyword)] in [' ', '[']:
+ arg = line[len(keyword):]
+ break
+ else:
+ self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.")
+ return
+
+ if keyword in ['msgid', 'msgctxt']:
+ self._finish_current_message()
+
+ self.obsolete = obsolete
+
+ # The line that has the msgid is stored as the offset of the msg
+ # should this be the msgctxt if it has one?
+ if keyword == 'msgid':
self.offset = lineno
- txt = line[5:].lstrip()
- if self.messages:
- self._add_message()
- self.messages.append(txt)
- elif line.startswith('msgstr'):
+
+ if keyword in ['msgid', 'msgid_plural']:
+ self.in_msgid = True
+ self.messages.append(_NormalizedString(arg))
+
+ elif keyword == 'msgstr':
self.in_msgid = False
self.in_msgstr = True
- msg = line[6:].lstrip()
- if msg.startswith('['):
- idx, msg = msg[1:].split(']', 1)
- self.translations.append([int(idx), msg.lstrip()])
+ if arg.startswith('['):
+ idx, msg = arg[1:].split(']', 1)
+ self.translations.append([int(idx), _NormalizedString(msg)])
else:
- self.translations.append([0, msg])
- elif line.startswith('msgctxt'):
- if self.messages:
- self._add_message()
- self.in_msgid = self.in_msgstr = False
- self.context.append(line[7:].lstrip())
- elif line.startswith('"'):
- if self.in_msgid:
- self.messages[-1] += u'\n' + line.rstrip()
- elif self.in_msgstr:
- self.translations[-1][1] += u'\n' + line.rstrip()
- elif self.in_msgctxt:
- self.context.append(line.rstrip())
+ self.translations.append([0, _NormalizedString(arg)])
+ elif keyword == 'msgctxt':
+ self.context = _NormalizedString(arg)
+
+ def _process_string_continuation_line(self, line, lineno):
+ if self.in_msgid:
+ s = self.messages[-1]
+ elif self.in_msgstr:
+ s = self.translations[-1][1]
+ elif self.in_msgctxt:
+ s = self.context
+ else:
+ self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
+ return
+ s.append(line)
def _process_comment(self, line):
- self.in_msgid = self.in_msgstr = False
- if self.messages and self.translations:
- self._add_message()
+ self._finish_current_message()
+
if line[1:].startswith(':'):
for location in line[2:].lstrip().split():
pos = location.rfind(':')
@@ -211,25 +253,29 @@ class PoFileParser(object):
line = line.strip()
if not isinstance(line, text_type):
line = line.decode(self.catalog.charset)
+ if not line:
+ continue
if line.startswith('#'):
if line[1:].startswith('~'):
- self.obsolete = True
- self._process_message_line(lineno, line[2:].lstrip())
+ self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
else:
self._process_comment(line)
else:
self._process_message_line(lineno, line)
- if self.messages:
- self._add_message()
+ self._finish_current_message()
# No actual messages found, but there was some info in comments, from which
# we'll construct an empty header message
- elif not self.counter and (self.flags or self.user_comments or self.auto_comments):
- self.messages.append(u'')
- self.translations.append([0, u''])
+ if not self.counter and (self.flags or self.user_comments or self.auto_comments):
+ self.messages.append(_NormalizedString(u'""'))
+ self.translations.append([0, _NormalizedString(u'""')])
self._add_message()
+ def _invalid_pofile(self, line, lineno, msg):
+ print("WARNING:", msg)
+ print("WARNING: Problem on line {0}: {1}".format(lineno + 1, line))
+
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None):
"""Read messages from a ``gettext`` PO (portable object) file from the given