summary refs log tree commit diff
path: root/i18n/polib.py
diff options
context:
space:
mode:
Diffstat (limited to 'i18n/polib.py')
-rw-r--r-- i18n/polib.py 1501
1 files changed, 771 insertions, 730 deletions
diff --git a/i18n/polib.py b/i18n/polib.py
index c09aebc..88428ce 100644
--- a/i18n/polib.py
+++ b/i18n/polib.py
@@ -5,48 +5,113 @@
# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
"""
-**polib** allows you to manipulate, create, modify gettext files (pot, po and
-mo files). You can load existing files, iterate through it's entries, add,
-modify entries, comments or metadata, etc. or create new po files from scratch.
-
-**polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
-:func:`~polib.mofile` convenience functions.
+**polib** allows you to manipulate, create, modify gettext files (pot, po
+and mo files). You can load existing files, iterate through its entries,
+add, modify entries, comments or metadata, etc... or create new po files
+from scratch.
+
+**polib** provides a simple and pythonic API, exporting only three
+convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
+four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
+new files/entries.
+
+**Basic example**:
+
+>>> import polib
+>>> # load an existing po file
+>>> po = polib.pofile('tests/test_utf8.po')
+>>> for entry in po:
+... # do something with entry...
+... pass
+>>> # add an entry
+>>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
+>>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
+>>> po.append(entry)
+>>> # to save our modified po file:
+>>> # po.save()
+>>> # or you may want to compile the po file
+>>> # po.save_as_mofile('tests/test_utf8.mo')
"""
-__author__ = 'David Jean Louis <izimobil@gmail.com>'
-__version__ = '0.6.4'
+__author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
+__version__ = '0.5.2'
__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
-import array
import codecs
-import os
-import re
import struct
-import sys
import textwrap
import types
+import re
-
-# the default encoding to use when encoding cannot be detected
default_encoding = 'utf-8'
-# _pofile_or_mofile {{{
+# function pofile() {{{
-def _pofile_or_mofile(f, type, **kwargs):
+def pofile(fpath, **kwargs):
"""
- Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
- honor the DRY concept.
+ Convenience function that parses the po/pot file *fpath* and returns
+ a POFile instance.
+
+ **Keyword arguments**:
+ - *fpath*: string, full or relative path to the po/pot file to parse
+ - *wrapwidth*: integer, the wrap width, only useful when -w option was
+ passed to xgettext (optional, default to 78)
+ - *autodetect_encoding*: boolean, if set to False the function will
+ not try to detect the po file encoding (optional, default to True)
+ - *encoding*: string, an encoding, only relevant if autodetect_encoding
+ is set to False
+ - *check_for_duplicates*: whether to check for duplicate entries when
+ adding entries to the file, default: False (optional)
+
+ **Example**:
+
+ >>> import polib
+ >>> po = polib.pofile('tests/test_weird_occurrences.po',
+ ... check_for_duplicates=True)
+ >>> po #doctest: +ELLIPSIS
+ <POFile instance at ...>
+ >>> import os, tempfile
+ >>> all_attrs = ('msgctxt', 'msgid', 'msgstr', 'msgid_plural',
+ ... 'msgstr_plural', 'obsolete', 'comment', 'tcomment',
+ ... 'occurrences', 'flags', 'previous_msgctxt',
+ ... 'previous_msgid', 'previous_msgid_plural')
+ >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
+ ... orig_po = polib.pofile('tests/'+fname)
+ ... tmpf = tempfile.NamedTemporaryFile().name
+ ... orig_po.save(tmpf)
+ ... try:
+ ... new_po = polib.pofile(tmpf)
+ ... for old, new in zip(orig_po, new_po):
+ ... for attr in all_attrs:
+ ... if getattr(old, attr) != getattr(new, attr):
+ ... getattr(old, attr)
+ ... getattr(new, attr)
+ ... finally:
+ ... os.unlink(tmpf)
+ >>> po_file = polib.pofile('tests/test_save_as_mofile.po')
+ >>> tmpf = tempfile.NamedTemporaryFile().name
+ >>> po_file.save_as_mofile(tmpf)
+ >>> try:
+ ... mo_file = polib.mofile(tmpf)
+ ... for old, new in zip(po_file, mo_file):
+ ... if po_file._encode(old.msgid) != mo_file._encode(new.msgid):
+ ... 'OLD: ', po_file._encode(old.msgid)
+ ... 'NEW: ', mo_file._encode(new.msgid)
+ ... if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):
+ ... 'OLD: ', po_file._encode(old.msgstr)
+ ... 'NEW: ', mo_file._encode(new.msgstr)
+ ... print new.msgstr
+ ... finally:
+ ... os.unlink(tmpf)
"""
- # get the file encoding
- enc = kwargs.get('encoding')
- if enc is None:
- enc = detect_encoding(f, type == 'mofile')
-
- # parse the file
- kls = type == 'pofile' and _POFileParser or _MOFileParser
- parser = kls(
- f,
+ if kwargs.get('autodetect_encoding', True):
+ enc = detect_encoding(fpath)
+ else:
+ enc = kwargs.get('encoding', default_encoding)
+ check_for_duplicates = kwargs.get('check_for_duplicates', False)
+ parser = _POFileParser(
+ fpath,
encoding=enc,
check_for_duplicates=kwargs.get('check_for_duplicates', False)
)
@@ -55,108 +120,95 @@ def _pofile_or_mofile(f, type, **kwargs):
return instance
# }}}
-# function pofile() {{{
-
-def pofile(pofile, **kwargs):
- """
- Convenience function that parses the po or pot file ``pofile`` and returns
- a :class:`~polib.POFile` instance.
-
- Arguments:
-
- ``pofile``
- string, full or relative path to the po/pot file or its content (data).
-
- ``wrapwidth``
- integer, the wrap width, only useful when the ``-w`` option was passed
- to xgettext (optional, default: ``78``).
-
- ``encoding``
- string, the encoding to use (e.g. "utf-8") (default: ``None``, the
- encoding will be auto-detected).
-
- ``check_for_duplicates``
- whether to check for duplicate entries when adding entries to the
- file (optional, default: ``False``).
- """
- return _pofile_or_mofile(pofile, 'pofile', **kwargs)
-
-# }}}
# function mofile() {{{
-def mofile(mofile, **kwargs):
+def mofile(fpath, **kwargs):
"""
- Convenience function that parses the mo file ``mofile`` and returns a
- :class:`~polib.MOFile` instance.
-
- Arguments:
-
- ``mofile``
- string, full or relative path to the mo file or its content (data).
-
- ``wrapwidth``
- integer, the wrap width, only useful when the ``-w`` option was passed
- to xgettext to generate the po file that was used to format the mo file
- (optional, default: ``78``).
-
- ``encoding``
- string, the encoding to use (e.g. "utf-8") (default: ``None``, the
- encoding will be auto-detected).
-
- ``check_for_duplicates``
- whether to check for duplicate entries when adding entries to the
- file (optional, default: ``False``).
+ Convenience function that parses the mo file *fpath* and returns
+ a MOFile instance.
+
+ **Keyword arguments**:
+ - *fpath*: string, full or relative path to the mo file to parse
+ - *wrapwidth*: integer, the wrap width, only useful when -w option was
+ passed to xgettext to generate the po file that was used to format
+ the mo file (optional, default to 78)
+ - *autodetect_encoding*: boolean, if set to False the function will
+ not try to detect the mo file encoding (optional, default to True)
+ - *encoding*: string, an encoding, only relevant if autodetect_encoding
+ is set to False
+ - *check_for_duplicates*: whether to check for duplicate entries when
+ adding entries to the file, default: False (optional)
+
+ **Example**:
+
+ >>> import polib
+ >>> mo = polib.mofile('tests/test_utf8.mo', check_for_duplicates=True)
+ >>> mo #doctest: +ELLIPSIS
+ <MOFile instance at ...>
+ >>> import os, tempfile
+ >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
+ ... orig_mo = polib.mofile('tests/'+fname)
+ ... tmpf = tempfile.NamedTemporaryFile().name
+ ... orig_mo.save(tmpf)
+ ... try:
+ ... new_mo = polib.mofile(tmpf)
+ ... for old, new in zip(orig_mo, new_mo):
+ ... if old.msgid != new.msgid:
+ ... old.msgstr
+ ... new.msgstr
+ ... finally:
+ ... os.unlink(tmpf)
"""
- return _pofile_or_mofile(mofile, 'mofile', **kwargs)
+ if kwargs.get('autodetect_encoding', True):
+ enc = detect_encoding(fpath, True)
+ else:
+ enc = kwargs.get('encoding', default_encoding)
+ parser = _MOFileParser(
+ fpath,
+ encoding=enc,
+ check_for_duplicates=kwargs.get('check_for_duplicates', False)
+ )
+ instance = parser.parse()
+ instance.wrapwidth = kwargs.get('wrapwidth', 78)
+ return instance
# }}}
# function detect_encoding() {{{
-def detect_encoding(file, binary_mode=False):
+def detect_encoding(fpath, binary_mode=False):
"""
- Try to detect the encoding used by the ``file``. The ``file`` argument can
- be a PO or MO file path or a string containing the contents of the file.
- If the encoding cannot be detected, the function will return the value of
- ``default_encoding``.
-
- Arguments:
-
- ``file``
- string, full or relative path to the po/mo file or its content.
-
- ``binary_mode``
- boolean, set this to True if ``file`` is a mo file.
+ Try to detect the encoding used by the file *fpath*. The function will
+ return polib default *encoding* if it's unable to detect it.
+
+ **Keyword argument**:
+ - *fpath*: string, full or relative path to the po or mo file to parse.
+
+ **Examples**:
+
+ >>> print(detect_encoding('tests/test_noencoding.po'))
+ utf-8
+ >>> print(detect_encoding('tests/test_utf8.po'))
+ UTF-8
+ >>> print(detect_encoding('tests/test_utf8.mo', True))
+ UTF-8
+ >>> print(detect_encoding('tests/test_iso-8859-15.po'))
+ ISO_8859-15
+ >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
+ ISO_8859-15
"""
+ import re
rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
-
- def charset_exists(charset):
- """Check whether ``charset`` is valid or not."""
- try:
- codecs.lookup(charset)
- except LookupError:
- return False
- return True
-
- if not os.path.exists(file):
- match = rx.search(file)
- if match:
- enc = match.group(1).strip()
- if charset_exists(enc):
- return enc
+ if binary_mode:
+ mode = 'rb'
else:
- if binary_mode:
- mode = 'rb'
- else:
- mode = 'r'
- f = open(file, mode)
- for l in f.readlines():
- match = rx.search(l)
- if match:
- f.close()
- enc = match.group(1).strip()
- if charset_exists(enc):
- return enc
- f.close()
+ mode = 'r'
+ f = open(fpath, mode)
+ for l in f.readlines():
+ match = rx.search(l)
+ if match:
+ f.close()
+ return match.group(1).strip()
+ f.close()
return default_encoding
# }}}
@@ -164,8 +216,12 @@ def detect_encoding(file, binary_mode=False):
def escape(st):
"""
- Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
- the given string ``st`` and returns it.
+ Escape special chars and return the given string *st*.
+
+ **Examples**:
+
+ >>> escape('\\t and \\n and \\r and " and \\\\')
+ '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
"""
return st.replace('\\', r'\\')\
.replace('\t', r'\t')\
@@ -178,8 +234,18 @@ def escape(st):
def unescape(st):
"""
- Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
- the given string ``st`` and returns it.
+ Unescape special chars and return the given string *st*.
+
+ **Examples**:
+
+ >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
+ '\\t and \\n and \\r and " and \\\\'
+ >>> unescape(r'\\n')
+ '\\n'
+ >>> unescape(r'\\\\n')
+ '\\\\n'
+ >>> unescape(r'\\\\n\\n')
+ '\\\\n\\n'
"""
def unescape_repl(m):
m = m.group(1)
@@ -199,36 +265,27 @@ def unescape(st):
class _BaseFile(list):
"""
- Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
- classes. This class should **not** be instanciated directly.
+ Common parent class for POFile and MOFile classes.
+ This class must **not** be instantiated directly.
"""
def __init__(self, *args, **kwargs):
"""
- Constructor, accepts the following keyword arguments:
-
- ``pofile``
- string, the path to the po or mo file, or its content as a string.
-
- ``wrapwidth``
- integer, the wrap width, only useful when the ``-w`` option was
- passed to xgettext (optional, default: ``78``).
-
- ``encoding``
- string, the encoding to use, defaults to ``default_encoding``
- global variable (optional).
+ Constructor.
- ``check_for_duplicates``
- whether to check for duplicate entries when adding entries to the
- file, (optional, default: ``False``).
+ **Keyword arguments**:
+ - *fpath*: string, path to po or mo file
+ - *wrapwidth*: integer, the wrap width, only useful when -w option
+ was passed to xgettext to generate the po file that was used to
+ format the mo file, default to 78 (optional),
+ - *encoding*: string, the encoding to use, defaults to
+ "default_encoding" global variable (optional),
+ - *check_for_duplicates*: whether to check for duplicate entries
+ when adding entries to the file, default: False (optional).
"""
list.__init__(self)
# the opened file handle
- pofile = kwargs.get('pofile', None)
- if pofile and os.path.exists(pofile):
- self.fpath = pofile
- else:
- self.fpath = kwargs.get('fpath')
+ self.fpath = kwargs.get('fpath')
# the width at which lines should be wrapped
self.wrapwidth = kwargs.get('wrapwidth', 78)
# the file encoding
@@ -241,56 +298,66 @@ class _BaseFile(list):
self.metadata = {}
self.metadata_is_fuzzy = 0
- def __unicode__(self):
+ def __str__(self):
"""
- Returns the unicode representation of the file.
+ String representation of the file.
"""
ret = []
entries = [self.metadata_as_entry()] + \
[e for e in self if not e.obsolete]
for entry in entries:
- ret.append(entry.__unicode__(self.wrapwidth))
+ ret.append(entry.__str__(self.wrapwidth))
for entry in self.obsolete_entries():
- ret.append(entry.__unicode__(self.wrapwidth))
- ret = '\n'.join(ret)
-
- if type(ret) != types.UnicodeType:
- return unicode(ret, self.encoding)
- return ret
-
- def __str__(self):
- """
- Returns the string representation of the file.
- """
- return unicode(self).encode(self.encoding)
+ ret.append(entry.__str__(self.wrapwidth))
+ return '\n'.join(ret)
def __contains__(self, entry):
"""
- Overriden ``list`` method to implement the membership test (in and
- not in).
- The method considers that an entry is in the file if it finds an entry
- that has the same msgid (the test is **case sensitive**).
-
- Argument:
-
- ``entry``
- an instance of :class:`~polib._BaseEntry`.
+ Overridden method to implement the membership test (in and not in).
+ The method considers that an entry is in the file if it finds an
+ entry that has the same msgid (case sensitive).
+
+ **Keyword argument**:
+ - *entry*: an instance of polib._BaseEntry
+
+ **Tests**:
+ >>> po = POFile()
+ >>> e1 = POEntry(msgid='foobar', msgstr='spam')
+ >>> e2 = POEntry(msgid='barfoo', msgstr='spam')
+ >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
+ >>> e4 = POEntry(msgid='spameggs', msgstr='eggs')
+ >>> po.append(e1)
+ >>> po.append(e2)
+ >>> e1 in po
+ True
+ >>> e2 not in po
+ False
+ >>> e3 in po
+ True
+ >>> e4 in po
+ False
"""
return self.find(entry.msgid, by='msgid') is not None
-
- def __eq__(self, other):
- return unicode(self) == unicode(other)
def append(self, entry):
"""
Overriden method to check for duplicates entries, if a user tries to
- add an entry that is already in the file, the method will raise a
- ``ValueError`` exception.
-
- Argument:
-
- ``entry``
- an instance of :class:`~polib._BaseEntry`.
+ add an entry that already exists, the method will raise a ValueError
+ exception.
+
+ **Keyword argument**:
+ - *entry*: an instance of polib._BaseEntry
+
+ **Tests**:
+ >>> e1 = POEntry(msgid='foobar', msgstr='spam')
+ >>> e2 = POEntry(msgid='foobar', msgstr='eggs')
+ >>> po = POFile(check_for_duplicates=True)
+ >>> po.append(e1)
+ >>> try:
+ ... po.append(e2)
+ ... except ValueError, e:
+ ... unicode(e)
+ u'Entry "foobar" already exists'
"""
if self.check_for_duplicates and entry in self:
raise ValueError('Entry "%s" already exists' % entry.msgid)
@@ -299,50 +366,70 @@ class _BaseFile(list):
def insert(self, index, entry):
"""
Overriden method to check for duplicates entries, if a user tries to
- add an entry that is already in the file, the method will raise a
- ``ValueError`` exception.
-
- Arguments:
-
- ``index``
- index at which the entry should be inserted.
-
- ``entry``
- an instance of :class:`~polib._BaseEntry`.
+ insert an entry that already exists, the method will raise a ValueError
+ exception.
+
+ **Keyword arguments**:
+ - *index*: index at which the entry should be inserted
+ - *entry*: an instance of polib._BaseEntry
+
+ **Tests**:
+ >>> import polib
+ >>> polib.check_for_duplicates = True
+ >>> e1 = POEntry(msgid='foobar', msgstr='spam')
+ >>> e2 = POEntry(msgid='barfoo', msgstr='eggs')
+ >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
+ >>> po = POFile(check_for_duplicates=True)
+ >>> po.insert(0, e1)
+ >>> po.insert(1, e2)
+ >>> try:
+ ... po.insert(0, e3)
+ ... except ValueError, e:
+ ... unicode(e)
+ u'Entry "foobar" already exists'
"""
if self.check_for_duplicates and entry in self:
raise ValueError('Entry "%s" already exists' % entry.msgid)
super(_BaseFile, self).insert(index, entry)
+ def __repr__(self):
+ """Return the official string representation of the object."""
+ return '<%s instance at %x>' % (self.__class__.__name__, id(self))
+
def metadata_as_entry(self):
"""
- Returns the file metadata as a :class:`~polib.POFile` instance.
+ Return the metadata as an entry:
+
+ >>> import polib
+ >>> po = polib.pofile('tests/test_fuzzy_header.po')
+ >>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read())
+ True
"""
e = POEntry(msgid='')
mdata = self.ordered_metadata()
if mdata:
strs = []
+ e._multiline_str['msgstr'] = ''
for name, value in mdata:
# Strip whitespace off each line in a multi-line entry
strs.append('%s: %s' % (name, value))
e.msgstr = '\n'.join(strs) + '\n'
+ e._multiline_str['msgstr'] = '__POLIB__NL__'.join(
+ [s + '\n' for s in strs])
if self.metadata_is_fuzzy:
e.flags.append('fuzzy')
return e
def save(self, fpath=None, repr_method='__str__'):
"""
- Saves the po file to ``fpath``.
- If it is an existing file and no ``fpath`` is provided, then the
- existing file is rewritten with the modified data.
-
- Keyword arguments:
+ Save the po file to file *fpath* if no file handle exists for
+ the object. If there's already an open file and no fpath is
+ provided, then the existing file is rewritten with the modified
+ data.
- ``fpath``
- string, full or relative path to the file.
-
- ``repr_method``
- string, the method to use for output.
+ **Keyword arguments**:
+ - *fpath*: string, full or relative path to the file.
+ - *repr_method*: string, the method to use for output.
"""
if self.fpath is None and fpath is None:
raise IOError('You must provide a file path to save() method')
@@ -357,47 +444,38 @@ class _BaseFile(list):
contents = contents.decode(self.encoding)
fhandle.write(contents)
fhandle.close()
- # set the file path if not set
- if self.fpath is None and fpath:
- self.fpath = fpath
- def find(self, st, by='msgid', include_obsolete_entries=False,
- msgctxt=False):
+ def find(self, st, by='msgid'):
"""
- Find the entry which msgid (or property identified by the ``by``
- argument) matches the string ``st``.
-
- Keyword arguments:
+ Find the entry whose msgid (or property identified by the *by*
+ attribute) matches the string *st*.
- ``st``
- string, the string to search for.
+ **Keyword arguments**:
+ - *st*: string, the string to search for
+ - *by*: string, the comparison attribute
- ``by``
- string, the property to use for comparison (default: ``msgid``).
+ **Examples**:
- ``include_obsolete_entries``
- boolean, whether to also search in entries that are obsolete.
-
- ``msgctxt``
- string, allows to specify a specific message context for the
- search.
+ >>> po = pofile('tests/test_utf8.po')
+ >>> entry = po.find('Thursday')
+ >>> entry.msgstr
+ u'Jueves'
+ >>> entry = po.find('Some unexistant msgid')
+ >>> entry is None
+ True
+ >>> entry = po.find('Jueves', 'msgstr')
+ >>> entry.msgid
+ u'Thursday'
"""
- if include_obsolete_entries:
- entries = self[:]
- else:
- entries = [e for e in self if not e.obsolete]
- for e in entries:
+ for e in self:
if getattr(e, by) == st:
- if msgctxt and e.msgctxt != msgctxt:
- continue
return e
return None
def ordered_metadata(self):
"""
- Convenience method that returns an ordered version of the metadata
- dictionnary. The return value is list of tuples (metadata name,
- metadata_value).
+ Convenience method that returns the metadata ordered. The return
+ value is a list of tuples (metadata name, metadata_value).
"""
# copy the dict first
metadata = self.metadata.copy()
@@ -419,10 +497,9 @@ class _BaseFile(list):
ordered_data.append((data, value))
except KeyError:
pass
- # the rest of the metadata will be alphabetically ordered since there
- # are no specs for this AFAIK
+ # the rest of the metadata won't be ordered: there are no specs for this
keys = metadata.keys()
- keys.sort()
+ list(keys).sort()
for data in keys:
value = metadata[data]
ordered_data.append((data, value))
@@ -430,51 +507,45 @@ class _BaseFile(list):
def to_binary(self):
"""
- Return the binary representation of the file.
+ Return the mofile binary representation.
"""
+ import array
+ import struct
+ import types
offsets = []
entries = self.translated_entries()
# the keys are sorted in the .mo file
def cmp(_self, other):
- # msgfmt compares entries with msgctxt if it exists
- self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
- other_msgid = other.msgctxt and other.msgctxt or other.msgid
- if self_msgid > other_msgid:
+ if _self.msgid > other.msgid:
return 1
- elif self_msgid < other_msgid:
+ elif _self.msgid < other.msgid:
return -1
else:
return 0
# add metadata entry
entries.sort(cmp)
mentry = self.metadata_as_entry()
- #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
+ mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
entries = [mentry] + entries
entries_len = len(entries)
ids, strs = '', ''
for e in entries:
# For each string, we need size and file offset. Each string is
# NUL terminated; the NUL does not count into the size.
- msgid = ''
- if e.msgctxt:
- # Contexts are stored by storing the concatenation of the
- # context, a <EOT> byte, and the original string
- msgid = self._encode(e.msgctxt + '\4')
if e.msgid_plural:
indexes = e.msgstr_plural.keys()
indexes.sort()
msgstr = []
for index in indexes:
msgstr.append(e.msgstr_plural[index])
- msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
+ msgid = self._encode(e.msgid + '\0' + e.msgid_plural)
msgstr = self._encode('\0'.join(msgstr))
else:
- msgid += self._encode(e.msgid)
+ msgid = self._encode(e.msgid)
msgstr = self._encode(e.msgstr)
offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
ids += msgid + '\0'
strs += msgstr + '\0'
-
# The header is 7 32-bit unsigned integers.
keystart = 7*4+16*entries_len
# and the values start after the keys
@@ -487,31 +558,22 @@ class _BaseFile(list):
koffsets += [l1, o1+keystart]
voffsets += [l2, o2+valuestart]
offsets = koffsets + voffsets
- # check endianness for magic number
- if struct.pack('@h', 1) == struct.pack('<h', 1):
- magic_number = MOFile.LITTLE_ENDIAN
- else:
- magic_number = MOFile.BIG_ENDIAN
-
- output = struct.pack(
- "Iiiiiii",
- magic_number, # Magic number
- 0, # Version
- entries_len, # # of entries
- 7*4, # start of key index
- 7*4+entries_len*8, # start of value index
- 0, keystart # size and offset of hash table
- # Important: we don't use hash tables
- )
- output += array.array("i", offsets).tostring()
+ output = struct.pack("IIIIIII",
+ 0x950412de, # Magic number
+ 0, # Version
+ entries_len, # # of entries
+ 7*4, # start of key index
+ 7*4+entries_len*8, # start of value index
+ 0, 0) # size and offset of hash table
+ output += array.array("I", offsets).tostring()
output += ids
output += strs
return output
def _encode(self, mixed):
"""
- Encodes the given ``mixed`` argument with the file encoding if and
- only if it's an unicode string and returns the encoded string.
+ Encode the given argument with the file encoding if the type is unicode
+ and return the encoded string.
"""
if type(mixed) == types.UnicodeType:
return mixed.encode(self.encoding)
@@ -521,43 +583,88 @@ class _BaseFile(list):
# class POFile {{{
class POFile(_BaseFile):
- """
+ '''
Po (or Pot) file reader/writer.
- This class inherits the :class:`~polib._BaseFile` class and, by extension,
- the python ``list`` type.
- """
+ POFile objects inherit the list objects methods.
+
+ **Example**:
+
+ >>> po = POFile()
+ >>> entry1 = POEntry(
+ ... msgid="Some english text",
+ ... msgstr="Un texte en anglais"
+ ... )
+ >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
+ >>> entry1.comment = "Some useful comment"
+ >>> entry2 = POEntry(
+ ... msgid="Peace in some languages",
+ ... msgstr="Pace سلام שלום Hasîtî 和平"
+ ... )
+ >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
+ >>> entry2.comment = "Another useful comment"
+ >>> entry3 = POEntry(
+ ... msgid='Some entry with quotes " \\"',
+ ... msgstr='Un message unicode avec des quotes " \\"'
+ ... )
+ >>> entry3.comment = "Test string quoting"
+ >>> po.append(entry1)
+ >>> po.append(entry2)
+ >>> po.append(entry3)
+ >>> po.header = "Some Header"
+ >>> print(po)
+ # Some Header
+ msgid ""
+ msgstr ""
+ <BLANKLINE>
+ #. Some useful comment
+ #: testfile:12 another_file:1
+ msgid "Some english text"
+ msgstr "Un texte en anglais"
+ <BLANKLINE>
+ #. Another useful comment
+ #: testfile:15 another_file:5
+ msgid "Peace in some languages"
+ msgstr "Pace سلام שלום Hasîtî 和平"
+ <BLANKLINE>
+ #. Test string quoting
+ msgid "Some entry with quotes \\" \\""
+ msgstr "Un message unicode avec des quotes \\" \\""
+ <BLANKLINE>
+ '''
- def __unicode__(self):
- """
- Returns the unicode representation of the po file.
- """
+ def __str__(self):
+ """Return the string representation of the po file"""
ret, headers = '', self.header.split('\n')
for header in headers:
if header[:1] in [',', ':']:
ret += '#%s\n' % header
else:
ret += '# %s\n' % header
-
- if type(ret) != types.UnicodeType:
- ret = unicode(ret, self.encoding)
-
- return ret + _BaseFile.__unicode__(self)
+ return ret + _BaseFile.__str__(self)
def save_as_mofile(self, fpath):
"""
- Saves the binary representation of the file to given ``fpath``.
-
- Keyword argument:
+ Save the binary representation of the file to *fpath*.
- ``fpath``
- string, full or relative path to the mo file.
+ **Keyword arguments**:
+ - *fpath*: string, full or relative path to the file.
"""
_BaseFile.save(self, fpath, 'to_binary')
def percent_translated(self):
"""
- Convenience method that returns the percentage of translated
+ Convenience method that returns the percentage of translated
messages.
+
+ **Example**:
+
+ >>> import polib
+ >>> po = polib.pofile('tests/test_pofile_helpers.po')
+ >>> po.percent_translated()
+ 50
+ >>> po = POFile()
+ >>> po.percent_translated()
+ 100
"""
total = len([e for e in self if not e.obsolete])
if total == 0:
@@ -567,52 +674,91 @@ class POFile(_BaseFile):
def translated_entries(self):
"""
- Convenience method that returns the list of translated entries.
+ Convenience method that returns a list of translated entries.
+
+ **Example**:
+
+ >>> import polib
+ >>> po = polib.pofile('tests/test_pofile_helpers.po')
+ >>> len(po.translated_entries())
+ 6
"""
return [e for e in self if e.translated()]
def untranslated_entries(self):
"""
- Convenience method that returns the list of untranslated entries.
+ Convenience method that returns a list of untranslated entries.
+
+ **Example**:
+
+ >>> import polib
+ >>> po = polib.pofile('tests/test_pofile_helpers.po')
+ >>> len(po.untranslated_entries())
+ 4
"""
return [e for e in self if not e.translated() and not e.obsolete \
and not 'fuzzy' in e.flags]
def fuzzy_entries(self):
"""
- Convenience method that returns the list of fuzzy entries.
+ Convenience method that returns the list of 'fuzzy' entries.
+
+ **Example**:
+
+ >>> import polib
+ >>> po = polib.pofile('tests/test_pofile_helpers.po')
+ >>> len(po.fuzzy_entries())
+ 2
"""
return [e for e in self if 'fuzzy' in e.flags]
def obsolete_entries(self):
"""
- Convenience method that returns the list of obsolete entries.
+ Convenience method that returns the list of obsolete entries.
+
+ **Example**:
+
+ >>> import polib
+ >>> po = polib.pofile('tests/test_pofile_helpers.po')
+ >>> len(po.obsolete_entries())
+ 4
"""
return [e for e in self if e.obsolete]
def merge(self, refpot):
"""
- Convenience method that merges the current pofile with the pot file
+ XXX this could not work if encodings are different, needs thinking
+ and general refactoring of how polib handles encoding...
+
+ Convenience method that merges the current pofile with the pot file
provided. It behaves exactly as the gettext msgmerge utility:
- * comments of this file will be preserved, but extracted comments and
- occurrences will be discarded;
- * any translations or comments in the file will be discarded, however,
- dot comments and file positions will be preserved;
- * the fuzzy flags are preserved.
+ - comments of this file will be preserved, but extracted comments
+ and occurrences will be discarded
+ - any translations or comments in the file will be discarded,
+ however dot comments and file positions will be preserved
- Keyword argument:
+ **Keyword argument**:
+ - *refpot*: object POFile, the reference catalog.
- ``refpot``
- object POFile, the reference catalog.
+ **Example**:
+
+ >>> import polib
+ >>> refpot = polib.pofile('tests/test_merge.pot')
+ >>> po = polib.pofile('tests/test_merge_before.po')
+ >>> po.merge(refpot)
+ >>> expected_po = polib.pofile('tests/test_merge_after.po')
+ >>> unicode(po) == unicode(expected_po)
+ True
"""
for entry in refpot:
- e = self.find(entry.msgid, include_obsolete_entries=True)
+ e = self.find(entry.msgid)
if e is None:
e = POEntry()
self.append(e)
e.merge(entry)
- # ok, now we must "obsolete" entries that are not in the refpot anymore
+ # ok, now we must "obsolete" entries that are not in the refpot
+ # anymore
for entry in self:
if refpot.find(entry.msgid) is None:
entry.obsolete = True
@@ -621,18 +767,48 @@ class POFile(_BaseFile):
# class MOFile {{{
class MOFile(_BaseFile):
- """
+ '''
Mo file reader/writer.
- This class inherits the :class:`~polib._BaseFile` class and, by
- extension, the python ``list`` type.
- """
- BIG_ENDIAN = 0xde120495
- LITTLE_ENDIAN = 0x950412de
+ MOFile objects inherit the list objects methods.
+
+ **Example**:
+
+ >>> mo = MOFile()
+ >>> entry1 = POEntry(
+ ... msgid="Some english text",
+ ... msgstr="Un texte en anglais"
+ ... )
+ >>> entry2 = POEntry(
+ ... msgid="I need my dirty cheese",
+ ... msgstr="Je veux mon sale fromage"
+ ... )
+ >>> entry3 = MOEntry(
+ ... msgid='Some entry with quotes " \\"',
+ ... msgstr='Un message unicode avec des quotes " \\"'
+ ... )
+ >>> mo.append(entry1)
+ >>> mo.append(entry2)
+ >>> mo.append(entry3)
+ >>> print(mo)
+ msgid ""
+ msgstr ""
+ <BLANKLINE>
+ msgid "Some english text"
+ msgstr "Un texte en anglais"
+ <BLANKLINE>
+ msgid "I need my dirty cheese"
+ msgstr "Je veux mon sale fromage"
+ <BLANKLINE>
+ msgid "Some entry with quotes \\" \\""
+ msgstr "Un message unicode avec des quotes \\" \\""
+ <BLANKLINE>
+ '''
def __init__(self, *args, **kwargs):
"""
- Constructor, accepts all keywords arguments accepted by
- :class:`~polib._BaseFile` class.
+ MOFile constructor. Mo files have two other properties:
+ - magic_number: the magic_number of the binary file,
+ - version: the version of the mo spec.
"""
_BaseFile.__init__(self, *args, **kwargs)
self.magic_number = None
@@ -640,23 +816,19 @@ class MOFile(_BaseFile):
def save_as_pofile(self, fpath):
"""
- Saves the mofile as a pofile to ``fpath``.
+ Save the string representation of the file to *fpath*.
- Keyword argument:
-
- ``fpath``
- string, full or relative path to the file.
+ **Keyword argument**:
+ - *fpath*: string, full or relative path to the file.
"""
_BaseFile.save(self, fpath)
- def save(self, fpath=None):
+ def save(self, fpath):
"""
- Saves the mofile to ``fpath``.
-
- Keyword argument:
+ Save the binary representation of the file to *fpath*.
- ``fpath``
- string, full or relative path to the file.
+ **Keyword argument**:
+ - *fpath*: string, full or relative path to the file.
"""
_BaseFile.save(self, fpath, 'to_binary')
@@ -695,47 +867,29 @@ class MOFile(_BaseFile):
class _BaseEntry(object):
"""
- Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
- This class should **not** be instanciated directly.
+ Base class for POEntry or MOEntry objects.
+ This class must *not* be instantiated directly.
"""
def __init__(self, *args, **kwargs):
- """
- Constructor, accepts the following keyword arguments:
-
- ``msgid``
- string, the entry msgid.
-
- ``msgstr``
- string, the entry msgstr.
-
- ``msgid_plural``
- string, the entry msgid_plural.
-
- ``msgstr_plural``
- list, the entry msgstr_plural lines.
-
- ``msgctxt``
- string, the entry context (msgctxt).
-
- ``obsolete``
- bool, whether the entry is "obsolete" or not.
-
- ``encoding``
- string, the encoding to use, defaults to ``default_encoding``
- global variable (optional).
- """
+ """Base Entry constructor."""
self.msgid = kwargs.get('msgid', '')
self.msgstr = kwargs.get('msgstr', '')
self.msgid_plural = kwargs.get('msgid_plural', '')
self.msgstr_plural = kwargs.get('msgstr_plural', {})
- self.msgctxt = kwargs.get('msgctxt', None)
self.obsolete = kwargs.get('obsolete', False)
self.encoding = kwargs.get('encoding', default_encoding)
+ self.msgctxt = kwargs.get('msgctxt', None)
+ self._multiline_str = {}
- def __unicode__(self, wrapwidth=78):
+ def __repr__(self):
+ """Return the official string representation of the object."""
+ return '<%s instance at %x>' % (self.__class__.__name__, id(self))
+
+ def __str__(self, wrapwidth=78):
"""
- Returns the unicode representation of the entry.
+ Common string representation of the POEntry and MOEntry
+ objects.
"""
if self.obsolete:
delflag = '#~ '
@@ -744,12 +898,12 @@ class _BaseEntry(object):
ret = []
# write the msgctxt if any
if self.msgctxt is not None:
- ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
+ ret += self._str_field("msgctxt", delflag, "", self.msgctxt)
# write the msgid
- ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
+ ret += self._str_field("msgid", delflag, "", self.msgid)
# write the msgid_plural if any
if self.msgid_plural:
- ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
+ ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
if self.msgstr_plural:
# write the msgstr_plural if any
msgstrs = self.msgstr_plural
@@ -758,51 +912,23 @@ class _BaseEntry(object):
for index in keys:
msgstr = msgstrs[index]
plural_index = '[%s]' % index
- ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
+ ret += self._str_field("msgstr", delflag, plural_index, msgstr)
else:
# otherwise write the msgstr
- ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
+ ret += self._str_field("msgstr", delflag, "", self.msgstr)
ret.append('')
- ret = '\n'.join(ret)
-
- if type(ret) != types.UnicodeType:
- return unicode(ret, self.encoding)
- return ret
-
- def __str__(self):
- """
- Returns the string representation of the entry.
- """
- return unicode(self).encode(self.encoding)
-
- def __eq__(self, other):
- return unicode(self) == unicode(other)
+ return '\n'.join(ret)
- def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
- lines = field.splitlines(True)
- if len(lines) > 1:
- lines = [''] + lines # start with initial empty line
+ def _str_field(self, fieldname, delflag, plural_index, field):
+ if (fieldname + plural_index) in self._multiline_str:
+ field = self._multiline_str[fieldname + plural_index]
+ lines = [''] + field.split('__POLIB__NL__')
else:
- escaped_field = escape(field)
- specialchars_count = 0
- for c in ['\\', '\n', '\r', '\t', '"']:
- specialchars_count += field.count(c)
- # comparison must take into account fieldname length + one space
- # + 2 quotes (eg. msgid "<string>")
- flength = len(fieldname) + 3
- if plural_index:
- flength += len(plural_index)
- real_wrapwidth = wrapwidth - flength + specialchars_count
- if wrapwidth > 0 and len(field) > real_wrapwidth:
- # Wrap the line but take field name into account
- lines = [''] + [unescape(item) for item in wrap(
- escaped_field,
- wrapwidth - 2, # 2 for quotes ""
- drop_whitespace=False,
- break_long_words=False
- )]
+ lines = field.splitlines(True)
+ if len(lines) > 1:
+ lines = ['']+lines # start with initial empty line
else:
- lines = [field]
+ lines = [field] # needed for the empty string case
if fieldname.startswith('previous_'):
# quick and dirty trick to get the real field name
fieldname = fieldname[9:]
@@ -819,33 +945,50 @@ class _BaseEntry(object):
class POEntry(_BaseEntry):
"""
Represents a po file entry.
+
+ **Examples**:
+
+ >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
+ >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
+ >>> print(entry)
+ #: welcome.py:12 anotherfile.py:34
+ msgid "Welcome"
+ msgstr "Bienvenue"
+ <BLANKLINE>
+ >>> entry = POEntry()
+ >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
+ >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
+ >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
+ >>> entry.flags.append('c-format')
+ >>> entry.previous_msgctxt = '@somecontext'
+ >>> entry.previous_msgid = 'I had eggs but no spam !'
+ >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
+ >>> entry.msgctxt = '@somenewcontext'
+ >>> entry.msgid = 'I have spam but no egg !'
+ >>> entry.msgid_plural = 'I have spam and %d eggs !'
+ >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
+ >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
+ >>> print(entry)
+ #. A plural translation. This is a very very very long line please do not
+ #. wrap, this is just for testing comment wrapping...
+ # A plural translation. This is a very very very long line please do not wrap,
+ # this is just for testing comment wrapping...
+ #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
+ #: src/eggs.c:45
+ #, c-format
+ #| msgctxt "@somecontext"
+ #| msgid "I had eggs but no spam !"
+ #| msgid_plural "I had eggs and %d spam !"
+ msgctxt "@somenewcontext"
+ msgid "I have spam but no egg !"
+ msgid_plural "I have spam and %d eggs !"
+ msgstr[0] "J'ai du jambon mais aucun oeuf !"
+ msgstr[1] "J'ai du jambon et %d oeufs !"
+ <BLANKLINE>
"""
def __init__(self, *args, **kwargs):
- """
- Constructor, accepts the following keyword arguments:
-
- ``comment``
- string, the entry comment.
-
- ``tcomment``
- string, the entry translator comment.
-
- ``occurrences``
- list, the entry occurrences.
-
- ``flags``
- list, the entry flags.
-
- ``previous_msgctxt``
- string, the entry previous context.
-
- ``previous_msgid``
- string, the entry previous msgid.
-
- ``previous_msgid_plural``
- string, the entry previous msgid_plural.
- """
+ """POEntry constructor."""
_BaseEntry.__init__(self, *args, **kwargs)
self.comment = kwargs.get('comment', '')
self.tcomment = kwargs.get('tcomment', '')
@@ -855,31 +998,33 @@ class POEntry(_BaseEntry):
self.previous_msgid = kwargs.get('previous_msgid', None)
self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
- def __unicode__(self, wrapwidth=78):
+ def __str__(self, wrapwidth=78):
"""
- Returns the unicode representation of the entry.
+ Return the string representation of the entry.
"""
if self.obsolete:
- return _BaseEntry.__unicode__(self, wrapwidth)
-
+ return _BaseEntry.__str__(self)
ret = []
- # comments first, if any (with text wrapping as xgettext does)
- comments = [('comment', '#. '), ('tcomment', '# ')]
- for c in comments:
- val = getattr(self, c[0])
- if val:
- for comment in val.split('\n'):
- if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
- ret += wrap(
- comment,
- wrapwidth,
- initial_indent=c[1],
- subsequent_indent=c[1],
- break_long_words=False
- )
- else:
- ret.append('%s%s' % (c[1], comment))
-
+ # comment first, if any (with text wrapping as xgettext does)
+ if self.comment != '':
+ for comment in self.comment.split('\n'):
+ if wrapwidth > 0 and len(comment) > wrapwidth-3:
+ ret += textwrap.wrap(comment, wrapwidth,
+ initial_indent='#. ',
+ subsequent_indent='#. ',
+ break_long_words=False)
+ else:
+ ret.append('#. %s' % comment)
+ # translator comment, if any (with text wrapping as xgettext does)
+ if self.tcomment != '':
+ for tcomment in self.tcomment.split('\n'):
+ if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
+ ret += textwrap.wrap(tcomment, wrapwidth,
+ initial_indent='# ',
+ subsequent_indent='# ',
+ break_long_words=False)
+ else:
+ ret.append('# %s' % tcomment)
# occurrences (with text wrapping as xgettext does)
if self.occurrences:
filelist = []
@@ -889,43 +1034,79 @@ class POEntry(_BaseEntry):
else:
filelist.append(fpath)
filestr = ' '.join(filelist)
- if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
- # textwrap split words that contain hyphen, this is not
+ if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
+ # XXX textwrap split words that contain hyphen, this is not
# what we want for filenames, so the dirty hack is to
# temporally replace hyphens with a char that a file cannot
# contain, like "*"
- ret += [l.replace('*', '-') for l in wrap(
- filestr.replace('-', '*'),
- wrapwidth,
- initial_indent='#: ',
- subsequent_indent='#: ',
- break_long_words=False
- )]
+ lines = textwrap.wrap(filestr.replace('-', '*'),
+ wrapwidth,
+ initial_indent='#: ',
+ subsequent_indent='#: ',
+ break_long_words=False)
+ # end of the replace hack
+ for line in lines:
+ ret.append(line.replace('*', '-'))
else:
- ret.append('#: ' + filestr)
-
- # flags (TODO: wrapping ?)
+ ret.append('#: '+filestr)
+ # flags
if self.flags:
- ret.append('#, %s' % ', '.join(self.flags))
+ flags = []
+ for flag in self.flags:
+ flags.append(flag)
+ ret.append('#, %s' % ', '.join(flags))
# previous context and previous msgid/msgid_plural
- fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
- for f in fields:
- val = getattr(self, f)
- if val:
- ret += self._str_field(f, "#| ", "", val, wrapwidth)
-
- ret.append(_BaseEntry.__unicode__(self, wrapwidth))
- ret = '\n'.join(ret)
-
- if type(ret) != types.UnicodeType:
- return unicode(ret, self.encoding)
- return ret
+ if self.previous_msgctxt:
+ ret += self._str_field("previous_msgctxt", "#| ", "",
+ self.previous_msgctxt)
+ if self.previous_msgid:
+ ret += self._str_field("previous_msgid", "#| ", "",
+ self.previous_msgid)
+ if self.previous_msgid_plural:
+ ret += self._str_field("previous_msgid_plural", "#| ", "",
+ self.previous_msgid_plural)
+
+ ret.append(_BaseEntry.__str__(self))
+ return '\n'.join(ret)
def __cmp__(self, other):
- """
+ '''
Called by comparison operations if rich comparison is not defined.
- """
+
+ **Tests**:
+ >>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
+ >>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
+ >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
+ >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
+ >>> po = POFile()
+ >>> po.append(a)
+ >>> po.append(b)
+ >>> po.append(c1)
+ >>> po.append(c2)
+ >>> po.sort()
+ >>> print(po)
+ #
+ msgid ""
+ msgstr ""
+ <BLANKLINE>
+ #: a.py:1 a.py:3
+ msgid "c2"
+ msgstr ""
+ <BLANKLINE>
+ #: a.py:1 b.py:1
+ msgid "c1"
+ msgstr ""
+ <BLANKLINE>
+ #: b.py:1 b.py:3
+ msgid "a"
+ msgstr ""
+ <BLANKLINE>
+ #: b.py:1 b.py:3
+ msgid "b"
+ msgstr ""
+ <BLANKLINE>
+ '''
def compare_occurrences(a, b):
"""
Compare an entry occurrence with another one.
@@ -972,8 +1153,7 @@ class POEntry(_BaseEntry):
def translated(self):
"""
- Returns ``True`` if the entry has been translated or ``False``
- otherwise.
+ Return True if the entry has been translated, False otherwise.
"""
if self.obsolete or 'fuzzy' in self.flags:
return False
@@ -990,19 +1170,11 @@ class POEntry(_BaseEntry):
"""
Merge the current entry with the given pot entry.
"""
- self.msgid = other.msgid
- self.msgctxt = other.msgctxt
- self.occurrences = other.occurrences
- self.comment = other.comment
- fuzzy = 'fuzzy' in self.flags
- self.flags = other.flags[:] # clone flags
- if fuzzy:
- self.flags.append('fuzzy')
+ self.msgid = other.msgid
+ self.occurrences = other.occurrences
+ self.comment = other.comment
+ self.flags = other.flags
self.msgid_plural = other.msgid_plural
- self.obsolete = other.obsolete
- self.previous_msgctxt = other.previous_msgctxt
- self.previous_msgid = other.previous_msgid
- self.previous_msgid_plural = other.previous_msgid_plural
if other.msgstr_plural:
for pos in other.msgstr_plural:
try:
@@ -1017,8 +1189,23 @@ class POEntry(_BaseEntry):
class MOEntry(_BaseEntry):
"""
Represents a mo file entry.
+
+ **Examples**:
+
+ >>> entry = MOEntry()
+ >>> entry.msgid = 'translate me !'
+ >>> entry.msgstr = 'traduisez moi !'
+ >>> print(entry)
+ msgid "translate me !"
+ msgstr "traduisez moi !"
+ <BLANKLINE>
"""
- pass
+
+ def __str__(self, wrapwidth=78):
+ """
+ Return the string representation of the entry.
+ """
+ return _BaseEntry.__str__(self, wrapwidth)
# }}}
# class _POFileParser {{{
@@ -1029,37 +1216,28 @@ class _POFileParser(object):
file format.
"""
- def __init__(self, pofile, *args, **kwargs):
+ def __init__(self, fpath, *args, **kwargs):
"""
Constructor.
- Keyword arguments:
-
- ``pofile``
- string, path to the po file or its content
-
- ``encoding``
- string, the encoding to use, defaults to ``default_encoding``
- global variable (optional).
-
- ``check_for_duplicates``
- whether to check for duplicate entries when adding entries to the
- file (optional, default: ``False``).
+ **Arguments**:
+ - *fpath*: string, path to the po file
+ - *encoding*: string, the encoding to use, defaults to
+ "default_encoding" global variable (optional),
+ - *check_for_duplicates*: whether to check for duplicate entries
+ when adding entries to the file, default: False (optional).
"""
enc = kwargs.get('encoding', default_encoding)
- if os.path.exists(pofile):
- try:
- self.fhandle = codecs.open(pofile, 'rU', enc)
- except LookupError:
- enc = default_encoding
- self.fhandle = codecs.open(pofile, 'rU', enc)
- else:
- self.fhandle = pofile.splitlines()
-
+ check_dup = kwargs.get('check_for_duplicates', False)
+ try:
+ self.fhandle = codecs.open(fpath, 'rU', enc)
+ except LookupError:
+ enc = default_encoding
+ self.fhandle = codecs.open(fpath, 'rU', enc)
self.instance = POFile(
- pofile=pofile,
+ fpath=fpath,
encoding=enc,
- check_for_duplicates=kwargs.get('check_for_duplicates', False)
+ check_for_duplicates=check_dup
)
self.transitions = {}
self.current_entry = POEntry()
@@ -1111,103 +1289,59 @@ class _POFileParser(object):
Run the state machine, parse the file line by line and call process()
with the current matched symbol.
"""
- i = 0
-
- keywords = {
- 'msgctxt': 'CT',
- 'msgid': 'MI',
- 'msgstr': 'MS',
- 'msgid_plural': 'MP',
- }
- prev_keywords = {
- 'msgid_plural': 'PP',
- 'msgid': 'PM',
- 'msgctxt': 'PC',
- }
-
+ i, lastlen = 1, 0
for line in self.fhandle:
- i += 1
line = line.strip()
if line == '':
+ i = i+1
continue
-
- tokens = line.split(None, 2)
- nb_tokens = len(tokens)
-
- if tokens[0] == '#~' and nb_tokens > 1:
- line = line[3:].strip()
- tokens = tokens[1:]
- nb_tokens -= 1
+ if line[:3] == '#~ ':
+ line = line[3:]
self.entry_obsolete = 1
else:
self.entry_obsolete = 0
-
- # Take care of keywords like
- # msgid, msgid_plural, msgctxt & msgstr.
- if tokens[0] in keywords and nb_tokens > 1:
- line = line[len(tokens[0]):].lstrip()
- self.current_token = line
- self.process(keywords[tokens[0]], i)
- continue
-
self.current_token = line
-
- if tokens[0] == '#:' and nb_tokens > 1:
+ if line[:2] == '#:':
# we are on a occurrences line
self.process('OC', i)
-
- elif line[:1] == '"':
- # we are on a continuation line
+ elif line[:9] == 'msgctxt "':
+ # we are on a msgctxt
+ self.process('CT', i)
+ elif line[:7] == 'msgid "':
+ # we are on a msgid
+ self.process('MI', i)
+ elif line[:8] == 'msgstr "':
+ # we are on a msgstr
+ self.process('MS', i)
+ elif line[:1] == '"' or line[:4] == '#| "':
+ # we are on a continuation line or some metadata
self.process('MC', i)
-
+ elif line[:14] == 'msgid_plural "':
+ # we are on a msgid plural
+ self.process('MP', i)
elif line[:7] == 'msgstr[':
# we are on a msgstr plural
self.process('MX', i)
-
- elif tokens[0] == '#,' and nb_tokens > 1:
+ elif line[:3] == '#, ':
# we are on a flags line
self.process('FL', i)
-
- elif tokens[0] == '#':
- if line == '#': line += ' '
+ elif line[:2] == '# ' or line == '#':
+ if line == '#': line = line + ' '
# we are on a translator comment line
self.process('TC', i)
-
- elif tokens[0] == '#.' and nb_tokens > 1:
+ elif line[:2] == '#.':
# we are on a generated comment line
self.process('GC', i)
-
- elif tokens[0] == '#|':
- if nb_tokens < 2:
- self.process('??', i)
- continue
-
- # Remove the marker and any whitespace right after that.
- line = line[2:].lstrip()
- self.current_token = line
-
- if tokens[1].startswith('"'):
- # Continuation of previous metadata.
- self.process('MC', i)
- continue
-
- if nb_tokens == 2:
- # Invalid continuation line.
- self.process('??', i)
-
- # we are on a "previous translation" comment line,
- if tokens[1] not in prev_keywords:
- # Unknown keyword in previous translation comment.
- self.process('??', i)
-
- # Remove the keyword and any whitespace
- # between it and the starting quote.
- line = line[len(tokens[1]):].lstrip()
- self.current_token = line
- self.process(prev_keywords[tokens[1]], i)
-
- else:
- self.process('??', i)
+ elif line[:15] == '#| msgid_plural':
+ # we are on a previous msgid_plural
+ self.process('PP', i)
+ elif line[:8] == '#| msgid':
+ self.process('PM', i)
+ # we are on a previous msgid
+ elif line[:10] == '#| msgctxt':
+ # we are on a previous msgctxt
+ self.process('PC', i)
+ i = i+1
if self.current_entry:
# since entries are added when another entry is found, we must add
@@ -1229,24 +1363,17 @@ class _POFileParser(object):
if key is not None:
self.instance.metadata[key] += '\n'+ msg.strip()
# close opened file
- if isinstance(self.fhandle, file):
- self.fhandle.close()
+ self.fhandle.close()
return self.instance
def add(self, symbol, states, next_state):
"""
Add a transition to the state machine.
-
Keywords arguments:
- ``symbol``
- string, the matched token (two chars symbol).
-
- ``states``
- list, a list of states (two chars symbols).
-
- ``next_state``
- the next state the fsm will have after the action.
+ symbol -- string, the matched token (two chars symbol)
+ states -- list, a list of states (two chars symbols)
+ next_state -- the next state the fsm will have after the action
"""
for state in states:
action = getattr(self, 'handle_%s' % next_state.lower())
@@ -1258,12 +1385,8 @@ class _POFileParser(object):
symbol provided.
Keywords arguments:
-
- ``symbol``
- string, the matched token (two chars symbol).
-
- ``linenum``
- integer, the current line number of the parsed file.
+ symbol -- string, the matched token (two chars symbol)
+ linenum -- integer, the current line number of the parsed file
"""
try:
(action, state) = self.transitions[(symbol, self.current_state)]
@@ -1333,7 +1456,7 @@ class _POFileParser(object):
self.instance.append(self.current_entry)
self.current_entry = POEntry()
self.current_entry.previous_msgid_plural = \
- unescape(self.current_token[1:-1])
+ unescape(self.current_token[17:-1])
return True
def handle_pm(self):
@@ -1342,7 +1465,7 @@ class _POFileParser(object):
self.instance.append(self.current_entry)
self.current_entry = POEntry()
self.current_entry.previous_msgid = \
- unescape(self.current_token[1:-1])
+ unescape(self.current_token[10:-1])
return True
def handle_pc(self):
@@ -1351,7 +1474,7 @@ class _POFileParser(object):
self.instance.append(self.current_entry)
self.current_entry = POEntry()
self.current_entry.previous_msgctxt = \
- unescape(self.current_token[1:-1])
+ unescape(self.current_token[12:-1])
return True
def handle_ct(self):
@@ -1359,7 +1482,7 @@ class _POFileParser(object):
if self.current_state in ['MC', 'MS', 'MX']:
self.instance.append(self.current_entry)
self.current_entry = POEntry()
- self.current_entry.msgctxt = unescape(self.current_token[1:-1])
+ self.current_entry.msgctxt = unescape(self.current_token[9:-1])
return True
def handle_mi(self):
@@ -1368,17 +1491,17 @@ class _POFileParser(object):
self.instance.append(self.current_entry)
self.current_entry = POEntry()
self.current_entry.obsolete = self.entry_obsolete
- self.current_entry.msgid = unescape(self.current_token[1:-1])
+ self.current_entry.msgid = unescape(self.current_token[7:-1])
return True
def handle_mp(self):
"""Handle a msgid plural."""
- self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
+ self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
return True
def handle_ms(self):
"""Handle a msgstr."""
- self.current_entry.msgstr = unescape(self.current_token[1:-1])
+ self.current_entry.msgstr = unescape(self.current_token[8:-1])
return True
def handle_mx(self):
@@ -1418,6 +1541,10 @@ class _POFileParser(object):
typ = 'previous_msgctxt'
token = token[3:]
self.current_entry.previous_msgctxt += token
+ if typ not in self.current_entry._multiline_str:
+ self.current_entry._multiline_str[typ] = token
+ else:
+ self.current_entry._multiline_str[typ] += "__POLIB__NL__" + token
# don't change the current state
return False
@@ -1428,41 +1555,43 @@ class _MOFileParser(object):
"""
A class to parse binary mo files.
"""
+ BIG_ENDIAN = 0xde120495
+ LITTLE_ENDIAN = 0x950412de
- def __init__(self, mofile, *args, **kwargs):
+ def __init__(self, fpath, *args, **kwargs):
"""
Constructor.
- Keyword arguments:
-
- ``mofile``
- string, path to the mo file or its content
-
- ``encoding``
- string, the encoding to use, defaults to ``default_encoding``
- global variable (optional).
-
- ``check_for_duplicates``
- whether to check for duplicate entries when adding entries to the
- file (optional, default: ``False``).
+ **Arguments**:
+ - *fpath*: string, path to the mo file
+ - *encoding*: string, the encoding to use, defaults to
+ "default_encoding" global variable (optional),
+ - *check_for_duplicates*: whether to check for duplicate entries
+ when adding entries to the file, default: False (optional).
"""
- self.fhandle = open(mofile, 'rb')
+ enc = kwargs.get('encoding', default_encoding)
+ check_dup = kwargs.get('check_for_duplicates', False)
+ self.fhandle = open(fpath, 'rb')
self.instance = MOFile(
- fpath=mofile,
- encoding=kwargs.get('encoding', default_encoding),
- check_for_duplicates=kwargs.get('check_for_duplicates', False)
+ fpath=fpath,
+ encoding=enc,
+ check_for_duplicates=check_dup
)
+ def parse_magicnumber(self):
+ """
+ Parse the magic number and raise an exception if not valid.
+ """
+
def parse(self):
"""
Build the instance with the file handle provided in the
constructor.
"""
- # parse magic number
magic_number = self._readbinary('<I', 4)
- if magic_number == MOFile.LITTLE_ENDIAN:
+ if magic_number == self.LITTLE_ENDIAN:
ii = '<II'
- elif magic_number == MOFile.BIG_ENDIAN:
+ elif magic_number == self.BIG_ENDIAN:
ii = '>II'
else:
raise IOError('Invalid mo file, magic number is incorrect !')
@@ -1501,35 +1630,18 @@ class _MOFileParser(object):
# test if we have a plural entry
msgid_tokens = msgid.split('\0')
if len(msgid_tokens) > 1:
- entry = self._build_entry(
+ entry = MOEntry(
msgid=msgid_tokens[0],
msgid_plural=msgid_tokens[1],
- msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
+ msgstr_plural=dict((k,v) for k,v in \
+ enumerate(msgstr.split('\0')))
)
else:
- entry = self._build_entry(msgid=msgid, msgstr=msgstr)
+ entry = MOEntry(msgid=msgid, msgstr=msgstr)
self.instance.append(entry)
# close opened file
self.fhandle.close()
return self.instance
-
- def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
- msgstr_plural=None):
- msgctxt_msgid = msgid.split('\x04')
- if len(msgctxt_msgid) > 1:
- kwargs = {
- 'msgctxt': msgctxt_msgid[0],
- 'msgid' : msgctxt_msgid[1],
- }
- else:
- kwargs = {'msgid': msgid}
- if msgstr:
- kwargs['msgstr'] = msgstr
- if msgid_plural:
- kwargs['msgid_plural'] = msgid_plural
- if msgstr_plural:
- kwargs['msgstr_plural'] = msgstr_plural
- return MOEntry(**kwargs)
def _readbinary(self, fmt, numbytes):
"""
@@ -1543,97 +1655,26 @@ class _MOFileParser(object):
return tup
# }}}
-# class TextWrapper {{{
+# __main__ {{{
-class TextWrapper(textwrap.TextWrapper):
+if __name__ == '__main__':
"""
- Subclass of textwrap.TextWrapper that backport the
- drop_whitespace option.
+ **Main function**::
+ - to **test** the module just run: *python polib.py [-v]*
+ - to **profile** the module: *python polib.py -p <some_pofile.po>*
"""
- def __init__(self, *args, **kwargs):
- drop_whitespace = kwargs.pop('drop_whitespace', True)
- textwrap.TextWrapper.__init__(self, *args, **kwargs)
- self.drop_whitespace = drop_whitespace
-
- def _wrap_chunks(self, chunks):
- """_wrap_chunks(chunks : [string]) -> [string]
-
- Wrap a sequence of text chunks and return a list of lines of
- length 'self.width' or less. (If 'break_long_words' is false,
- some lines may be longer than this.) Chunks correspond roughly
- to words and the whitespace between them: each chunk is
- indivisible (modulo 'break_long_words'), but a line break can
- come between any two chunks. Chunks should not have internal
- whitespace; ie. a chunk is either all whitespace or a "word".
- Whitespace chunks will be removed from the beginning and end of
- lines, but apart from that whitespace is preserved.
- """
- lines = []
- if self.width <= 0:
- raise ValueError("invalid width %r (must be > 0)" % self.width)
-
- # Arrange in reverse order so items can be efficiently popped
- # from a stack of chucks.
- chunks.reverse()
-
- while chunks:
-
- # Start the list of chunks that will make up the current line.
- # cur_len is just the length of all the chunks in cur_line.
- cur_line = []
- cur_len = 0
-
- # Figure out which static string will prefix this line.
- if lines:
- indent = self.subsequent_indent
+ import sys
+ if len(sys.argv) > 2 and sys.argv[1] == '-p':
+ def test(f):
+ if f.endswith('po'):
+ p = pofile(f)
else:
- indent = self.initial_indent
-
- # Maximum width for this line.
- width = self.width - len(indent)
-
- # First chunk on line is whitespace -- drop it, unless this
- # is the very beginning of the text (ie. no lines started yet).
- if self.drop_whitespace and chunks[-1].strip() == '' and lines:
- del chunks[-1]
-
- while chunks:
- l = len(chunks[-1])
-
- # Can at least squeeze this chunk onto the current line.
- if cur_len + l <= width:
- cur_line.append(chunks.pop())
- cur_len += l
-
- # Nope, this line is full.
- else:
- break
-
- # The current line is full, and the next chunk is too big to
- # fit on *any* line (not just this one).
- if chunks and len(chunks[-1]) > width:
- self._handle_long_word(chunks, cur_line, cur_len, width)
-
- # If the last chunk on this line is all whitespace, drop it.
- if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
- del cur_line[-1]
-
- # Convert current line back to a string and store it in list
- # of all lines (return value).
- if cur_line:
- lines.append(indent + ''.join(cur_line))
-
- return lines
+ p = mofile(f)
+ s = unicode(p)
+ import profile
+ profile.run('test("'+sys.argv[2]+'")')
+ else:
+ import doctest
+ doctest.testmod()
# }}}
-# function wrap() {{{
-
-def wrap(text, width=70, **kwargs):
- """
- Wrap a single paragraph of text, returning a list of wrapped lines.
- """
- if sys.version_info < (2, 6):
- return TextWrapper(width=width, **kwargs).wrap(text)
- return textwrap.wrap(text, width=width, **kwargs)
-
-#}}}