1 files changed, 771 insertions, 730 deletions
diff --git a/i18n/polib.py b/i18n/polib.py
index c09aebc..88428ce 100644
--- a/i18n/polib.py
+++ b/i18n/polib.py
@@ -5,48 +5,113 @@
 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
 
 """
-**polib** allows you to manipulate, create, modify gettext files (pot, po and
-mo files).  You can load existing files, iterate through it's entries, add,
-modify entries, comments or metadata, etc. or create new po files from scratch.
-
-**polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
-:func:`~polib.mofile` convenience functions.
+**polib** allows you to manipulate, create, modify gettext files (pot, po
+and mo files).  You can load existing files, iterate through their entries,
+add, modify entries, comments or metadata, etc., or create new po files
+from scratch.
+
+**polib** provides a simple and pythonic API, exporting only three
+convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
+four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
+new files/entries.
+
+**Basic example**:
+
+>>> import polib
+>>> # load an existing po file
+>>> po = polib.pofile('tests/test_utf8.po')
+>>> for entry in po:
+...     # do something with entry...
+...     pass
+>>> # add an entry
+>>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
+>>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
+>>> po.append(entry)
+>>> # to save our modified po file:
+>>> # po.save()
+>>> # or you may want to compile the po file
+>>> # po.save_as_mofile('tests/test_utf8.mo')
 """
 
-__author__    = 'David Jean Louis <izimobil@gmail.com>'
-__version__   = '0.6.4'
+__author__    = 'David JEAN LOUIS <izimobil@gmail.com>'
+__version__   = '0.5.2'
 __all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
                  'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
 
-import array
 import codecs
-import os
-import re
 import struct
-import sys
 import textwrap
 import types
+import re
 
-
-# the default encoding to use when encoding cannot be detected
 default_encoding = 'utf-8'
 
-# _pofile_or_mofile {{{
+# function pofile() {{{
 
-def _pofile_or_mofile(f, type, **kwargs):
+def pofile(fpath, **kwargs):
     """
-    Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
-    honor the DRY concept.
+    Convenience function that parses the po/pot file *fpath* and returns
+    a POFile instance.
+
+    **Keyword arguments**:
+      - *fpath*: string, full or relative path to the po/pot file to parse
+      - *wrapwidth*: integer, the wrap width, only useful when -w option was
+        passed to xgettext (optional, default to 78)
+      - *autodetect_encoding*: boolean, if set to False the function will
+        not try to detect the po file encoding (optional, default to True)
+      - *encoding*: string, an encoding, only relevant if autodetect_encoding
+        is set to False
+      - *check_for_duplicates*: whether to check for duplicate entries when
+        adding entries to the file, default: False (optional)
+
+    **Example**:
+
+    >>> import polib
+    >>> po = polib.pofile('tests/test_weird_occurrences.po',
+    ...     check_for_duplicates=True)
+    >>> po #doctest: +ELLIPSIS
+    <POFile instance at ...>
+    >>> import os, tempfile
+    >>> all_attrs = ('msgctxt', 'msgid', 'msgstr', 'msgid_plural', 
+    ...              'msgstr_plural', 'obsolete', 'comment', 'tcomment', 
+    ...              'occurrences', 'flags', 'previous_msgctxt', 
+    ...              'previous_msgid', 'previous_msgid_plural')
+    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
+    ...     orig_po = polib.pofile('tests/'+fname)
+    ...     tmpf = tempfile.NamedTemporaryFile().name
+    ...     orig_po.save(tmpf)
+    ...     try:
+    ...         new_po = polib.pofile(tmpf)
+    ...         for old, new in zip(orig_po, new_po):
+    ...             for attr in all_attrs:
+    ...                 if getattr(old, attr) != getattr(new, attr):
+    ...                     getattr(old, attr)
+    ...                     getattr(new, attr)
+    ...     finally:
+    ...         os.unlink(tmpf)
+    >>> po_file = polib.pofile('tests/test_save_as_mofile.po')
+    >>> tmpf = tempfile.NamedTemporaryFile().name
+    >>> po_file.save_as_mofile(tmpf)
+    >>> try:
+    ...     mo_file = polib.mofile(tmpf)
+    ...     for old, new in zip(po_file, mo_file):
+    ...         if po_file._encode(old.msgid) != mo_file._encode(new.msgid):
+    ...             'OLD: ', po_file._encode(old.msgid)
+    ...             'NEW: ', mo_file._encode(new.msgid)
+    ...         if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):
+    ...             'OLD: ', po_file._encode(old.msgstr)
+    ...             'NEW: ', mo_file._encode(new.msgstr)
+    ...             print new.msgstr
+    ... finally:
+    ...     os.unlink(tmpf)
     """
-    # get the file encoding
-    enc = kwargs.get('encoding')
-    if enc is None:
-        enc = detect_encoding(f, type == 'mofile')
-
-    # parse the file
-    kls = type == 'pofile' and _POFileParser or _MOFileParser
-    parser = kls(
-        f,
+    if kwargs.get('autodetect_encoding', True):
+        enc = detect_encoding(fpath)
+    else:
+        enc = kwargs.get('encoding', default_encoding)
+    check_for_duplicates = kwargs.get('check_for_duplicates', False)
+    parser = _POFileParser(
+        fpath,
         encoding=enc,
         check_for_duplicates=kwargs.get('check_for_duplicates', False)
     )
@@ -55,108 +120,95 @@ def _pofile_or_mofile(f, type, **kwargs):
     return instance
 
 # }}}
-# function pofile() {{{
-
-def pofile(pofile, **kwargs):
-    """
-    Convenience function that parses the po or pot file ``pofile`` and returns
-    a :class:`~polib.POFile` instance.
-
-    Arguments:
-
-    ``pofile``
-        string, full or relative path to the po/pot file or its content (data).
-
-    ``wrapwidth``
-        integer, the wrap width, only useful when the ``-w`` option was passed
-        to xgettext (optional, default: ``78``).
-
-    ``encoding``
-        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
-        encoding will be auto-detected).
-
-    ``check_for_duplicates``
-        whether to check for duplicate entries when adding entries to the
-        file (optional, default: ``False``).
-    """
-    return _pofile_or_mofile(pofile, 'pofile', **kwargs)
-
-# }}}
 # function mofile() {{{
 
-def mofile(mofile, **kwargs):
+def mofile(fpath, **kwargs):
     """
-    Convenience function that parses the mo file ``mofile`` and returns a
-    :class:`~polib.MOFile` instance.
-
-    Arguments:
-
-    ``mofile``
-        string, full or relative path to the mo file or its content (data).
-
-    ``wrapwidth``
-        integer, the wrap width, only useful when the ``-w`` option was passed
-        to xgettext to generate the po file that was used to format the mo file
-        (optional, default: ``78``).
-
-    ``encoding``
-        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
-        encoding will be auto-detected).
-
-    ``check_for_duplicates``
-        whether to check for duplicate entries when adding entries to the
-        file (optional, default: ``False``).
+    Convenience function that parses the mo file *fpath* and returns
+    a MOFile instance.
+
+    **Keyword arguments**:
+      - *fpath*: string, full or relative path to the mo file to parse
+      - *wrapwidth*: integer, the wrap width, only useful when -w option was
+        passed to xgettext to generate the po file that was used to format
+        the mo file (optional, default to 78)
+      - *autodetect_encoding*: boolean, if set to False the function will
+        not try to detect the po file encoding (optional, default to True)
+      - *encoding*: string, an encoding, only relevant if autodetect_encoding
+        is set to False
+      - *check_for_duplicates*: whether to check for duplicate entries when
+        adding entries to the file, default: False (optional)
+
+    **Example**:
+
+    >>> import polib
+    >>> mo = polib.mofile('tests/test_utf8.mo', check_for_duplicates=True)
+    >>> mo #doctest: +ELLIPSIS
+    <MOFile instance at ...>
+    >>> import os, tempfile
+    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
+    ...     orig_mo = polib.mofile('tests/'+fname)
+    ...     tmpf = tempfile.NamedTemporaryFile().name
+    ...     orig_mo.save(tmpf)
+    ...     try:
+    ...         new_mo = polib.mofile(tmpf)
+    ...         for old, new in zip(orig_mo, new_mo):
+    ...             if old.msgid != new.msgid:
+    ...                 old.msgstr
+    ...                 new.msgstr
+    ...     finally:
+    ...         os.unlink(tmpf)
     """
-    return _pofile_or_mofile(mofile, 'mofile', **kwargs)
+    if kwargs.get('autodetect_encoding', True):
+        enc = detect_encoding(fpath, True)
+    else:
+        enc = kwargs.get('encoding', default_encoding)
+    parser = _MOFileParser(
+        fpath,
+        encoding=enc,
+        check_for_duplicates=kwargs.get('check_for_duplicates', False)
+    )
+    instance = parser.parse()
+    instance.wrapwidth = kwargs.get('wrapwidth', 78)
+    return instance
 
 # }}}
 # function detect_encoding() {{{
 
-def detect_encoding(file, binary_mode=False):
+def detect_encoding(fpath, binary_mode=False):
     """
-    Try to detect the encoding used by the ``file``. The ``file`` argument can
-    be a PO or MO file path or a string containing the contents of the file.
-    If the encoding cannot be detected, the function will return the value of
-    ``default_encoding``.
-
-    Arguments:
-
-    ``file``
-        string, full or relative path to the po/mo file or its content.
-
-    ``binary_mode``
-        boolean, set this to True if ``file`` is a mo file.
+    Try to detect the encoding used by the file *fpath*. The function will
+    return polib's *default_encoding* if it's unable to detect it.
+
+    **Keyword argument**:
+      - *fpath*: string, full or relative path to the po or mo file.
+
+    **Examples**:
+
+    >>> print(detect_encoding('tests/test_noencoding.po'))
+    utf-8
+    >>> print(detect_encoding('tests/test_utf8.po'))
+    UTF-8
+    >>> print(detect_encoding('tests/test_utf8.mo', True))
+    UTF-8
+    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
+    ISO_8859-15
+    >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
+    ISO_8859-15
     """
+    import re
     rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
-
-    def charset_exists(charset):
-        """Check whether ``charset`` is valid or not."""
-        try:
-            codecs.lookup(charset)
-        except LookupError:
-            return False
-        return True
-
-    if not os.path.exists(file):
-        match = rx.search(file)
-        if match:
-            enc = match.group(1).strip()
-            if charset_exists(enc):
-                return enc
+    if binary_mode:
+        mode = 'rb'
     else:
-        if binary_mode:
-            mode = 'rb'
-        else:
-            mode = 'r'
-        f = open(file, mode)
-        for l in f.readlines():
-            match = rx.search(l)
-            if match:
-                f.close()
-                enc = match.group(1).strip()
-                if charset_exists(enc):
-                    return enc
-        f.close()
+        mode = 'r'
+    f = open(fpath, mode)
+    for l in f.readlines():
+        match = rx.search(l)
+        if match:
+            f.close()
+            return match.group(1).strip()
+    f.close()
     return default_encoding
 
 # }}}
@@ -164,8 +216,12 @@ def detect_encoding(file, binary_mode=False):
 
 def escape(st):
     """
-    Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
-    the given string ``st`` and returns it.
+    Escape special chars and return the given string *st*.
+
+    **Examples**:
+
+    >>> escape('\\t and \\n and \\r and " and \\\\')
+    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
     """
     return st.replace('\\', r'\\')\
              .replace('\t', r'\t')\
@@ -178,8 +234,18 @@ def escape(st):
 
 def unescape(st):
     """
-    Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
-    the given string ``st`` and returns it.
+    Unescape special chars and return the given string *st*.
+
+    **Examples**:
+
+    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
+    '\\t and \\n and \\r and " and \\\\'
+    >>> unescape(r'\\n')
+    '\\n'
+    >>> unescape(r'\\\\n')
+    '\\\\n'
+    >>> unescape(r'\\\\n\\n')
+    '\\\\n\\n'
     """
     def unescape_repl(m):
         m = m.group(1)
@@ -199,36 +265,27 @@ def unescape(st):
 
 class _BaseFile(list):
     """
-    Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
-    classes. This class should **not** be instanciated directly.
+    Common parent class for POFile and MOFile classes.
+    This class must **not** be instantiated directly.
     """
 
     def __init__(self, *args, **kwargs):
         """
-        Constructor, accepts the following keyword arguments:
-
-        ``pofile``
-            string, the path to the po or mo file, or its content as a string.
-
-        ``wrapwidth``
-            integer, the wrap width, only useful when the ``-w`` option was
-            passed to xgettext (optional, default: ``78``).
-
-        ``encoding``
-            string, the encoding to use, defaults to ``default_encoding``
-            global variable (optional).
+        Constructor.
 
-        ``check_for_duplicates``
-            whether to check for duplicate entries when adding entries to the
-            file, (optional, default: ``False``).
+        **Keyword arguments**:
+          - *fpath*: string, path to po or mo file
+          - *wrapwidth*: integer, the wrap width, only useful when -w option
+            was passed to xgettext to generate the po file that was used to
+            format the mo file, default to 78 (optional),
+          - *encoding*: string, the encoding to use, defaults to
+            "default_encoding" global variable (optional),
+          - *check_for_duplicates*: whether to check for duplicate entries
+            when adding entries to the file, default: False (optional).
         """
         list.__init__(self)
         # the opened file handle
-        pofile = kwargs.get('pofile', None)
-        if pofile and os.path.exists(pofile):
-            self.fpath = pofile
-        else:
-            self.fpath = kwargs.get('fpath')
+        self.fpath = kwargs.get('fpath')
         # the width at which lines should be wrapped
         self.wrapwidth = kwargs.get('wrapwidth', 78)
         # the file encoding
@@ -241,56 +298,66 @@ class _BaseFile(list):
         self.metadata = {}
         self.metadata_is_fuzzy = 0
 
-    def __unicode__(self):
+    def __str__(self):
         """
-        Returns the unicode representation of the file.
+        String representation of the file.
         """
         ret = []
         entries = [self.metadata_as_entry()] + \
                   [e for e in self if not e.obsolete]
         for entry in entries:
-            ret.append(entry.__unicode__(self.wrapwidth))
+            ret.append(entry.__str__(self.wrapwidth))
         for entry in self.obsolete_entries():
-            ret.append(entry.__unicode__(self.wrapwidth))
-        ret = '\n'.join(ret)
-
-        if type(ret) != types.UnicodeType:
-            return unicode(ret, self.encoding)
-        return ret
-
-    def __str__(self):
-        """
-        Returns the string representation of the file.
-        """
-        return unicode(self).encode(self.encoding)
+            ret.append(entry.__str__(self.wrapwidth))
+        return '\n'.join(ret)
 
     def __contains__(self, entry):
         """
-        Overriden ``list`` method to implement the membership test (in and
-        not in).
-        The method considers that an entry is in the file if it finds an entry
-        that has the same msgid (the test is **case sensitive**).
-
-        Argument:
-
-        ``entry``
-            an instance of :class:`~polib._BaseEntry`.
+        Overridden method to implement the membership test (in and not in).
+        The method considers that an entry is in the file if it finds an
+        entry that has the same msgid (case sensitive).
+
+        **Keyword argument**:
+          - *entry*: an instance of polib._BaseEntry
+
+        **Tests**:
+        >>> po = POFile()
+        >>> e1 = POEntry(msgid='foobar', msgstr='spam')
+        >>> e2 = POEntry(msgid='barfoo', msgstr='spam')
+        >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
+        >>> e4 = POEntry(msgid='spameggs', msgstr='eggs')
+        >>> po.append(e1)
+        >>> po.append(e2)
+        >>> e1 in po
+        True
+        >>> e2 not in po
+        False
+        >>> e3 in po
+        True
+        >>> e4 in po
+        False
         """
         return self.find(entry.msgid, by='msgid') is not None
-    
-    def __eq__(self, other):
-        return unicode(self) == unicode(other)
 
     def append(self, entry):
         """
         Overriden method to check for duplicates entries, if a user tries to
-        add an entry that is already in the file, the method will raise a
-        ``ValueError`` exception.
-
-        Argument:
-
-        ``entry``
-            an instance of :class:`~polib._BaseEntry`.
+        add an entry that already exists, the method will raise a ValueError
+        exception.
+
+        **Keyword argument**:
+          - *entry*: an instance of polib._BaseEntry
+
+        **Tests**:
+        >>> e1 = POEntry(msgid='foobar', msgstr='spam')
+        >>> e2 = POEntry(msgid='foobar', msgstr='eggs')
+        >>> po = POFile(check_for_duplicates=True)
+        >>> po.append(e1)
+        >>> try:
+        ...     po.append(e2)
+        ... except ValueError, e:
+        ...     unicode(e)
+        u'Entry "foobar" already exists'
         """
         if self.check_for_duplicates and entry in self:
             raise ValueError('Entry "%s" already exists' % entry.msgid)
@@ -299,50 +366,70 @@ class _BaseFile(list):
     def insert(self, index, entry):
         """
         Overriden method to check for duplicates entries, if a user tries to
-        add an entry that is already in the file, the method will raise a
-        ``ValueError`` exception.
-
-        Arguments:
-
-        ``index``
-            index at which the entry should be inserted.
-
-        ``entry``
-            an instance of :class:`~polib._BaseEntry`.
+        insert an entry that already exists, the method will raise a ValueError
+        exception.
+
+        **Keyword arguments**:
+          - *index*: index at which the entry should be inserted
+          - *entry*: an instance of polib._BaseEntry
+
+        **Tests**:
+        >>> import polib
+        >>> polib.check_for_duplicates = True
+        >>> e1 = POEntry(msgid='foobar', msgstr='spam')
+        >>> e2 = POEntry(msgid='barfoo', msgstr='eggs')
+        >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
+        >>> po = POFile(check_for_duplicates=True)
+        >>> po.insert(0, e1)
+        >>> po.insert(1, e2)
+        >>> try:
+        ...     po.insert(0, e3)
+        ... except ValueError, e:
+        ...     unicode(e)
+        u'Entry "foobar" already exists'
         """
         if self.check_for_duplicates and entry in self:
             raise ValueError('Entry "%s" already exists' % entry.msgid)
         super(_BaseFile, self).insert(index, entry)
 
+    def __repr__(self):
+        """Return the official string representation of the object."""
+        return '<%s instance at %x>' % (self.__class__.__name__, id(self))
+
     def metadata_as_entry(self):
         """
-        Returns the file metadata as a :class:`~polib.POFile` instance.
+        Return the metadata as an entry:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_fuzzy_header.po')
+        >>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read())
+        True
         """
         e = POEntry(msgid='')
         mdata = self.ordered_metadata()
         if mdata:
             strs = []
+            e._multiline_str['msgstr'] = ''
             for name, value in mdata:
                 # Strip whitespace off each line in a multi-line entry
                 strs.append('%s: %s' % (name, value))
             e.msgstr = '\n'.join(strs) + '\n'
+            e._multiline_str['msgstr'] = '__POLIB__NL__'.join(
+                    [s + '\n' for s in strs])
         if self.metadata_is_fuzzy:
             e.flags.append('fuzzy')
         return e
 
     def save(self, fpath=None, repr_method='__str__'):
         """
-        Saves the po file to ``fpath``.
-        If it is an existing file and no ``fpath`` is provided, then the
-        existing file is rewritten with the modified data.
-
-        Keyword arguments:
+        Save the po file to file *fpath* if no file handle exists for
+        the object. If there's already an open file and no fpath is
+        provided, then the existing file is rewritten with the modified
+        data.
 
-        ``fpath``
-            string, full or relative path to the file.
-
-        ``repr_method``
-            string, the method to use for output.
+        **Keyword arguments**:
+          - *fpath*: string, full or relative path to the file.
+          - *repr_method*: string, the method to use for output.
         """
         if self.fpath is None and fpath is None:
             raise IOError('You must provide a file path to save() method')
@@ -357,47 +444,38 @@ class _BaseFile(list):
                 contents = contents.decode(self.encoding)
         fhandle.write(contents)
         fhandle.close()
-        # set the file path if not set
-        if self.fpath is None and fpath:
-            self.fpath = fpath
 
-    def find(self, st, by='msgid', include_obsolete_entries=False,
-             msgctxt=False):
+    def find(self, st, by='msgid'):
         """
-        Find the entry which msgid (or property identified by the ``by``
-        argument) matches the string ``st``.
-
-        Keyword arguments:
+        Find the first entry whose msgid (or the attribute named by the
+        *by* argument) matches the string *st*.
 
-        ``st``
-            string, the string to search for.
+        **Keyword arguments**:
+          - *st*: string, the string to search for
+          - *by*: string, the comparison attribute
 
-        ``by``
-            string, the property to use for comparison (default: ``msgid``).
+        **Examples**:
 
-        ``include_obsolete_entries``
-            boolean, whether to also search in entries that are obsolete.
-
-        ``msgctxt``
-            string, allows to specify a specific message context for the
-            search.
+        >>> po = pofile('tests/test_utf8.po')
+        >>> entry = po.find('Thursday')
+        >>> entry.msgstr
+        u'Jueves'
+        >>> entry = po.find('Some unexistant msgid')
+        >>> entry is None
+        True
+        >>> entry = po.find('Jueves', 'msgstr')
+        >>> entry.msgid
+        u'Thursday'
         """
-        if include_obsolete_entries:
-            entries = self[:]
-        else:
-            entries = [e for e in self if not e.obsolete]
-        for e in entries:
+        for e in self:
             if getattr(e, by) == st:
-                if msgctxt and e.msgctxt != msgctxt:
-                    continue
                 return e
         return None
 
     def ordered_metadata(self):
         """
-        Convenience method that returns an ordered version of the metadata
-        dictionnary. The return value is list of tuples (metadata name,
-        metadata_value).
+        Convenience method that returns the metadata in order. The return
+        value is a list of tuples (metadata name, metadata_value).
         """
         # copy the dict first
         metadata = self.metadata.copy()
@@ -419,10 +497,9 @@ class _BaseFile(list):
                 ordered_data.append((data, value))
             except KeyError:
                 pass
-        # the rest of the metadata will be alphabetically ordered since there
-        # are no specs for this AFAIK
+        # the rest of the metadata won't be ordered there are no specs for this
         keys = metadata.keys()
-        keys.sort()
+        list(keys).sort()
         for data in keys:
             value = metadata[data]
             ordered_data.append((data, value))
@@ -430,51 +507,45 @@ class _BaseFile(list):
 
     def to_binary(self):
         """
-        Return the binary representation of the file.
+        Return the mofile binary representation.
         """
+        import array
+        import struct
+        import types
         offsets = []
         entries = self.translated_entries()
         # the keys are sorted in the .mo file
         def cmp(_self, other):
-            # msgfmt compares entries with msgctxt if it exists
-            self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
-            other_msgid = other.msgctxt and other.msgctxt or other.msgid
-            if self_msgid > other_msgid:
+            if _self.msgid > other.msgid:
                 return 1
-            elif self_msgid < other_msgid:
+            elif _self.msgid < other.msgid:
                 return -1
             else:
                 return 0
         # add metadata entry
         entries.sort(cmp)
         mentry = self.metadata_as_entry()
-        #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
+        mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
         entries = [mentry] + entries
         entries_len = len(entries)
         ids, strs = '', ''
         for e in entries:
             # For each string, we need size and file offset.  Each string is
             # NUL terminated; the NUL does not count into the size.
-            msgid = ''
-            if e.msgctxt:
-                # Contexts are stored by storing the concatenation of the
-                # context, a <EOT> byte, and the original string
-                msgid = self._encode(e.msgctxt + '\4')
             if e.msgid_plural:
                 indexes = e.msgstr_plural.keys()
                 indexes.sort()
                 msgstr = []
                 for index in indexes:
                     msgstr.append(e.msgstr_plural[index])
-                msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
+                msgid = self._encode(e.msgid + '\0' + e.msgid_plural)
                 msgstr = self._encode('\0'.join(msgstr))
             else:
-                msgid += self._encode(e.msgid)
+                msgid = self._encode(e.msgid)
                 msgstr = self._encode(e.msgstr)
             offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
             ids  += msgid  + '\0'
             strs += msgstr + '\0'
-
         # The header is 7 32-bit unsigned integers.
         keystart = 7*4+16*entries_len
         # and the values start after the keys
@@ -487,31 +558,22 @@ class _BaseFile(list):
             koffsets += [l1, o1+keystart]
             voffsets += [l2, o2+valuestart]
         offsets = koffsets + voffsets
-        # check endianness for magic number
-        if struct.pack('@h', 1) == struct.pack('<h', 1):
-            magic_number = MOFile.LITTLE_ENDIAN
-        else:
-            magic_number = MOFile.BIG_ENDIAN
-
-        output = struct.pack(
-            "Iiiiiii",
-            magic_number,      # Magic number
-            0,                 # Version
-            entries_len,       # # of entries
-            7*4,               # start of key index
-            7*4+entries_len*8, # start of value index
-            0, keystart        # size and offset of hash table
-                               # Important: we don't use hash tables
-        )              
-        output += array.array("i", offsets).tostring()
+        output  = struct.pack("IIIIIII",
+                             0x950412de,        # Magic number
+                             0,                 # Version
+                             entries_len,       # # of entries
+                             7*4,               # start of key index
+                             7*4+entries_len*8, # start of value index
+                             0, 0)              # size and offset of hash table
+        output += array.array("I", offsets).tostring()
         output += ids
         output += strs
         return output
 
     def _encode(self, mixed):
         """
-        Encodes the given ``mixed`` argument with the file encoding if and
-        only if it's an unicode string and returns the encoded string.
+        Encode the given argument with the file encoding if the type is unicode
+        and return the encoded string.
         """
         if type(mixed) == types.UnicodeType:
             return mixed.encode(self.encoding)
@@ -521,43 +583,88 @@ class _BaseFile(list):
 # class POFile {{{
 
 class POFile(_BaseFile):
-    """
+    '''
     Po (or Pot) file reader/writer.
-    This class inherits the :class:`~polib._BaseFile` class and, by extension,
-    the python ``list`` type.
-    """
+    POFile objects inherit the list objects methods.
+
+    **Example**:
+
+    >>> po = POFile()
+    >>> entry1 = POEntry(
+    ...     msgid="Some english text",
+    ...     msgstr="Un texte en anglais"
+    ... )
+    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
+    >>> entry1.comment = "Some useful comment"
+    >>> entry2 = POEntry(
+    ...     msgid="Peace in some languages",
+    ...     msgstr="Pace سلام שלום Hasîtî 和平"
+    ... )
+    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
+    >>> entry2.comment = "Another useful comment"
+    >>> entry3 = POEntry(
+    ...     msgid='Some entry with quotes " \\"',
+    ...     msgstr='Un message unicode avec des quotes " \\"'
+    ... )
+    >>> entry3.comment = "Test string quoting"
+    >>> po.append(entry1)
+    >>> po.append(entry2)
+    >>> po.append(entry3)
+    >>> po.header = "Some Header"
+    >>> print(po)
+    # Some Header
+    msgid ""
+    msgstr ""
+    <BLANKLINE>
+    #. Some useful comment
+    #: testfile:12 another_file:1
+    msgid "Some english text"
+    msgstr "Un texte en anglais"
+    <BLANKLINE>
+    #. Another useful comment
+    #: testfile:15 another_file:5
+    msgid "Peace in some languages"
+    msgstr "Pace سلام שלום Hasîtî 和平"
+    <BLANKLINE>
+    #. Test string quoting
+    msgid "Some entry with quotes \\" \\""
+    msgstr "Un message unicode avec des quotes \\" \\""
+    <BLANKLINE>
+    '''
 
-    def __unicode__(self):
-        """
-        Returns the unicode representation of the po file.
-        """
+    def __str__(self):
+        """Return the string representation of the po file"""
         ret, headers = '', self.header.split('\n')
         for header in headers:
             if header[:1] in [',', ':']:
                 ret += '#%s\n' % header
             else:
                 ret += '# %s\n' % header
-
-        if type(ret) != types.UnicodeType:
-            ret = unicode(ret, self.encoding)
-
-        return ret + _BaseFile.__unicode__(self)
+        return ret + _BaseFile.__str__(self)
 
     def save_as_mofile(self, fpath):
         """
-        Saves the binary representation of the file to given ``fpath``.
-
-        Keyword argument:
+        Save the binary representation of the file to *fpath*.
 
-        ``fpath``
-            string, full or relative path to the mo file.
+        **Keyword arguments**:
+          - *fpath*: string, full or relative path to the file.
         """
         _BaseFile.save(self, fpath, 'to_binary')
 
     def percent_translated(self):
         """
-        Convenience method that returns the percentage of translated
+        Convenience method that returns the percentage of translated
         messages.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> po.percent_translated()
+        50
+        >>> po = POFile()
+        >>> po.percent_translated()
+        100
         """
         total = len([e for e in self if not e.obsolete])
         if total == 0:
@@ -567,52 +674,91 @@ class POFile(_BaseFile):
 
     def translated_entries(self):
         """
-        Convenience method that returns the list of translated entries.
+        Convenience method that returns a list of translated entries.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> len(po.translated_entries())
+        6
         """
         return [e for e in self if e.translated()]
 
     def untranslated_entries(self):
         """
-        Convenience method that returns the list of untranslated entries.
+        Convenience method that returns a list of untranslated entries.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> len(po.untranslated_entries())
+        4
         """
         return [e for e in self if not e.translated() and not e.obsolete \
                 and not 'fuzzy' in e.flags]
 
     def fuzzy_entries(self):
         """
-        Convenience method that returns the list of fuzzy entries.
+        Convenience method that returns the list of 'fuzzy' entries.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> len(po.fuzzy_entries())
+        2
         """
         return [e for e in self if 'fuzzy' in e.flags]
 
     def obsolete_entries(self):
         """
-        Convenience method that returns the list of obsolete entries.
+        Convenience method that returns the list of obsolete entries.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> len(po.obsolete_entries())
+        4
         """
         return [e for e in self if e.obsolete]
 
     def merge(self, refpot):
         """
-        Convenience method that merges the current pofile with the pot file
+        XXX this may not work if the encodings differ; it needs more thought
+        and a general refactoring of how polib handles encoding...
+
+        Convenience method that merges the current pofile with the pot file
         provided. It behaves exactly as the gettext msgmerge utility:
 
-        * comments of this file will be preserved, but extracted comments and
-          occurrences will be discarded;
-        * any translations or comments in the file will be discarded, however,
-          dot comments and file positions will be preserved;
-        * the fuzzy flags are preserved.
+          - comments of this file will be preserved, but extracted comments
+            and occurrences will be discarded
+          - any translations or comments in the file will be discarded,
+            however dot comments and file positions will be preserved
 
-        Keyword argument:
+        **Keyword argument**:
+          - *refpot*: object POFile, the reference catalog.
 
-        ``refpot``
-            object POFile, the reference catalog.
+        **Example**:
+
+        >>> import polib
+        >>> refpot = polib.pofile('tests/test_merge.pot')
+        >>> po = polib.pofile('tests/test_merge_before.po')
+        >>> po.merge(refpot)
+        >>> expected_po = polib.pofile('tests/test_merge_after.po')
+        >>> unicode(po) == unicode(expected_po)
+        True
         """
         for entry in refpot:
-            e = self.find(entry.msgid, include_obsolete_entries=True)
+            e = self.find(entry.msgid)
             if e is None:
                 e = POEntry()
                 self.append(e)
             e.merge(entry)
-        # ok, now we must "obsolete" entries that are not in the refpot anymore
+        # ok, now we must "obsolete" entries that are not in the refpot
+        # anymore
         for entry in self:
             if refpot.find(entry.msgid) is None:
                 entry.obsolete = True
@@ -621,18 +767,48 @@ class POFile(_BaseFile):
 # class MOFile {{{
 
 class MOFile(_BaseFile):
-    """
+    '''
     Mo file reader/writer.
-    This class inherits the :class:`~polib._BaseFile` class and, by
-    extension, the python ``list`` type.
-    """
-    BIG_ENDIAN    = 0xde120495
-    LITTLE_ENDIAN = 0x950412de
+    MOFile objects inherit the methods of list objects.
+
+    **Example**:
+
+    >>> mo = MOFile()
+    >>> entry1 = POEntry(
+    ...     msgid="Some english text",
+    ...     msgstr="Un texte en anglais"
+    ... )
+    >>> entry2 = POEntry(
+    ...     msgid="I need my dirty cheese",
+    ...     msgstr="Je veux mon sale fromage"
+    ... )
+    >>> entry3 = MOEntry(
+    ...     msgid='Some entry with quotes " \\"',
+    ...     msgstr='Un message unicode avec des quotes " \\"'
+    ... )
+    >>> mo.append(entry1)
+    >>> mo.append(entry2)
+    >>> mo.append(entry3)
+    >>> print(mo)
+    msgid ""
+    msgstr ""
+    <BLANKLINE>
+    msgid "Some english text"
+    msgstr "Un texte en anglais"
+    <BLANKLINE>
+    msgid "I need my dirty cheese"
+    msgstr "Je veux mon sale fromage"
+    <BLANKLINE>
+    msgid "Some entry with quotes \\" \\""
+    msgstr "Un message unicode avec des quotes \\" \\""
+    <BLANKLINE>
+    '''
 
     def __init__(self, *args, **kwargs):
         """
-        Constructor, accepts all keywords arguments accepted by 
-        :class:`~polib._BaseFile` class.
+        MOFile constructor. Mo files have two other properties:
+            - magic_number: the magic_number of the binary file,
+            - version: the version of the mo spec.
         """
         _BaseFile.__init__(self, *args, **kwargs)
         self.magic_number = None
@@ -640,23 +816,19 @@ class MOFile(_BaseFile):
 
     def save_as_pofile(self, fpath):
         """
-        Saves the mofile as a pofile to ``fpath``.
+        Save the string representation of the file to *fpath*.
 
-        Keyword argument:
-
-        ``fpath``
-            string, full or relative path to the file.
+        **Keyword argument**:
+          - *fpath*: string, full or relative path to the file.
         """
         _BaseFile.save(self, fpath)
 
-    def save(self, fpath=None):
+    def save(self, fpath):
         """
-        Saves the mofile to ``fpath``.
-
-        Keyword argument:
+        Save the binary representation of the file to *fpath*.
 
-        ``fpath``
-            string, full or relative path to the file.
+        **Keyword argument**:
+          - *fpath*: string, full or relative path to the file.
         """
         _BaseFile.save(self, fpath, 'to_binary')
 
@@ -695,47 +867,29 @@ class MOFile(_BaseFile):
 
 class _BaseEntry(object):
     """
-    Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
-    This class should **not** be instanciated directly.
+    Base class for POEntry or MOEntry objects.
+    This class must *not* be instantiated directly.
     """
 
     def __init__(self, *args, **kwargs):
-        """
-        Constructor, accepts the following keyword arguments:
-
-        ``msgid``
-            string, the entry msgid.
-
-        ``msgstr``
-            string, the entry msgstr.
-
-        ``msgid_plural``
-            string, the entry msgid_plural.
-
-        ``msgstr_plural``
-            list, the entry msgstr_plural lines.
-
-        ``msgctxt``
-            string, the entry context (msgctxt).
-
-        ``obsolete``
-            bool, whether the entry is "obsolete" or not.
-
-        ``encoding``
-            string, the encoding to use, defaults to ``default_encoding``
-            global variable (optional).
-        """
+        """Base Entry constructor."""
         self.msgid = kwargs.get('msgid', '')
         self.msgstr = kwargs.get('msgstr', '')
         self.msgid_plural = kwargs.get('msgid_plural', '')
         self.msgstr_plural = kwargs.get('msgstr_plural', {})
-        self.msgctxt = kwargs.get('msgctxt', None)
         self.obsolete = kwargs.get('obsolete', False)
         self.encoding = kwargs.get('encoding', default_encoding)
+        self.msgctxt = kwargs.get('msgctxt', None)
+        self._multiline_str = {}
 
-    def __unicode__(self, wrapwidth=78):
+    def __repr__(self):
+        """Return the official string representation of the object."""
+        return '<%s instance at %x>' % (self.__class__.__name__, id(self))
+
+    def __str__(self, wrapwidth=78):
         """
-        Returns the unicode representation of the entry.
+        Common string representation of the POEntry and MOEntry
+        objects.
         """
         if self.obsolete:
             delflag = '#~ '
@@ -744,12 +898,12 @@ class _BaseEntry(object):
         ret = []
         # write the msgctxt if any
         if self.msgctxt is not None:
-            ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
+            ret += self._str_field("msgctxt", delflag, "", self.msgctxt)
         # write the msgid
-        ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
+        ret += self._str_field("msgid", delflag, "", self.msgid)
         # write the msgid_plural if any
         if self.msgid_plural:
-            ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
+            ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
         if self.msgstr_plural:
             # write the msgstr_plural if any
             msgstrs = self.msgstr_plural
@@ -758,51 +912,23 @@ class _BaseEntry(object):
             for index in keys:
                 msgstr = msgstrs[index]
                 plural_index = '[%s]' % index
-                ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
+                ret += self._str_field("msgstr", delflag, plural_index, msgstr)
         else:
             # otherwise write the msgstr
-            ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
+            ret += self._str_field("msgstr", delflag, "", self.msgstr)
         ret.append('')
-        ret = '\n'.join(ret)
-
-        if type(ret) != types.UnicodeType:
-            return unicode(ret, self.encoding)
-        return ret
-
-    def __str__(self):
-        """
-        Returns the string representation of the entry.
-        """
-        return unicode(self).encode(self.encoding)
-    
-    def __eq__(self, other):
-        return unicode(self) == unicode(other)
+        return '\n'.join(ret)
 
-    def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
-        lines = field.splitlines(True)
-        if len(lines) > 1:
-            lines = [''] + lines # start with initial empty line
+    def _str_field(self, fieldname, delflag, plural_index, field):
+        if (fieldname + plural_index) in self._multiline_str:
+            field = self._multiline_str[fieldname + plural_index]
+            lines = [''] + field.split('__POLIB__NL__')
         else:
-            escaped_field = escape(field)
-            specialchars_count = 0
-            for c in ['\\', '\n', '\r', '\t', '"']:
-                specialchars_count += field.count(c)
-            # comparison must take into account fieldname length + one space 
-            # + 2 quotes (eg. msgid "<string>")
-            flength = len(fieldname) + 3
-            if plural_index:
-                flength += len(plural_index)
-            real_wrapwidth = wrapwidth - flength + specialchars_count
-            if wrapwidth > 0 and len(field) > real_wrapwidth:
-                # Wrap the line but take field name into account
-                lines = [''] + [unescape(item) for item in wrap(
-                    escaped_field,
-                    wrapwidth - 2, # 2 for quotes ""
-                    drop_whitespace=False,
-                    break_long_words=False
-                )]
+            lines = field.splitlines(True)
+            if len(lines) > 1:
+                lines = ['']+lines # start with initial empty line
             else:
-                lines = [field]
+                lines = [field] # needed for the empty string case
         if fieldname.startswith('previous_'):
             # quick and dirty trick to get the real field name
             fieldname = fieldname[9:]
@@ -819,33 +945,50 @@ class _BaseEntry(object):
 class POEntry(_BaseEntry):
     """
     Represents a po file entry.
+
+    **Examples**:
+
+    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
+    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
+    >>> print(entry)
+    #: welcome.py:12 anotherfile.py:34
+    msgid "Welcome"
+    msgstr "Bienvenue"
+    <BLANKLINE>
+    >>> entry = POEntry()
+    >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
+    >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
+    >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
+    >>> entry.flags.append('c-format')
+    >>> entry.previous_msgctxt = '@somecontext'
+    >>> entry.previous_msgid = 'I had eggs but no spam !'
+    >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
+    >>> entry.msgctxt = '@somenewcontext'
+    >>> entry.msgid = 'I have spam but no egg !'
+    >>> entry.msgid_plural = 'I have spam and %d eggs !'
+    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
+    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
+    >>> print(entry)
+    #. A plural translation. This is a very very very long line please do not
+    #. wrap, this is just for testing comment wrapping...
+    # A plural translation. This is a very very very long line please do not wrap,
+    # this is just for testing comment wrapping...
+    #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
+    #: src/eggs.c:45
+    #, c-format
+    #| msgctxt "@somecontext"
+    #| msgid "I had eggs but no spam !"
+    #| msgid_plural "I had eggs and %d spam !"
+    msgctxt "@somenewcontext"
+    msgid "I have spam but no egg !"
+    msgid_plural "I have spam and %d eggs !"
+    msgstr[0] "J'ai du jambon mais aucun oeuf !"
+    msgstr[1] "J'ai du jambon et %d oeufs !"
+    <BLANKLINE>
     """
 
     def __init__(self, *args, **kwargs):
-        """
-        Constructor, accepts the following keyword arguments:
-
-        ``comment``
-            string, the entry comment.
-
-        ``tcomment``
-            string, the entry translator comment.
-
-        ``occurrences``
-            list, the entry occurrences.
-
-        ``flags``
-            list, the entry flags.
-
-        ``previous_msgctxt``
-            string, the entry previous context.
-
-        ``previous_msgid``
-            string, the entry previous msgid.
-
-        ``previous_msgid_plural``
-            string, the entry previous msgid_plural.
-        """
+        """POEntry constructor."""
         _BaseEntry.__init__(self, *args, **kwargs)
         self.comment = kwargs.get('comment', '')
         self.tcomment = kwargs.get('tcomment', '')
@@ -855,31 +998,33 @@ class POEntry(_BaseEntry):
         self.previous_msgid = kwargs.get('previous_msgid', None)
         self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
 
-    def __unicode__(self, wrapwidth=78):
+    def __str__(self, wrapwidth=78):
         """
-        Returns the unicode representation of the entry.
+        Return the string representation of the entry.
         """
         if self.obsolete:
-            return _BaseEntry.__unicode__(self, wrapwidth)
-
+            return _BaseEntry.__str__(self)
         ret = []
-        # comments first, if any (with text wrapping as xgettext does)
-        comments = [('comment', '#. '), ('tcomment', '# ')]
-        for c in comments:
-            val = getattr(self, c[0])
-            if val:
-                for comment in val.split('\n'):
-                    if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
-                        ret += wrap(
-                            comment,
-                            wrapwidth,
-                            initial_indent=c[1],
-                            subsequent_indent=c[1],
-                            break_long_words=False
-                        )
-                    else:
-                        ret.append('%s%s' % (c[1], comment))
-
+        # comment first, if any (with text wrapping as xgettext does)
+        if self.comment != '':
+            for comment in self.comment.split('\n'):
+                if wrapwidth > 0 and len(comment) > wrapwidth-3:
+                    ret += textwrap.wrap(comment, wrapwidth,
+                                         initial_indent='#. ',
+                                         subsequent_indent='#. ',
+                                         break_long_words=False)
+                else:
+                    ret.append('#. %s' % comment)
+        # translator comment, if any (with text wrapping as xgettext does)
+        if self.tcomment != '':
+            for tcomment in self.tcomment.split('\n'):
+                if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
+                    ret += textwrap.wrap(tcomment, wrapwidth,
+                                         initial_indent='# ',
+                                         subsequent_indent='# ',
+                                         break_long_words=False)
+                else:
+                    ret.append('# %s' % tcomment)
         # occurrences (with text wrapping as xgettext does)
         if self.occurrences:
             filelist = []
@@ -889,43 +1034,79 @@ class POEntry(_BaseEntry):
                 else:
                     filelist.append(fpath)
             filestr = ' '.join(filelist)
-            if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
-                # textwrap split words that contain hyphen, this is not 
+            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
+                # XXX textwrap splits words that contain a hyphen, this is not
                 # what we want for filenames, so the dirty hack is to 
                 # temporally replace hyphens with a char that a file cannot 
                 # contain, like "*"
-                ret += [l.replace('*', '-') for l in wrap(
-                    filestr.replace('-', '*'),
-                    wrapwidth,
-                    initial_indent='#: ',
-                    subsequent_indent='#: ',
-                    break_long_words=False
-                )]
+                lines = textwrap.wrap(filestr.replace('-', '*'),
+                                      wrapwidth,
+                                      initial_indent='#: ',
+                                      subsequent_indent='#: ',
+                                      break_long_words=False)
+                # end of the replace hack
+                for line in lines:
+                    ret.append(line.replace('*', '-'))
             else:
-                ret.append('#: ' + filestr)
-
-        # flags (TODO: wrapping ?)
+                ret.append('#: '+filestr)
+        # flags
         if self.flags:
-            ret.append('#, %s' % ', '.join(self.flags))
+            flags = []
+            for flag in self.flags:
+                flags.append(flag)
+            ret.append('#, %s' % ', '.join(flags))
 
         # previous context and previous msgid/msgid_plural
-        fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
-        for f in fields:
-            val = getattr(self, f)
-            if val:
-                ret += self._str_field(f, "#| ", "", val, wrapwidth)
-
-        ret.append(_BaseEntry.__unicode__(self, wrapwidth))
-        ret = '\n'.join(ret)
-
-        if type(ret) != types.UnicodeType:
-            return unicode(ret, self.encoding)
-        return ret
+        if self.previous_msgctxt:
+            ret += self._str_field("previous_msgctxt", "#| ", "",
+                                   self.previous_msgctxt)
+        if self.previous_msgid:
+            ret += self._str_field("previous_msgid", "#| ", "", 
+                                   self.previous_msgid)
+        if self.previous_msgid_plural:
+            ret += self._str_field("previous_msgid_plural", "#| ", "", 
+                                   self.previous_msgid_plural)
+
+        ret.append(_BaseEntry.__str__(self))
+        return '\n'.join(ret)
 
     def __cmp__(self, other):
-        """
+        '''
         Called by comparison operations if rich comparison is not defined.
-        """
+
+        **Tests**:
+        >>> a  = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
+        >>> b  = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
+        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
+        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
+        >>> po = POFile()
+        >>> po.append(a)
+        >>> po.append(b)
+        >>> po.append(c1)
+        >>> po.append(c2)
+        >>> po.sort()
+        >>> print(po)
+        # 
+        msgid ""
+        msgstr ""
+        <BLANKLINE>
+        #: a.py:1 a.py:3
+        msgid "c2"
+        msgstr ""
+        <BLANKLINE>
+        #: a.py:1 b.py:1
+        msgid "c1"
+        msgstr ""
+        <BLANKLINE>
+        #: b.py:1 b.py:3
+        msgid "a"
+        msgstr ""
+        <BLANKLINE>
+        #: b.py:1 b.py:3
+        msgid "b"
+        msgstr ""
+        <BLANKLINE>
+        '''
         def compare_occurrences(a, b):
             """
             Compare an entry occurrence with another one.
@@ -972,8 +1153,7 @@ class POEntry(_BaseEntry):
 
     def translated(self):
         """
-        Returns ``True`` if the entry has been translated or ``False``
-        otherwise.
+        Return True if the entry has been translated, False otherwise.
         """
         if self.obsolete or 'fuzzy' in self.flags:
             return False
@@ -990,19 +1170,11 @@ class POEntry(_BaseEntry):
         """
         Merge the current entry with the given pot entry.
         """
-        self.msgid = other.msgid
-        self.msgctxt = other.msgctxt
-        self.occurrences = other.occurrences
-        self.comment = other.comment
-        fuzzy = 'fuzzy' in self.flags
-        self.flags = other.flags[:]  # clone flags
-        if fuzzy:
-            self.flags.append('fuzzy')
+        self.msgid        = other.msgid
+        self.occurrences  = other.occurrences
+        self.comment      = other.comment
+        self.flags        = other.flags
         self.msgid_plural = other.msgid_plural
-        self.obsolete = other.obsolete
-        self.previous_msgctxt = other.previous_msgctxt
-        self.previous_msgid = other.previous_msgid
-        self.previous_msgid_plural = other.previous_msgid_plural
         if other.msgstr_plural:
             for pos in other.msgstr_plural:
                 try:
@@ -1017,8 +1189,23 @@ class POEntry(_BaseEntry):
 class MOEntry(_BaseEntry):
     """
     Represents a mo file entry.
+
+    **Examples**:
+
+    >>> entry = MOEntry()
+    >>> entry.msgid  = 'translate me !'
+    >>> entry.msgstr = 'traduisez moi !'
+    >>> print(entry)
+    msgid "translate me !"
+    msgstr "traduisez moi !"
+    <BLANKLINE>
     """
-    pass
+
+    def __str__(self, wrapwidth=78):
+        """
+        Return the string representation of the entry.
+        """
+        return _BaseEntry.__str__(self, wrapwidth)
 
 # }}}
 # class _POFileParser {{{
@@ -1029,37 +1216,28 @@ class _POFileParser(object):
     file format.
     """
 
-    def __init__(self, pofile, *args, **kwargs):
+    def __init__(self, fpath, *args, **kwargs):
         """
         Constructor.
 
-        Keyword arguments:
-
-        ``pofile``
-            string, path to the po file or its content
-
-        ``encoding``
-            string, the encoding to use, defaults to ``default_encoding``
-            global variable (optional).
-
-        ``check_for_duplicates``
-            whether to check for duplicate entries when adding entries to the
-            file (optional, default: ``False``).
+        **Arguments**:
+          - *fpath*: string, path to the po file
+          - *encoding*: string, the encoding to use, defaults to
+            "default_encoding" global variable (optional),
+          - *check_for_duplicates*: whether to check for duplicate entries
+            when adding entries to the file, default: False (optional).
         """
         enc = kwargs.get('encoding', default_encoding)
-        if os.path.exists(pofile):
-            try:
-                self.fhandle = codecs.open(pofile, 'rU', enc)
-            except LookupError:
-                enc = default_encoding
-                self.fhandle = codecs.open(pofile, 'rU', enc)
-        else:
-            self.fhandle = pofile.splitlines()
-
+        check_dup = kwargs.get('check_for_duplicates', False)
+        try:
+            self.fhandle = codecs.open(fpath, 'rU', enc)
+        except LookupError:
+            enc = default_encoding
+            self.fhandle = codecs.open(fpath, 'rU', enc)
         self.instance = POFile(
-            pofile=pofile,
+            fpath=fpath,
             encoding=enc,
-            check_for_duplicates=kwargs.get('check_for_duplicates', False)
+            check_for_duplicates=check_dup
         )
         self.transitions = {}
         self.current_entry = POEntry()
@@ -1111,103 +1289,59 @@ class _POFileParser(object):
         Run the state machine, parse the file line by line and call process()
         with the current matched symbol.
         """
-        i = 0
-
-        keywords = {
-            'msgctxt': 'CT',
-            'msgid': 'MI',
-            'msgstr': 'MS',
-            'msgid_plural': 'MP',
-        }
-        prev_keywords = {
-            'msgid_plural': 'PP',
-            'msgid': 'PM',
-            'msgctxt': 'PC',
-        }
-
+        i, lastlen = 1, 0
         for line in self.fhandle:
-            i += 1
             line = line.strip()
             if line == '':
+                i = i+1
                 continue
-
-            tokens = line.split(None, 2)
-            nb_tokens = len(tokens)
-
-            if tokens[0] == '#~' and nb_tokens > 1:
-                line = line[3:].strip()
-                tokens = tokens[1:]
-                nb_tokens -= 1
+            if line[:3] == '#~ ':
+                line = line[3:]
                 self.entry_obsolete = 1
             else:
                 self.entry_obsolete = 0
-
-            # Take care of keywords like
-            # msgid, msgid_plural, msgctxt & msgstr.
-            if tokens[0] in keywords and nb_tokens > 1:
-                line = line[len(tokens[0]):].lstrip()
-                self.current_token = line
-                self.process(keywords[tokens[0]], i)
-                continue
-
             self.current_token = line
-
-            if tokens[0] == '#:' and nb_tokens > 1:
+            if line[:2] == '#:':
                 # we are on a occurrences line
                 self.process('OC', i)
-
-            elif line[:1] == '"':
-                # we are on a continuation line
+            elif line[:9] == 'msgctxt "':
+                # we are on a msgctxt
+                self.process('CT', i)
+            elif line[:7] == 'msgid "':
+                # we are on a msgid
+                self.process('MI', i)
+            elif line[:8] == 'msgstr "':
+                # we are on a msgstr
+                self.process('MS', i)
+            elif line[:1] == '"' or line[:4] == '#| "':
+                # we are on a continuation line or some metadata
                 self.process('MC', i)
-
+            elif line[:14] == 'msgid_plural "':
+                # we are on a msgid plural
+                self.process('MP', i)
             elif line[:7] == 'msgstr[':
                 # we are on a msgstr plural
                 self.process('MX', i)
-
-            elif tokens[0] == '#,' and nb_tokens > 1:
+            elif line[:3] == '#, ':
                 # we are on a flags line
                 self.process('FL', i)
-
-            elif tokens[0] == '#':
-                if line == '#': line += ' '
+            elif line[:2] == '# ' or line == '#':
+                if line == '#': line = line + ' '
                 # we are on a translator comment line
                 self.process('TC', i)
-
-            elif tokens[0] == '#.' and nb_tokens > 1:
+            elif line[:2] == '#.':
                 # we are on a generated comment line
                 self.process('GC', i)
-
-            elif tokens[0] == '#|':
-                if nb_tokens < 2:
-                    self.process('??', i)
-                    continue
-
-                # Remove the marker and any whitespace right after that.
-                line = line[2:].lstrip()
-                self.current_token = line
-
-                if tokens[1].startswith('"'):
-                    # Continuation of previous metadata.
-                    self.process('MC', i)
-                    continue
-
-                if nb_tokens == 2:
-                    # Invalid continuation line.
-                    self.process('??', i)
-
-                # we are on a "previous translation" comment line,
-                if tokens[1] not in prev_keywords:
-                    # Unknown keyword in previous translation comment.
-                    self.process('??', i)
-
-                # Remove the keyword and any whitespace
-                # between it and the starting quote.
-                line = line[len(tokens[1]):].lstrip()
-                self.current_token = line
-                self.process(prev_keywords[tokens[1]], i)
-
-            else:
-                self.process('??', i)
+            elif line[:15] == '#| msgid_plural':
+                # we are on a previous msgid_plural
+                self.process('PP', i)
+            elif line[:8] == '#| msgid':
+                self.process('PM', i)
+                # we are on a previous msgid
+            elif line[:10] == '#| msgctxt':
+                # we are on a previous msgctxt
+                self.process('PC', i)
+            i = i+1
 
         if self.current_entry:
             # since entries are added when another entry is found, we must add
@@ -1229,24 +1363,17 @@ class _POFileParser(object):
                     if key is not None:
                         self.instance.metadata[key] += '\n'+ msg.strip()
         # close opened file
-        if isinstance(self.fhandle, file):
-            self.fhandle.close()
+        self.fhandle.close()
         return self.instance
 
     def add(self, symbol, states, next_state):
         """
         Add a transition to the state machine.
-
         Keywords arguments:
 
-        ``symbol``
-            string, the matched token (two chars symbol).
-
-        ``states``
-            list, a list of states (two chars symbols).
-
-        ``next_state``
-            the next state the fsm will have after the action.
+        symbol     -- string, the matched token (two chars symbol)
+        states     -- list, a list of states (two chars symbols)
+        next_state -- the next state the fsm will have after the action
         """
         for state in states:
             action = getattr(self, 'handle_%s' % next_state.lower())
@@ -1258,12 +1385,8 @@ class _POFileParser(object):
         symbol provided.
 
         Keywords arguments:
-
-        ``symbol``
-            string, the matched token (two chars symbol).
-
-        ``linenum``
-            integer, the current line number of the parsed file.
+        symbol  -- string, the matched token (two chars symbol)
+        linenum -- integer, the current line number of the parsed file
         """
         try:
             (action, state) = self.transitions[(symbol, self.current_state)]
@@ -1333,7 +1456,7 @@ class _POFileParser(object):
             self.instance.append(self.current_entry)
             self.current_entry = POEntry()
         self.current_entry.previous_msgid_plural = \
-            unescape(self.current_token[1:-1])
+            unescape(self.current_token[17:-1])
         return True
 
     def handle_pm(self):
@@ -1342,7 +1465,7 @@ class _POFileParser(object):
             self.instance.append(self.current_entry)
             self.current_entry = POEntry()
         self.current_entry.previous_msgid = \
-            unescape(self.current_token[1:-1])
+            unescape(self.current_token[10:-1])
         return True
 
     def handle_pc(self):
@@ -1351,7 +1474,7 @@ class _POFileParser(object):
             self.instance.append(self.current_entry)
             self.current_entry = POEntry()
         self.current_entry.previous_msgctxt = \
-            unescape(self.current_token[1:-1])
+            unescape(self.current_token[12:-1])
         return True
 
     def handle_ct(self):
@@ -1359,7 +1482,7 @@ class _POFileParser(object):
         if self.current_state in ['MC', 'MS', 'MX']:
             self.instance.append(self.current_entry)
             self.current_entry = POEntry()
-        self.current_entry.msgctxt = unescape(self.current_token[1:-1])
+        self.current_entry.msgctxt = unescape(self.current_token[9:-1])
         return True
 
     def handle_mi(self):
@@ -1368,17 +1491,17 @@ class _POFileParser(object):
             self.instance.append(self.current_entry)
             self.current_entry = POEntry()
         self.current_entry.obsolete = self.entry_obsolete
-        self.current_entry.msgid = unescape(self.current_token[1:-1])
+        self.current_entry.msgid = unescape(self.current_token[7:-1])
         return True
 
     def handle_mp(self):
         """Handle a msgid plural."""
-        self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
+        self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
         return True
 
     def handle_ms(self):
         """Handle a msgstr."""
-        self.current_entry.msgstr = unescape(self.current_token[1:-1])
+        self.current_entry.msgstr = unescape(self.current_token[8:-1])
         return True
 
     def handle_mx(self):
@@ -1418,6 +1541,10 @@ class _POFileParser(object):
             typ = 'previous_msgctxt'
             token = token[3:]
             self.current_entry.previous_msgctxt += token
+        if typ not in self.current_entry._multiline_str:
+            self.current_entry._multiline_str[typ] = token
+        else:
+            self.current_entry._multiline_str[typ] += "__POLIB__NL__" + token
         # don't change the current state
         return False
 
@@ -1428,41 +1555,43 @@ class _MOFileParser(object):
     """
     A class to parse binary mo files.
     """
+    BIG_ENDIAN    = 0xde120495
+    LITTLE_ENDIAN = 0x950412de
 
-    def __init__(self, mofile, *args, **kwargs):
+    def __init__(self, fpath, *args, **kwargs):
         """
         Constructor.
 
-        Keyword arguments:
-
-        ``mofile``
-            string, path to the mo file or its content
-
-        ``encoding``
-            string, the encoding to use, defaults to ``default_encoding``
-            global variable (optional).
-
-        ``check_for_duplicates``
-            whether to check for duplicate entries when adding entries to the
-            file (optional, default: ``False``).
+        **Arguments**:
+          - *fpath*: string, path to the mo file
+          - *encoding*: string, the encoding to use, defaults to
+            "default_encoding" global variable (optional),
+          - *check_for_duplicates*: whether to check for duplicate entries
+            when adding entries to the file, default: False (optional).
         """
-        self.fhandle = open(mofile, 'rb')
+        enc = kwargs.get('encoding', default_encoding)
+        check_dup = kwargs.get('check_for_duplicates', False)
+        self.fhandle = open(fpath, 'rb')
         self.instance = MOFile(
-            fpath=mofile,
-            encoding=kwargs.get('encoding', default_encoding),
-            check_for_duplicates=kwargs.get('check_for_duplicates', False)
+            fpath=fpath,
+            encoding=enc,
+            check_for_duplicates=check_dup
         )
 
+    def parse_magicnumber(self):
+        """
+        Unimplemented stub: the magic number is currently checked in parse().
+        """
+
     def parse(self):
         """
         Build the instance with the file handle provided in the
         constructor.
         """
-        # parse magic number
         magic_number = self._readbinary('<I', 4)
-        if magic_number == MOFile.LITTLE_ENDIAN:
+        if magic_number == self.LITTLE_ENDIAN:
             ii = '<II'
-        elif magic_number == MOFile.BIG_ENDIAN:
+        elif magic_number == self.BIG_ENDIAN:
             ii = '>II'
         else:
             raise IOError('Invalid mo file, magic number is incorrect !')
@@ -1501,35 +1630,18 @@ class _MOFileParser(object):
             # test if we have a plural entry
             msgid_tokens = msgid.split('\0')
             if len(msgid_tokens) > 1:
-                entry = self._build_entry(
+                entry = MOEntry(
                     msgid=msgid_tokens[0],
                     msgid_plural=msgid_tokens[1],
-                    msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
+                    msgstr_plural=dict((k,v) for k,v in \
+                        enumerate(msgstr.split('\0')))
                 )
             else:
-                entry = self._build_entry(msgid=msgid, msgstr=msgstr)
+                entry = MOEntry(msgid=msgid, msgstr=msgstr)
             self.instance.append(entry)
         # close opened file
         self.fhandle.close()
         return self.instance
-    
-    def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
-                     msgstr_plural=None):
-        msgctxt_msgid = msgid.split('\x04')
-        if len(msgctxt_msgid) > 1:
-            kwargs = {
-                'msgctxt': msgctxt_msgid[0],
-                'msgid'  : msgctxt_msgid[1],
-            }
-        else:
-            kwargs = {'msgid': msgid}
-        if msgstr:
-            kwargs['msgstr'] = msgstr
-        if msgid_plural:
-            kwargs['msgid_plural'] = msgid_plural
-        if msgstr_plural:
-            kwargs['msgstr_plural'] = msgstr_plural
-        return MOEntry(**kwargs)
 
     def _readbinary(self, fmt, numbytes):
         """
@@ -1543,97 +1655,26 @@ class _MOFileParser(object):
         return tup
 
 # }}}
-# class TextWrapper {{{
+# __main__ {{{
 
-class TextWrapper(textwrap.TextWrapper):
+if __name__ == '__main__':
     """
-    Subclass of textwrap.TextWrapper that backport the
-    drop_whitespace option.
+    **Main function**::
+      - to **test** the module just run: *python polib.py [-v]*
+      - to **profile** the module: *python polib.py -p <some_pofile.po>*
     """
-    def __init__(self, *args, **kwargs):
-        drop_whitespace = kwargs.pop('drop_whitespace', True) 
-        textwrap.TextWrapper.__init__(self, *args, **kwargs)
-        self.drop_whitespace = drop_whitespace
-
-    def _wrap_chunks(self, chunks):
-        """_wrap_chunks(chunks : [string]) -> [string]
-
-        Wrap a sequence of text chunks and return a list of lines of
-        length 'self.width' or less.  (If 'break_long_words' is false,
-        some lines may be longer than this.)  Chunks correspond roughly
-        to words and the whitespace between them: each chunk is
-        indivisible (modulo 'break_long_words'), but a line break can
-        come between any two chunks.  Chunks should not have internal
-        whitespace; ie. a chunk is either all whitespace or a "word".
-        Whitespace chunks will be removed from the beginning and end of
-        lines, but apart from that whitespace is preserved.
-        """
-        lines = []
-        if self.width <= 0:
-            raise ValueError("invalid width %r (must be > 0)" % self.width)
-
-        # Arrange in reverse order so items can be efficiently popped
-        # from a stack of chucks.
-        chunks.reverse()
-
-        while chunks:
-
-            # Start the list of chunks that will make up the current line.
-            # cur_len is just the length of all the chunks in cur_line.
-            cur_line = []
-            cur_len = 0
-
-            # Figure out which static string will prefix this line.
-            if lines:
-                indent = self.subsequent_indent
+    import sys
+    if len(sys.argv) > 2 and sys.argv[1] == '-p':
+        def test(f):
+            if f.endswith('po'):
+                p = pofile(f)
             else:
-                indent = self.initial_indent
-
-            # Maximum width for this line.
-            width = self.width - len(indent)
-
-            # First chunk on line is whitespace -- drop it, unless this
-            # is the very beginning of the text (ie. no lines started yet).
-            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
-                del chunks[-1]
-
-            while chunks:
-                l = len(chunks[-1])
-
-                # Can at least squeeze this chunk onto the current line.
-                if cur_len + l <= width:
-                    cur_line.append(chunks.pop())
-                    cur_len += l
-
-                # Nope, this line is full.
-                else:
-                    break
-
-            # The current line is full, and the next chunk is too big to
-            # fit on *any* line (not just this one).
-            if chunks and len(chunks[-1]) > width:
-                self._handle_long_word(chunks, cur_line, cur_len, width)
-
-            # If the last chunk on this line is all whitespace, drop it.
-            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
-                del cur_line[-1]
-
-            # Convert current line back to a string and store it in list
-            # of all lines (return value).
-            if cur_line:
-                lines.append(indent + ''.join(cur_line))
-
-        return lines
+                p = mofile(f)
+            s = unicode(p)
+        import profile
+        profile.run('test("'+sys.argv[2]+'")')
+    else:
+        import doctest
+        doctest.testmod()
 
 # }}}
-# function wrap() {{{
-
-def wrap(text, width=70, **kwargs):
-    """
-    Wrap a single paragraph of text, returning a list of wrapped lines.
-    """
-    if sys.version_info < (2, 6):
-        return TextWrapper(width=width, **kwargs).wrap(text)
-    return textwrap.wrap(text, width=width, **kwargs)
-
-#}}}