[tor-commits] r24379: {} a script to validate translation input, thanks chiiph (translation/trunk/tools)

17 Mar 2011

Author: runa
Date: 2011-03-17 17:52:17 +0000 (Thu, 17 Mar 2011)
New Revision: 24379

Added:
   translation/trunk/tools/polib.py
   translation/trunk/tools/validate.py
Log:
a script to validate translation input, thanks chiiph

Added: translation/trunk/tools/polib.py
===================================================================

--- translation/trunk/tools/polib.py	                        (rev 0)
+++ translation/trunk/tools/polib.py	2011-03-17 17:52:17 UTC (rev 24379)
@@ -0,0 +1,1560 @@
+# -*- coding: utf-8 -*-
+#
+# License: MIT (see LICENSE file provided)
+# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
+
+"""
+**polib** allows you to manipulate, create, modify gettext files (pot, po and
+mo files).  You can load existing files, iterate through their entries, add,
+modify entries, comments or metadata, etc. or create new po files from scratch.
+
+**polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
+:func:`~polib.mofile` convenience functions.
+"""
+
+__author__    = 'David Jean Louis <izimobil@gmail.com>'
+__version__   = '0.6.3'
+# names exported by ``from polib import *``; every name is listed exactly once
+__all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
+                 'detect_encoding', 'escape', 'unescape']
+
+import array
+import codecs
+import os
+import re
+import struct
+import sys
+import textwrap
+import types
+
+
+# the default encoding: used when autodetect_encoding is disabled and no
+# explicit ``encoding`` is given, and returned by detect_encoding() when no
+# valid charset declaration is found
+default_encoding = 'utf-8'
+
+# _pofile_or_mofile {{{
+
+def _pofile_or_mofile(f, type, **kwargs):
+    """
+    Shared implementation behind :func:`polib.pofile` and
+    :func:`polib.mofile`.
+
+    ``f`` is handed to the parser as is, ``type`` is either ``'pofile'`` or
+    ``'mofile'`` and selects the parser class.  Recognised keyword arguments
+    are ``autodetect_encoding`` (default ``True``), ``encoding``,
+    ``check_for_duplicates`` (default ``False``) and ``wrapwidth`` (default
+    ``78``).  Returns the instance built by the parser.
+    """
+    # work out which encoding the parser must use
+    autodetect = kwargs.get('autodetect_encoding', True)
+    if not autodetect:
+        encoding = kwargs.get('encoding', default_encoding)
+    else:
+        encoding = detect_encoding(f, type == 'mofile')
+
+    # pick the parser matching the requested file type
+    if type == 'pofile':
+        parser_class = _POFileParser
+    else:
+        parser_class = _MOFileParser
+    duplicates = kwargs.get('check_for_duplicates', False)
+    parsed = parser_class(
+        f,
+        encoding=encoding,
+        check_for_duplicates=duplicates
+    ).parse()
+
+    # the wrap width is not a parser option, it is set on the result
+    parsed.wrapwidth = kwargs.get('wrapwidth', 78)
+    return parsed
+
+# }}}
+# function pofile() {{{
+
+def pofile(pofile, **kwargs):
+    """
+    Parse the po or pot file ``pofile`` and return the resulting
+    :class:`~polib.POFile` instance.
+
+    Arguments:
+
+    ``pofile``
+        string, either the (full or relative) path to the po/pot file or the
+        content of such a file.
+
+    ``wrapwidth``
+        integer, the wrap width set on the returned instance, only useful
+        when the ``-w`` option was passed to xgettext (optional, default:
+        ``78``).
+
+    ``autodetect_encoding``
+        boolean, when ``False`` the encoding is not detected from the file;
+        the ``encoding`` argument, or failing that ``default_encoding``, is
+        used instead (optional, default: ``True``).
+
+    ``encoding``
+        string, the encoding to use (e.g. "utf-8"); ignored unless
+        ``autodetect_encoding`` is ``False``.
+
+    ``check_for_duplicates``
+        whether adding an entry that already exists in the file should be
+        checked for (optional, default: ``False``).
+    """
+    parsed = _pofile_or_mofile(pofile, 'pofile', **kwargs)
+    return parsed
+
+# }}}
+# function mofile() {{{
+
+def mofile(mofile, **kwargs):
+    """
+    Parse the mo file ``mofile`` and return the resulting
+    :class:`~polib.MOFile` instance.
+
+    Arguments:
+
+    ``mofile``
+        string, either the (full or relative) path to the mo file or the
+        content of such a file.
+
+    ``wrapwidth``
+        integer, the wrap width set on the returned instance, only useful
+        when the ``-w`` option was passed to xgettext to generate the po file
+        the mo file was compiled from (optional, default: ``78``).
+
+    ``autodetect_encoding``
+        boolean, when ``False`` the encoding is not detected from the file;
+        the ``encoding`` argument, or failing that ``default_encoding``, is
+        used instead (optional, default: ``True``).
+
+    ``encoding``
+        string, the encoding to use; ignored unless ``autodetect_encoding``
+        is ``False``.
+
+    ``check_for_duplicates``
+        whether adding an entry that already exists in the file should be
+        checked for (optional, default: ``False``).
+    """
+    parsed = _pofile_or_mofile(mofile, 'mofile', **kwargs)
+    return parsed
+
+# }}}
+# function detect_encoding() {{{
+
+def detect_encoding(file, binary_mode=False):
+    """
+    Try to detect the encoding used by the ``file``. The ``file`` argument can
+    be a PO or MO file path or a string containing the contents of the file.
+    The first ``Content-Type: ... charset=<name>`` declaration naming a codec
+    known to Python wins.  If the encoding cannot be detected, the function
+    will return the value of ``default_encoding``.
+
+    Arguments:
+
+    ``file``
+        string, full or relative path to the po/mo file or its content.
+
+    ``binary_mode``
+        boolean, set this to True if ``file`` is a mo file (the file is then
+        opened in ``'rb'`` mode instead of ``'r'``).
+    """
+    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
+
+    def charset_exists(charset):
+        """Check whether ``charset`` is valid or not."""
+        try:
+            codecs.lookup(charset)
+        except LookupError:
+            return False
+        return True
+
+    def declared_charset(text):
+        """Return the valid charset declared in ``text`` or ``None``."""
+        match = rx.search(text)
+        if match:
+            enc = match.group(1).strip()
+            if charset_exists(enc):
+                return enc
+        return None
+
+    try:
+        is_path = os.path.exists(file)
+    except (TypeError, ValueError):
+        # raw content (e.g. mo data with NUL bytes) cannot be a path and
+        # makes the underlying stat() call raise instead of returning False
+        is_path = False
+
+    if not is_path:
+        enc = declared_charset(file)
+        if enc is not None:
+            return enc
+    else:
+        if binary_mode:
+            mode = 'rb'
+        else:
+            mode = 'r'
+        f = open(file, mode)
+        try:
+            # iterate lazily: the declaration usually sits in the header, so
+            # there is no need to load the whole file first
+            for l in f:
+                enc = declared_charset(l)
+                if enc is not None:
+                    return enc
+        finally:
+            # always release the handle, even when reading fails
+            f.close()
+    return default_encoding
+
+# }}}
+# function escape() {{{
+
+def escape(st):
+    """
+    Returns ``st`` with backslashes, tabs, carriage returns, newlines and
+    double quotes replaced by their backslash escape sequences.
+    """
+    # the backslash must be handled first, otherwise the backslashes added
+    # by the following replacements would be escaped a second time
+    replacements = (
+        ('\\', r'\\'),
+        ('\t', r'\t'),
+        ('\r', r'\r'),
+        ('\n', r'\n'),
+        ('\"', r'\"'),
+    )
+    for raw, escaped in replacements:
+        st = st.replace(raw, escaped)
+    return st
+
+# }}}
+# function unescape() {{{
+
+def unescape(st):
+    """
+    Returns ``st`` with the escape sequences for backslash, tab, newline,
+    carriage return and double quote turned back into the characters they
+    stand for.
+    """
+    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}
+
+    def expand(match):
+        char = match.group(1)
+        # an escaped backslash or double quote stands for the character itself
+        return control_chars.get(char, char)
+
+    return re.sub(r'\\(\\|n|t|r|")', expand, st)
+
+# }}}
+# class _BaseFile {{{
+
+class _BaseFile(list):
+    """
+    Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
+    classes. This class should **not** be instantiated directly.
+
+    It is a ``list`` of entries that also carries file level state (path,
+    encoding, wrap width, header and metadata).  Subclasses must provide the
+    ``obsolete_entries()`` and ``translated_entries()`` methods used by
+    :meth:`__unicode__` and :meth:`to_binary`.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor, accepts the following keyword arguments:
+
+        ``pofile``
+            string, the path to the po or mo file, or its content as a string.
+
+        ``fpath``
+            string, the file path to remember when ``pofile`` is not the
+            path of an existing file (optional, default: ``None``).
+
+        ``wrapwidth``
+            integer, the wrap width, only useful when the ``-w`` option was
+            passed to xgettext (optional, default: ``78``).
+
+        ``encoding``
+            string, the encoding to use, defaults to ``default_encoding``
+            global variable (optional).
+
+        ``check_for_duplicates``
+            whether to check for duplicate entries when adding entries to the
+            file, (optional, default: ``False``).
+        """
+        list.__init__(self)
+        # the file path: ``pofile`` when it names an existing file, otherwise
+        # the explicit ``fpath`` keyword (which may be missing, i.e. None)
+        pofile = kwargs.get('pofile', None)
+        if pofile and os.path.exists(pofile):
+            self.fpath = pofile
+        else:
+            self.fpath = kwargs.get('fpath')
+        # the width at which lines should be wrapped
+        self.wrapwidth = kwargs.get('wrapwidth', 78)
+        # the file encoding
+        self.encoding = kwargs.get('encoding', default_encoding)
+        # whether to check for duplicate entries or not
+        self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
+        # header
+        self.header = ''
+        # both po and mo files have metadata
+        self.metadata = {}
+        self.metadata_is_fuzzy = 0
+
+    def __unicode__(self):
+        """
+        Returns the unicode representation of the file: the metadata entry,
+        then the non obsolete entries, then the obsolete ones.
+        """
+        ret = []
+        entries = [self.metadata_as_entry()] + \
+                  [e for e in self if not e.obsolete]
+        for entry in entries:
+            ret.append(entry.__unicode__(self.wrapwidth))
+        for entry in self.obsolete_entries():
+            ret.append(entry.__unicode__(self.wrapwidth))
+        ret = '\n'.join(ret)
+
+        # isinstance() also accepts unicode subclasses, which unicode()
+        # would refuse to decode
+        if not isinstance(ret, types.UnicodeType):
+            return unicode(ret, self.encoding)
+        return ret
+
+    def __str__(self):
+        """
+        Returns the string representation of the file, i.e. its unicode
+        representation encoded with the file encoding.
+        """
+        return unicode(self).encode(self.encoding)
+
+    def __contains__(self, entry):
+        """
+        Overridden ``list`` method to implement the membership test (in and
+        not in).
+        The method considers that an entry is in the file if it finds an entry
+        that has the same msgid (the test is **case sensitive**).
+
+        Argument:
+
+        ``entry``
+            an instance of :class:`~polib._BaseEntry`.
+        """
+        return self.find(entry.msgid, by='msgid') is not None
+
+    def __eq__(self, other):
+        """
+        Two files are equal when their unicode representations are equal.
+        """
+        return unicode(self) == unicode(other)
+
+    def append(self, entry):
+        """
+        Overridden method to check for duplicates entries, if a user tries to
+        add an entry that is already in the file, the method will raise a
+        ``ValueError`` exception (only when ``check_for_duplicates`` is set).
+
+        Argument:
+
+        ``entry``
+            an instance of :class:`~polib._BaseEntry`.
+        """
+        if self.check_for_duplicates and entry in self:
+            raise ValueError('Entry "%s" already exists' % entry.msgid)
+        super(_BaseFile, self).append(entry)
+
+    def insert(self, index, entry):
+        """
+        Overridden method to check for duplicates entries, if a user tries to
+        add an entry that is already in the file, the method will raise a
+        ``ValueError`` exception (only when ``check_for_duplicates`` is set).
+
+        Arguments:
+
+        ``index``
+            index at which the entry should be inserted.
+
+        ``entry``
+            an instance of :class:`~polib._BaseEntry`.
+        """
+        if self.check_for_duplicates and entry in self:
+            raise ValueError('Entry "%s" already exists' % entry.msgid)
+        super(_BaseFile, self).insert(index, entry)
+
+    def metadata_as_entry(self):
+        """
+        Returns the file metadata as a :class:`~polib.POEntry` instance whose
+        msgid is empty and whose msgstr holds one ``name: value`` line per
+        metadata item.
+        """
+        e = POEntry(msgid='')
+        mdata = self.ordered_metadata()
+        if mdata:
+            strs = []
+            for name, value in mdata:
+                # values are written as they are, no stripping is done
+                strs.append('%s: %s' % (name, value))
+            e.msgstr = '\n'.join(strs) + '\n'
+        if self.metadata_is_fuzzy:
+            e.flags.append('fuzzy')
+        return e
+
+    def save(self, fpath=None, repr_method='__str__'):
+        """
+        Saves the po file to ``fpath``.
+        If it is an existing file and no ``fpath`` is provided, then the
+        existing file is rewritten with the modified data.
+        Raises ``IOError`` when no path is known at all.
+
+        Keyword arguments:
+
+        ``fpath``
+            string, full or relative path to the file.
+
+        ``repr_method``
+            string, the method to use for output.
+        """
+        if self.fpath is None and fpath is None:
+            raise IOError('You must provide a file path to save() method')
+        contents = getattr(self, repr_method)()
+        if fpath is None:
+            fpath = self.fpath
+        binary = repr_method == 'to_binary'
+        # decode before opening: opening truncates the target, so a decoding
+        # error must not be allowed to wipe an existing file
+        if not binary and not isinstance(contents, types.UnicodeType):
+            contents = contents.decode(self.encoding)
+        if binary:
+            fhandle = open(fpath, 'wb')
+        else:
+            fhandle = codecs.open(fpath, 'w', self.encoding)
+        try:
+            fhandle.write(contents)
+        finally:
+            # never leak the handle, even when the write fails
+            fhandle.close()
+        # set the file path if not set
+        if self.fpath is None and fpath:
+            self.fpath = fpath
+
+    def find(self, st, by='msgid', include_obsolete_entries=False,
+             msgctxt=False):
+        """
+        Find the entry which msgid (or property identified by the ``by``
+        argument) matches the string ``st``.  Returns the first matching
+        entry or ``None``.
+
+        Keyword arguments:
+
+        ``st``
+            string, the string to search for.
+
+        ``by``
+            string, the property to use for comparison (default: ``msgid``).
+
+        ``include_obsolete_entries``
+            boolean, whether to also search in entries that are obsolete.
+
+        ``msgctxt``
+            string, allows to specify a specific message context for the
+            search (default: ``False``, the context is not checked).
+        """
+        if include_obsolete_entries:
+            entries = self[:]
+        else:
+            entries = [e for e in self if not e.obsolete]
+        for e in entries:
+            if getattr(e, by) == st:
+                if msgctxt and e.msgctxt != msgctxt:
+                    continue
+                return e
+        return None
+
+    def ordered_metadata(self):
+        """
+        Convenience method that returns an ordered version of the metadata
+        dictionary. The return value is list of tuples (metadata name,
+        metadata_value): the well known headers first, in their usual order,
+        then every other item in alphabetical order.
+        """
+        # copy the dict first
+        metadata = self.metadata.copy()
+        data_order = [
+            'Project-Id-Version',
+            'Report-Msgid-Bugs-To',
+            'POT-Creation-Date',
+            'PO-Revision-Date',
+            'Last-Translator',
+            'Language-Team',
+            'MIME-Version',
+            'Content-Type',
+            'Content-Transfer-Encoding'
+        ]
+        ordered_data = []
+        for data in data_order:
+            if data in metadata:
+                ordered_data.append((data, metadata.pop(data)))
+        # the rest of the metadata will be alphabetically ordered since there
+        # are no specs for this AFAIK
+        for data in sorted(metadata):
+            ordered_data.append((data, metadata[data]))
+        return ordered_data
+
+    def to_binary(self):
+        """
+        Return the binary representation of the file: the metadata entry
+        followed by the translated entries, laid out as a header, an index
+        table and the NUL terminated keys and values.  No hash table is
+        written.  The file itself is left untouched.
+        """
+        offsets = []
+        # the keys are sorted in the .mo file;
+        # msgfmt compares entries with msgctxt if it exists.
+        # sorted() builds a new list, so a translated_entries() result that
+        # is the file itself does not get reordered as a side effect
+        entries = sorted(
+            self.translated_entries(),
+            key=lambda e: e.msgctxt and e.msgctxt or e.msgid
+        )
+        # add metadata entry
+        mentry = self.metadata_as_entry()
+        entries = [mentry] + entries
+        entries_len = len(entries)
+        ids, strs = '', ''
+        for e in entries:
+            # For each string, we need size and file offset.  Each string is
+            # NUL terminated; the NUL does not count into the size.
+            msgid = ''
+            if e.msgctxt:
+                # Contexts are stored by storing the concatenation of the
+                # context, a <EOT> byte, and the original string
+                msgid = self._encode(e.msgctxt + '\4')
+            if e.msgid_plural:
+                msgstr = []
+                for index in sorted(e.msgstr_plural):
+                    msgstr.append(e.msgstr_plural[index])
+                msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
+                msgstr = self._encode('\0'.join(msgstr))
+            else:
+                msgid += self._encode(e.msgid)
+                msgstr = self._encode(e.msgstr)
+            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
+            ids  += msgid  + '\0'
+            strs += msgstr + '\0'
+
+        # The header is 7 32-bit unsigned integers.
+        keystart = 7*4+16*entries_len
+        # and the values start after the keys
+        valuestart = keystart + len(ids)
+        koffsets = []
+        voffsets = []
+        # The string table first has the list of keys, then the list of values.
+        # Each entry has first the size of the string, then the file offset.
+        for o1, l1, o2, l2 in offsets:
+            koffsets += [l1, o1+keystart]
+            voffsets += [l2, o2+valuestart]
+        offsets = koffsets + voffsets
+        # check endianness for magic number
+        if struct.pack('@h', 1) == struct.pack('<h', 1):
+            magic_number = MOFile.LITTLE_ENDIAN
+        else:
+            magic_number = MOFile.BIG_ENDIAN
+
+        output = struct.pack(
+            "Iiiiiii",
+            magic_number,      # Magic number
+            0,                 # Version
+            entries_len,       # # of entries
+            7*4,               # start of key index
+            7*4+entries_len*8, # start of value index
+            0, keystart        # size and offset of hash table
+                               # Important: we don't use hash tables
+        )
+        output += array.array("i", offsets).tostring()
+        output += ids
+        output += strs
+        return output
+
+    def _encode(self, mixed):
+        """
+        Encodes the given ``mixed`` argument with the file encoding if and
+        only if it's an unicode string and returns the encoded string.
+        Anything else is returned unchanged.
+        """
+        if isinstance(mixed, types.UnicodeType):
+            return mixed.encode(self.encoding)
+        return mixed
+
+# }}}
+# class POFile {{{
+
+class POFile(_BaseFile):
+    """
+    Reader/writer for po (and pot) files.
+    It extends :class:`~polib._BaseFile` and therefore, indirectly, the
+    python ``list`` type.
+    """
+
+    def __unicode__(self):
+        """
+        Returns the unicode representation of the po file: the header as
+        comment lines followed by the entries.
+        """
+        comment_lines = []
+        for header in self.header.split('\n'):
+            # header lines starting with "," or ":" directly follow the "#"
+            if header[:1] in [',', ':']:
+                comment_lines.append('#%s\n' % header)
+            else:
+                comment_lines.append('# %s\n' % header)
+        head = ''.join(comment_lines)
+
+        if type(head) != types.UnicodeType:
+            head = unicode(head, self.encoding)
+
+        body = _BaseFile.__unicode__(self)
+        return head + body
+
+    def save_as_mofile(self, fpath):
+        """
+        Writes the binary (mo) representation of the file to ``fpath``.
+
+        Keyword argument:
+
+        ``fpath``
+            string, full or relative path to the mo file.
+        """
+        _BaseFile.save(self, fpath, 'to_binary')
+
+    def percent_translated(self):
+        """
+        Returns the percentage of translated messages as an integer;
+        obsolete entries are not counted, and a file without any non
+        obsolete entry is reported as ``100``.
+        """
+        active = [e for e in self if not e.obsolete]
+        if not active:
+            return 100
+        translated = len(self.translated_entries())
+        return int((100.00 / float(len(active))) * translated)
+
+    def translated_entries(self):
+        """
+        Returns the list of the entries that are translated.
+        """
+        found = []
+        for e in self:
+            if e.translated():
+                found.append(e)
+        return found
+
+    def untranslated_entries(self):
+        """
+        Returns the list of the entries that are neither translated, nor
+        obsolete, nor fuzzy.
+        """
+        found = []
+        for e in self:
+            if e.translated() or e.obsolete or 'fuzzy' in e.flags:
+                continue
+            found.append(e)
+        return found
+
+    def fuzzy_entries(self):
+        """
+        Returns the list of the entries carrying the ``fuzzy`` flag.
+        """
+        found = []
+        for e in self:
+            if 'fuzzy' in e.flags:
+                found.append(e)
+        return found
+
+    def obsolete_entries(self):
+        """
+        Returns the list of the entries marked as obsolete.
+        """
+        found = []
+        for e in self:
+            if e.obsolete:
+                found.append(e)
+        return found
+
+    def merge(self, refpot):
+        """
+        Merges the current pofile with the reference pot file ``refpot``,
+        in the spirit of the gettext msgmerge utility.
+
+        Every entry of ``refpot`` is merged into the entry of this file that
+        has the same msgid (obsolete entries included); when there is none, a
+        new entry is appended first.  Afterwards every entry of this file
+        whose msgid is not found in ``refpot`` is marked as obsolete.
+
+        Keyword argument:
+
+        ``refpot``
+            object POFile, the reference catalog.
+        """
+        for ref_entry in refpot:
+            target = self.find(ref_entry.msgid, include_obsolete_entries=True)
+            if target is None:
+                target = POEntry()
+                self.append(target)
+            target.merge(ref_entry)
+        # entries that disappeared from the reference become obsolete
+        for own_entry in self:
+            if refpot.find(own_entry.msgid) is None:
+                own_entry.obsolete = True
+
+# }}}
+# class MOFile {{{
+
+class MOFile(_BaseFile):
+    """
+    Mo file reader/writer.
+    This class inherits the :class:`~polib._BaseFile` class and, by
+    extension, the python ``list`` type.
+    """
+    # magic numbers identifying the byte order of a mo file
+    BIG_ENDIAN    = 0xde120495
+    LITTLE_ENDIAN = 0x950412de
+
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor, accepts all keywords arguments accepted by
+        :class:`~polib._BaseFile` class.
+        """
+        _BaseFile.__init__(self, *args, **kwargs)
+        self.magic_number = None
+        self.version = 0
+
+    def save_as_pofile(self, fpath):
+        """
+        Saves the mofile as a pofile to ``fpath``.
+
+        Keyword argument:
+
+        ``fpath``
+            string, full or relative path to the file.
+        """
+        _BaseFile.save(self, fpath)
+
+    def save(self, fpath=None):
+        """
+        Saves the mofile, in its binary representation, to ``fpath``.
+
+        Keyword argument:
+
+        ``fpath``
+            string, full or relative path to the file.
+        """
+        _BaseFile.save(self, fpath, 'to_binary')
+
+    def percent_translated(self):
+        """
+        Convenience method to keep the same interface with POFile instances,
+        always returns ``100``.
+        """
+        return 100
+
+    def translated_entries(self):
+        """
+        Convenience method to keep the same interface with POFile instances,
+        returns every entry of the file.
+
+        A new list is returned, as POFile does, so that callers sorting or
+        modifying the result do not reorder or alter the file itself.
+        """
+        return list(self)
+
+    def untranslated_entries(self):
+        """
+        Convenience method to keep the same interface with POFile instances,
+        always returns an empty list.
+        """
+        return []
+
+    def fuzzy_entries(self):
+        """
+        Convenience method to keep the same interface with POFile instances,
+        always returns an empty list.
+        """
+        return []
+
+    def obsolete_entries(self):
+        """
+        Convenience method to keep the same interface with POFile instances,
+        always returns an empty list.
+        """
+        return []
+
+# }}}
+# class _BaseEntry {{{
+
+class _BaseEntry(object):
+    """
+    Shared base of the :class:`~polib.POEntry` and :class:`~polib.MOEntry`
+    classes; it is **not** meant to be instantiated directly.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor, reads the following keyword arguments: ``msgid``,
+        ``msgstr`` and ``msgid_plural`` (default: empty string),
+        ``msgstr_plural`` (default: empty dict), ``obsolete`` (default:
+        ``False``), ``encoding`` (default: ``default_encoding``) and
+        ``msgctxt`` (default: ``None``).
+        """
+        option = kwargs.get
+        self.msgctxt = option('msgctxt', None)
+        self.msgid = option('msgid', '')
+        self.msgid_plural = option('msgid_plural', '')
+        self.msgstr = option('msgstr', '')
+        self.msgstr_plural = option('msgstr_plural', {})
+        self.obsolete = option('obsolete', False)
+        self.encoding = option('encoding', default_encoding)
+
+    def __repr__(self):
+        """
+        Returns the python representation of the entry.
+        """
+        class_name = self.__class__.__name__
+        return '<%s instance at %x>' % (class_name, id(self))
+
+    def __unicode__(self, wrapwidth=78):
+        """
+        Returns the unicode representation of the entry: msgctxt (if set),
+        msgid, msgid_plural (if any), then either the plural msgstrs or the
+        msgstr.  Every line of an obsolete entry starts with ``#~ ``.
+        """
+        prefix = self.obsolete and '#~ ' or ''
+        out = []
+        # optional message context
+        if self.msgctxt is not None:
+            out.extend(self._str_field("msgctxt", prefix, "", self.msgctxt,
+                                       wrapwidth))
+        out.extend(self._str_field("msgid", prefix, "", self.msgid,
+                                   wrapwidth))
+        # optional plural form of the msgid
+        if self.msgid_plural:
+            out.extend(self._str_field("msgid_plural", prefix, "",
+                                       self.msgid_plural, wrapwidth))
+        if not self.msgstr_plural:
+            # no plural translations, write the plain msgstr
+            out.extend(self._str_field("msgstr", prefix, "", self.msgstr,
+                                       wrapwidth))
+        else:
+            # one msgstr[index] per plural translation, in index order
+            plurals = self.msgstr_plural
+            for index in sorted(plurals):
+                out.extend(self._str_field("msgstr", prefix, '[%s]' % index,
+                                           plurals[index], wrapwidth))
+        out.append('')
+        text = '\n'.join(out)
+
+        if type(text) == types.UnicodeType:
+            return text
+        return unicode(text, self.encoding)
+
+    def __str__(self):
+        """
+        Returns the string representation of the entry, i.e. its unicode
+        representation encoded with the entry encoding.
+        """
+        as_unicode = unicode(self)
+        return as_unicode.encode(self.encoding)
+
+    def __eq__(self, other):
+        """
+        Two entries are equal when their unicode representations are equal.
+        """
+        mine, theirs = unicode(self), unicode(other)
+        return mine == theirs
+
+    def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
+        """
+        Returns the list of output lines for one field of the entry.
+
+        ``fieldname`` is the keyword to write (a ``previous_`` prefix is
+        removed from it), ``delflag`` is put in front of every line,
+        ``plural_index`` is appended to the keyword, ``field`` is the raw
+        value and ``wrapwidth`` the width at which a single line value gets
+        wrapped (no wrapping when it is not positive).
+        """
+        pieces = field.splitlines(True)
+        if len(pieces) > 1:
+            # multi-line value: start with an initial empty line
+            pieces.insert(0, '')
+        else:
+            specials = sum([field.count(c)
+                            for c in ('\\', '\n', '\r', '\t', '"')])
+            # the comparison takes into account the field name length + one
+            # space + 2 quotes (eg. msgid "<string>")
+            limit = wrapwidth - (len(fieldname)+3) + specials
+            if wrapwidth > 0 and len(field) > limit:
+                # wrap the escaped text, leaving room for the 2 quotes
+                chunks = wrap(
+                    escape(field),
+                    wrapwidth - 2,
+                    drop_whitespace=False,
+                    break_long_words=False
+                )
+                pieces = [''] + [unescape(chunk) for chunk in chunks]
+            else:
+                # also covers the empty string case
+                pieces = [field]
+        if fieldname.startswith('previous_'):
+            # quick and dirty trick to get the real field name
+            fieldname = fieldname[9:]
+
+        first, rest = pieces[0], pieces[1:]
+        out = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
+                                escape(first))]
+        out.extend(['%s"%s"' % (delflag, escape(piece)) for piece in rest])
+        return out
+
+# }}}
+# class POEntry {{{
+
+class POEntry(_BaseEntry):
+    """
+    Represents a po file entry.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor, accepts the keyword arguments of
+        :class:`~polib._BaseEntry` plus ``comment`` and ``tcomment``
+        (default: empty string), ``occurrences`` and ``flags`` (default:
+        empty list) and ``previous_msgctxt``, ``previous_msgid`` and
+        ``previous_msgid_plural`` (default: ``None``).
+        """
+        _BaseEntry.__init__(self, *args, **kwargs)
+        option = kwargs.get
+        # comments
+        self.comment = option('comment', '')
+        self.tcomment = option('tcomment', '')
+        # references and flags
+        self.occurrences = option('occurrences', [])
+        self.flags = option('flags', [])
+        # previous values of the context and of the msgids
+        self.previous_msgctxt = option('previous_msgctxt', None)
+        self.previous_msgid = option('previous_msgid', None)
+        self.previous_msgid_plural = option('previous_msgid_plural', None)
+
+    def __unicode__(self, wrapwidth=78):
+        """
+        Returns the unicode representation of the entry.
+
+        An obsolete entry is rendered by the base class alone.  Otherwise the
+        output holds, in this order: the comment (``#. `` lines), the
+        translator comment (``# `` lines), the occurrences (``#: `` lines),
+        the flags (``#, `` line), the previous msgctxt, msgid and
+        msgid_plural (``#| `` lines) and finally the base class output.
+        """
+        if self.obsolete:
+            return _BaseEntry.__unicode__(self, wrapwidth)
+
+        def commented(text, marker):
+            # turn one comment line into output lines starting with marker,
+            # with text wrapping as xgettext does
+            if wrapwidth > 0 and len(text) > wrapwidth - len(marker):
+                return wrap(text, wrapwidth,
+                            initial_indent=marker,
+                            subsequent_indent=marker,
+                            break_long_words=False)
+            return ['%s%s' % (marker, text)]
+
+        out = []
+        # comment first, if any
+        if self.comment != '':
+            for comment in self.comment.split('\n'):
+                out.extend(commented(comment, '#. '))
+        # translator comment, if any
+        if self.tcomment != '':
+            for tcomment in self.tcomment.split('\n'):
+                out.extend(commented(tcomment, '# '))
+        # occurrences (with text wrapping as xgettext does)
+        if self.occurrences:
+            references = []
+            for fpath, lineno in self.occurrences:
+                if lineno:
+                    references.append('%s:%s' % (fpath, lineno))
+                else:
+                    references.append(fpath)
+            joined = ' '.join(references)
+            if wrapwidth > 0 and len(joined)+3 > wrapwidth:
+                # XXX textwrap splits words that contain an hyphen, which is
+                # not wanted for file names: hyphens are swapped for "*"
+                # while wrapping and every "*" is turned into "-" afterwards
+                wrapped = wrap(joined.replace('-', '*'),
+                               wrapwidth,
+                               initial_indent='#: ',
+                               subsequent_indent='#: ',
+                               break_long_words=False)
+                out.extend([line.replace('*', '-') for line in wrapped])
+            else:
+                out.append('#: '+joined)
+        # flags
+        if self.flags:
+            out.append('#, %s' % ', '.join(list(self.flags)))
+
+        # previous context and previous msgid/msgid_plural
+        for name in ('previous_msgctxt', 'previous_msgid',
+                     'previous_msgid_plural'):
+            previous = getattr(self, name)
+            if previous:
+                out.extend(self._str_field(name, "#| ", "", previous,
+                                           wrapwidth))
+
+        out.append(_BaseEntry.__unicode__(self, wrapwidth))
+        text = '\n'.join(out)
+
+        if type(text) == types.UnicodeType:
+            return text
+        return unicode(text, self.encoding)
+
+    def __cmp__(self, other):
+        """
+        Called by comparison operations if rich comparison is not defined.
+
+        Returns a negative, null or positive integer.  Entries are ordered
+        by obsolete state first (obsolete entries come first), then by their
+        sorted occurrences and finally by msgid.  ``0`` is returned when all
+        of these are equal.
+        """
+        # First: Obsolete test
+        if self.obsolete != other.obsolete:
+            if self.obsolete:
+                return -1
+            else:
+                return 1
+        # Work on sorted copies to protect the originals; the natural order
+        # of the (path, line) pairs is by path first, then by line
+        occ1 = sorted(self.occurrences)
+        occ2 = sorted(other.occurrences)
+        # Comparing sorted occurrences
+        pos = 0
+        for entry1 in occ1:
+            try:
+                entry2 = occ2[pos]
+            except IndexError:
+                # other has run out of occurrences: self is greater
+                return 1
+            pos = pos + 1
+            if entry1[0] != entry2[0]:
+                if entry1[0] > entry2[0]:
+                    return 1
+                else:
+                    return -1
+            if entry1[1] != entry2[1]:
+                if entry1[1] > entry2[1]:
+                    return 1
+                else:
+                    return -1
+        if len(occ2) > len(occ1):
+            # mirror of the IndexError case above, keeps the order symmetric
+            return -1
+        # Finally: Compare message ID
+        if self.msgid > other.msgid:
+            return 1
+        elif self.msgid < other.msgid:
+            return -1
+        return 0
+
+    def translated(self):
+        """
+        Returns ``True`` if the entry has been translated or ``False``
+        otherwise.
+        """
+        if self.obsolete or 'fuzzy' in self.flags:
+            return False
+        if self.msgstr != '':
+            return True
+        if self.msgstr_plural:
+            for pos in self.msgstr_plural:
+                if self.msgstr_plural[pos] == '':
+                    return False
+            return True
+        return False
+
+    def merge(self, other):
+        """
+        Merge the current entry with the given pot entry.
+        """
+        self.msgid = other.msgid
+        self.msgctxt = other.msgctxt
+        self.occurrences = other.occurrences
+        self.comment = other.comment
+        fuzzy = 'fuzzy' in self.flags
+        self.flags = other.flags[:]  # clone flags
+        if fuzzy:
+            self.flags.append('fuzzy')
+        self.msgid_plural = other.msgid_plural
+        self.obsolete = other.obsolete
+        self.previous_msgctxt = other.previous_msgctxt
+        self.previous_msgid = other.previous_msgid
+        self.previous_msgid_plural = other.previous_msgid_plural
+        if other.msgstr_plural:
+            for pos in other.msgstr_plural:
+                try:
+                    # keep existing translation at pos if any
+                    self.msgstr_plural[pos]
+                except KeyError:
+                    self.msgstr_plural[pos] = ''
+
+# }}}
+# class MOEntry {{{
+
+class MOEntry(_BaseEntry):
+    """
+    Represents a mo file entry.
+    """
+    pass
+
+# }}}
+# class _POFileParser {{{
+
+class _POFileParser(object):
+    """
+    A finite state machine to parse efficiently and correctly po
+    file format.
+    """
+
+    def __init__(self, pofile, *args, **kwargs):
+        """
+        Constructor.
+
+        Keyword arguments:
+
+        ``pofile``
+            string, path to the po file or its content
+
+        ``encoding``
+            string, the encoding to use, defaults to ``default_encoding``
+            global variable (optional).
+
+        ``check_for_duplicates``
+            whether to check for duplicate entries when adding entries to the
+            file (optional, default: ``False``).
+        """
+        enc = kwargs.get('encoding', default_encoding)
+        if os.path.exists(pofile):
+            try:
+                self.fhandle = codecs.open(pofile, 'rU', enc)
+            except LookupError:
+                enc = default_encoding
+                self.fhandle = codecs.open(pofile, 'rU', enc)
+        else:
+            self.fhandle = pofile.splitlines()
+
+        self.instance = POFile(
+            pofile=pofile,
+            encoding=enc,
+            check_for_duplicates=kwargs.get('check_for_duplicates', False)
+        )
+        self.transitions = {}
+        self.current_entry = POEntry()
+        self.current_state = 'ST'
+        self.current_token = None
+        self.current_line = 0
+        # two memo flags used in handlers
+        self.msgstr_index = 0
+        self.entry_obsolete = 0
+        # Configure the state machine, by adding transitions.
+        # Signification of symbols:
+        #     * ST: Beginning of the file (start)
+        #     * HE: Header
+        #     * TC: a translation comment
+        #     * GC: a generated comment
+        #     * OC: a file/line occurence
+        #     * FL: a flags line
+        #     * CT: a message context
+        #     * PC: a previous msgctxt
+        #     * PM: a previous msgid
+        #     * PP: a previous msgid_plural
+        #     * MI: a msgid
+        #     * MP: a msgid plural
+        #     * MS: a msgstr
+        #     * MX: a msgstr plural
+        #     * MC: a msgid or msgstr continuation line
+        all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
+               'MS', 'MP', 'MX', 'MI']
+
+        self.add('TC', ['ST', 'HE'],                                     'HE')
+        self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
+                        'MP', 'MX', 'MI'],                               'TC')
+        self.add('GC', all,                                              'GC')
+        self.add('OC', all,                                              'OC')
+        self.add('FL', all,                                              'FL')
+        self.add('PC', all,                                              'PC')
+        self.add('PM', all,                                              'PM')
+        self.add('PP', all,                                              'PP')
+        self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
+                        'PP', 'MS', 'MX'],                               'CT')
+        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC', 
+                 'PM', 'PP', 'MS', 'MX'],                                'MI')
+        self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'],             'MP')
+        self.add('MS', ['MI', 'MP', 'TC'],                               'MS')
+        self.add('MX', ['MI', 'MX', 'MP', 'TC'],                         'MX')
+        self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
+
+    def parse(self):
+        """
+        Run the state machine, parse the file line by line and call process()
+        with the current matched symbol.
+        """
+        i, lastlen = 1, 0
+        for line in self.fhandle:
+            line = line.strip()
+            if line == '':
+                i = i+1
+                continue
+            if line[:3] == '#~ ':
+                line = line[3:]
+                self.entry_obsolete = 1
+            else:
+                self.entry_obsolete = 0
+            self.current_token = line
+            self.current_line = i
+            if line[:2] == '#:':
+                # we are on a occurrences line
+                self.process('OC', i)
+            elif line[:9] == 'msgctxt "':
+                # we are on a msgctxt
+                self.process('CT', i)
+            elif line[:7] == 'msgid "':
+                # we are on a msgid
+                self.process('MI', i)
+            elif line[:8] == 'msgstr "':
+                # we are on a msgstr
+                self.process('MS', i)
+            elif line[:1] == '"' or line[:4] == '#| "':
+                # we are on a continuation line or some metadata
+                self.process('MC', i)
+            elif line[:14] == 'msgid_plural "':
+                # we are on a msgid plural
+                self.process('MP', i)
+            elif line[:7] == 'msgstr[':
+                # we are on a msgstr plural
+                self.process('MX', i)
+            elif line[:3] == '#, ':
+                # we are on a flags line
+                self.process('FL', i)
+            elif line[:2] == '# ' or line == '#':
+                if line == '#': line = line + ' '
+                # we are on a translator comment line
+                self.process('TC', i)
+            elif line[:2] == '#.':
+                # we are on a generated comment line
+                self.process('GC', i)
+            elif line[:15] == '#| msgid_plural':
+                # we are on a previous msgid_plural
+                self.process('PP', i)
+            elif line[:8] == '#| msgid':
+                self.process('PM', i)
+                # we are on a previous msgid
+            elif line[:10] == '#| msgctxt':
+                # we are on a previous msgctxt
+                self.process('PC', i)
+            i = i+1
+
+        if self.current_entry:
+            # since entries are added when another entry is found, we must add
+            # the last entry here (only if there are lines)
+            self.instance.append(self.current_entry)
+        # before returning the instance, check if there's metadata and if 
+        # so extract it in a dict
+        firstentry = self.instance[0]
+        if firstentry.msgid == '': # metadata found
+            # remove the entry
+            firstentry = self.instance.pop(0)
+            self.instance.metadata_is_fuzzy = firstentry.flags
+            key = None
+            for msg in firstentry.msgstr.splitlines():
+                try:
+                    key, val = msg.split(':', 1)
+                    self.instance.metadata[key] = val.strip()
+                except:
+                    if key is not None:
+                        self.instance.metadata[key] += '\n'+ msg.strip()
+        # close opened file
+        if isinstance(self.fhandle, file):
+            self.fhandle.close()
+        return self.instance
+
+    def add(self, symbol, states, next_state):
+        """
+        Add a transition to the state machine.
+
+        Keywords arguments:
+
+        ``symbol``
+            string, the matched token (two chars symbol).
+
+        ``states``
+            list, a list of states (two chars symbols).
+
+        ``next_state``
+            the next state the fsm will have after the action.
+        """
+        for state in states:
+            action = getattr(self, 'handle_%s' % next_state.lower())
+            self.transitions[(symbol, state)] = (action, next_state)
+
+    def process(self, symbol, linenum):
+        """
+        Process the transition corresponding to the current state and the
+        symbol provided.
+
+        Keywords arguments:
+
+        ``symbol``
+            string, the matched token (two chars symbol).
+
+        ``linenum``
+            integer, the current line number of the parsed file.
+        """
+        try:
+            (action, state) = self.transitions[(symbol, self.current_state)]
+            if action():
+                self.current_state = state
+        except Exception, exc:
+            raise IOError('Syntax error in po file (line %s)' % linenum)
+
+    # state handlers
+
+    def handle_he(self):
+        """Handle a header comment."""
+        if self.instance.header != '':
+            self.instance.header += '\n'
+        self.instance.header += self.current_token[2:]
+        return 1
+
+    def handle_tc(self):
+        """Handle a translator comment."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        if self.current_entry.tcomment != '':
+            self.current_entry.tcomment += '\n'
+        self.current_entry.tcomment += self.current_token[2:]
+        return True
+
+    def handle_gc(self):
+        """Handle a generated comment."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        if self.current_entry.comment != '':
+            self.current_entry.comment += '\n'
+        self.current_entry.comment += self.current_token[3:]
+        return True
+
+    def handle_oc(self):
+        """Handle a file:num occurence."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        occurrences = self.current_token[3:].split()
+        for occurrence in occurrences:
+            if occurrence != '':
+                try:
+                    fil, line = occurrence.split(':')
+                    if not line.isdigit():
+                        fil  = fil + line
+                        line = ''
+                    self.current_entry.occurrences.append((fil, line))
+                except:
+                    self.current_entry.occurrences.append((occurrence, ''))
+        return True
+
+    def handle_fl(self):
+        """Handle a flags line."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.flags += self.current_token[3:].split(', ')
+        return True
+
+    def handle_pp(self):
+        """Handle a previous msgid_plural line."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.previous_msgid_plural = \
+            unescape(self.current_token[17:-1])
+        return True
+
+    def handle_pm(self):
+        """Handle a previous msgid line."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.previous_msgid = \
+            unescape(self.current_token[10:-1])
+        return True
+
+    def handle_pc(self):
+        """Handle a previous msgctxt line."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.previous_msgctxt = \
+            unescape(self.current_token[12:-1])
+        return True
+
+    def handle_ct(self):
+        """Handle a msgctxt."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.msgctxt = unescape(self.current_token[9:-1])
+        return True
+
+    def handle_mi(self):
+        """Handle a msgid."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.obsolete = self.entry_obsolete
+        self.current_entry.msgid = unescape(self.current_token[7:-1])
+        return True
+
+    def handle_mp(self):
+        """Handle a msgid plural."""
+        self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
+        return True
+
+    def handle_ms(self):
+        """Handle a msgstr."""
+        self.current_entry.msgstr = unescape(self.current_token[8:-1])
+        self.current_entry.linenum = str(self.current_line)
+        return True
+
+    def handle_mx(self):
+        """Handle a msgstr plural."""
+        index, value = self.current_token[7], self.current_token[11:-1]
+        self.current_entry.msgstr_plural[index] = unescape(value)
+        self.msgstr_index = index
+        return True
+
+    def handle_mc(self):
+        """Handle a msgid or msgstr continuation line."""
+        token = unescape(self.current_token[1:-1])
+        if self.current_state == 'CT':
+            typ = 'msgctxt'
+            self.current_entry.msgctxt += token
+        elif self.current_state == 'MI':
+            typ = 'msgid'
+            self.current_entry.msgid += token
+        elif self.current_state == 'MP':
+            typ = 'msgid_plural'
+            self.current_entry.msgid_plural += token
+        elif self.current_state == 'MS':
+            typ = 'msgstr'
+            self.current_entry.msgstr += token
+        elif self.current_state == 'MX':
+            typ = 'msgstr[%s]' % self.msgstr_index
+            self.current_entry.msgstr_plural[self.msgstr_index] += token
+        elif self.current_state == 'PP':
+            typ = 'previous_msgid_plural'
+            token = token[3:]
+            self.current_entry.previous_msgid_plural += token
+        elif self.current_state == 'PM':
+            typ = 'previous_msgid'
+            token = token[3:]
+            self.current_entry.previous_msgid += token
+        elif self.current_state == 'PC':
+            typ = 'previous_msgctxt'
+            token = token[3:]
+            self.current_entry.previous_msgctxt += token
+        # don't change the current state
+        return False
+
+# }}}
+# class _MOFileParser {{{
+
+class _MOFileParser(object):
+    """
+    A class to parse binary mo files.
+    """
+
+    def __init__(self, mofile, *args, **kwargs):
+        """
+        Constructor.
+
+        Keyword arguments:
+
+        ``mofile``
+            string, path to the mo file or its content
+
+        ``encoding``
+            string, the encoding to use, defaults to ``default_encoding``
+            global variable (optional).
+
+        ``check_for_duplicates``
+            whether to check for duplicate entries when adding entries to the
+            file (optional, default: ``False``).
+        """
+        self.fhandle = open(mofile, 'rb')
+        self.instance = MOFile(
+            fpath=mofile,
+            encoding=kwargs.get('encoding', default_encoding),
+            check_for_duplicates=kwargs.get('check_for_duplicates', False)
+        )
+
+    def parse(self):
+        """
+        Build the instance with the file handle provided in the
+        constructor.
+        """
+        # parse magic number
+        magic_number = self._readbinary('<I', 4)
+        if magic_number == MOFile.LITTLE_ENDIAN:
+            ii = '<II'
+        elif magic_number == MOFile.BIG_ENDIAN:
+            ii = '>II'
+        else:
+            raise IOError('Invalid mo file, magic number is incorrect !')
+        self.instance.magic_number = magic_number
+        # parse the version number and the number of strings
+        self.instance.version, numofstrings = self._readbinary(ii, 8)
+        # original strings and translation strings hash table offset
+        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
+        # move to msgid hash table and read length and offset of msgids
+        self.fhandle.seek(msgids_hash_offset)
+        msgids_index = []
+        for i in range(numofstrings):
+            msgids_index.append(self._readbinary(ii, 8))
+        # move to msgstr hash table and read length and offset of msgstrs
+        self.fhandle.seek(msgstrs_hash_offset)
+        msgstrs_index = []
+        for i in range(numofstrings):
+            msgstrs_index.append(self._readbinary(ii, 8))
+        # build entries
+        for i in range(numofstrings):
+            self.fhandle.seek(msgids_index[i][1])
+            msgid = self.fhandle.read(msgids_index[i][0])
+            self.fhandle.seek(msgstrs_index[i][1])
+            msgstr = self.fhandle.read(msgstrs_index[i][0])
+            if i == 0: # metadata
+                raw_metadata, metadata = msgstr.split('\n'), {}
+                for line in raw_metadata:
+                    tokens = line.split(':', 1)
+                    if tokens[0] != '':
+                        try:
+                            metadata[tokens[0]] = tokens[1].strip()
+                        except IndexError:
+                            metadata[tokens[0]] = ''
+                self.instance.metadata = metadata
+                continue
+            # test if we have a plural entry
+            msgid_tokens = msgid.split('\0')
+            if len(msgid_tokens) > 1:
+                entry = MOEntry(
+                    msgid=msgid_tokens[0],
+                    msgid_plural=msgid_tokens[1],
+                    msgstr_plural=dict((k,v) for k,v in \
+                        enumerate(msgstr.split('\0')))
+                )
+            else:
+                entry = MOEntry(msgid=msgid, msgstr=msgstr)
+            self.instance.append(entry)
+        # close opened file
+        self.fhandle.close()
+        return self.instance
+
+    def _readbinary(self, fmt, numbytes):
+        """
+        Private method that unpack n bytes of data using format <fmt>.
+        It returns a tuple or a mixed value if the tuple length is 1.
+        """
+        bytes = self.fhandle.read(numbytes)
+        tup = struct.unpack(fmt, bytes)
+        if len(tup) == 1:
+            return tup[0]
+        return tup
+
+# }}}
+# class TextWrapper {{{
+
+class TextWrapper(textwrap.TextWrapper):
+    """
+    Subclass of textwrap.TextWrapper that backports the
+    drop_whitespace option.
+    """
+    def __init__(self, *args, **kwargs):
+        # take drop_whitespace (default True) out of kwargs before calling
+        # the parent constructor, then keep it on the instance for
+        # _wrap_chunks()
+        drop_whitespace = kwargs.pop('drop_whitespace', True) 
+        textwrap.TextWrapper.__init__(self, *args, **kwargs)
+        self.drop_whitespace = drop_whitespace
+
+    def _wrap_chunks(self, chunks):
+        """_wrap_chunks(chunks : [string]) -> [string]
+
+        Wrap a sequence of text chunks and return a list of lines of
+        length 'self.width' or less.  (If 'break_long_words' is false,
+        some lines may be longer than this.)  Chunks correspond roughly
+        to words and the whitespace between them: each chunk is
+        indivisible (modulo 'break_long_words'), but a line break can
+        come between any two chunks.  Chunks should not have internal
+        whitespace; ie. a chunk is either all whitespace or a "word".
+        Whitespace chunks will be removed from the beginning and end of
+        lines when 'drop_whitespace' is true, but apart from that
+        whitespace is preserved.
+
+        The 'chunks' list is reversed and consumed in place.  Raises
+        ValueError when 'self.width' is not positive.
+        """
+        lines = []
+        if self.width <= 0:
+            raise ValueError("invalid width %r (must be > 0)" % self.width)
+
+        # Arrange in reverse order so items can be efficiently popped
+        # from a stack of chunks.
+        chunks.reverse()
+
+        while chunks:
+
+            # Start the list of chunks that will make up the current line.
+            # cur_len is just the length of all the chunks in cur_line.
+            cur_line = []
+            cur_len = 0
+
+            # Figure out which static string will prefix this line.
+            if lines:
+                indent = self.subsequent_indent
+            else:
+                indent = self.initial_indent
+
+            # Maximum width for this line.
+            width = self.width - len(indent)
+
+            # First chunk on line is whitespace -- drop it, unless this
+            # is the very beginning of the text (ie. no lines started yet).
+            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
+                del chunks[-1]
+
+            while chunks:
+                l = len(chunks[-1])
+
+                # Can at least squeeze this chunk onto the current line.
+                if cur_len + l <= width:
+                    cur_line.append(chunks.pop())
+                    cur_len += l
+
+                # Nope, this line is full.
+                else:
+                    break
+
+            # The current line is full, and the next chunk is too big to
+            # fit on *any* line (not just this one).
+            if chunks and len(chunks[-1]) > width:
+                self._handle_long_word(chunks, cur_line, cur_len, width)
+
+            # If the last chunk on this line is all whitespace, drop it.
+            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
+                del cur_line[-1]
+
+            # Convert current line back to a string and store it in list
+            # of all lines (return value).
+            if cur_line:
+                lines.append(indent + ''.join(cur_line))
+
+        return lines
+
+# }}}
+# function wrap() {{{
+
+def wrap(text, width=70, **kwargs):
+    """
+    Wrap a single paragraph of text, returning a list of wrapped lines.
+    """
+    if sys.version_info < (2, 6):
+        return TextWrapper(width=width, **kwargs).wrap(text)
+    return textwrap.wrap(text, width=width, **kwargs)
+
+#}}}

Added: translation/trunk/tools/validate.py
===================================================================
--- translation/trunk/tools/validate.py	                        (rev 0)
+++ translation/trunk/tools/validate.py	2011-03-17 17:52:17 UTC (rev 24379)
@@ -0,0 +1,94 @@
+#! /bin/env python
+
+from __future__ import unicode_literals, print_function
+
+import polib
+import sys
+import getopt
+
+symbols = [
+    '(', ')', '<', '>', '{',
+    '}', '[', ']', '\"', ';'
+    ]
+
+counts = {}
+
+def reset():
+  for s in symbols:
+    counts[s] = 0
+
+def parse_orig_string(string):
+  for ch in string:
+    if ch in symbols:
+      counts[ch] += 1
+
+def parse_trans_string(string):
+  for ch in string:
+    if ch in symbols:
+      counts[ch] -= 1
+
+def get_strings(file):
+  entries = []
+  po = polib.pofile(file, autodetect_encoding=True)
+
+  for entry in po:
+    entries.append((entry.msgid, entry.msgstr, entry.linenum))
+
+  return entries
+
+def warn():
+  for s in symbols:
+    if counts[s] < 0:
+      return True
+  return False
+
+def usage():
+  print("Usage: %s -i/--input=<file.po> -l/--logfile=<logfile>" % sys.argv[0])
+
+def log(string, file_to_check, log_file, linenum = -1):
+  f = open(log_file, "a+")
+  if linenum == -1:
+    f.write(("%s: %s\n" % (file_to_check, string)).encode("utf-8"))
+  else:
+    f.write(("%s (%s): %s\n" % (file_to_check, linenum, string)).encode("utf-8"))
+  f.close()
+
+def check(file_to_check, log_file):
+  errors = 0
+
+  strings = get_strings(file_to_check)
+  for (orig, trans, linenum) in strings:
+    reset()
+    parse_orig_string(orig)
+    parse_trans_string(trans)
+    if warn():
+      errors += 1
+      log(trans, file_to_check, log_file, linenum)
+
+  if errors != 0:
+    log("Total count of warnings %d\n" % errors, file_to_check, log_file)
+
+if __name__ == '__main__':
+  try:
+    opts, args = getopt.getopt(sys.argv[1:], "i:hl:", ["input=", "help", "logfile="])
+  except getopt.GetoptError, err:
+    print(str(err))
+    sys.exit(2)
+
+  file_to_check = None
+  log_file = None
+
+  for opt, arg in opts:
+    if opt in ("-i", "--input"):
+      file_to_check = arg
+    elif opt in ("-h", "--help"):
+      usage()
+      sys.exit()
+    elif opt in ("-l", "--logfile"):
+      log_file = arg
+
+  if file_to_check is None or log_file is None:
+    print("ERROR: You need to specify both the input and the logfile")
+    sys.exit(2)
+
+  check(file_to_check, log_file)

    

[tor-commits] r24379: {} a script to validate translation input, thanks chiiph (translation/trunk/tools)

Runa Sandvik