#!@PYTHON@ -s # # Copyright (c) 2010-2018 Shaun McCance # # ITS Tool program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # ITS Tool is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. # # You should have received a copy of the GNU General Public License along # with ITS Tool; if not, write to the Free Software Foundation, 59 Temple # Place, Suite 330, Boston, MA 0211-1307 USA. # from __future__ import print_function from __future__ import unicode_literals VERSION="@VERSION@" DATADIR="@DATADIR@" import gettext import hashlib import libxml2 import optparse import os import os.path import re import sys import time import io PY3 = sys.version_info[0] == 3 if PY3: string_types = str, def ustr(s, encoding=None): if isinstance(s, str): return s elif encoding: return str(s, encoding) else: return str(s) ustr_type = str else: string_types = basestring, ustr = ustr_type = unicode NS_ITS = 'http://www.w3.org/2005/11/its' NS_ITST = 'http://itstool.org/extensions/' NS_BLANK = 'http://itstool.org/extensions/blank/' NS_XLINK = 'http://www.w3.org/1999/xlink' NS_XML = 'http://www.w3.org/XML/1998/namespace' class NoneTranslations: def gettext(self, message): return None def lgettext(self, message): return None def ngettext(self, msgid1, msgid2, n): return None def lngettext(self, msgid1, msgid2, n): return None def ugettext(self, message): return None def ungettext(self, msgid1, msgid2, n): return None class MessageList (object): def __init__ (self): self._messages = [] self._by_node = {} self._has_credits = False def add_message (self, message, node): self._messages.append (message) if node is not None: self._by_node[node] = message def add_credits(self): if self._has_credits: return msg = Message() msg.set_context('_') msg.add_text('translator-credits') msg.add_comment(Comment('Put one translator per line, in the form NAME , YEAR1, YEAR2')) self._messages.append(msg) self._has_credits = True def get_message_by_node (self, node): return self._by_node.get(node, None) def get_nodes_with_messages (self): return list(self._by_node.keys()) def output (self, out): msgs = [] msgdict = {} for msg in self._messages: key = (msg.get_context(), msg.get_string()) if key in msgdict: for source in msg.get_sources(): msgdict[key].add_source(source) for marker in msg.get_markers(): msgdict[key].add_marker(marker) for comment in msg.get_comments(): msgdict[key].add_comment(comment) for idvalue in msg.get_id_values(): msgdict[key].add_id_value(idvalue) if msg.get_preserve_space(): msgdict[key].set_preserve_space() if msg.get_locale_filter() is not None: locale = msgdict[key].get_locale_filter() if locale is not None: msgdict[key].set_locale_filter('%s, %s' % (locale, msg.get_locale_filter())) else: msgdict[key].set_locale_filter(msg.get_locale_filter()) else: msgs.append(msg) msgdict[key] = msg out.write('msgid ""\n') out.write('msgstr ""\n') out.write('"Project-Id-Version: PACKAGE VERSION\\n"\n') out.write('"POT-Creation-Date: %s\\n"\n' % time.strftime("%Y-%m-%d %H:%M%z")) out.write('"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\n') out.write('"Last-Translator: FULL NAME \\n"\n') out.write('"Language-Team: LANGUAGE \\n"\n') out.write('"MIME-Version: 1.0\\n"\n') out.write('"Content-Type: text/plain; charset=UTF-8\\n"\n') out.write('"Content-Transfer-Encoding: 8bit\\n"\n') out.write('\n') for msg in msgs: out.write(msg.format()) out.write('\n') class Comment (object): def __init__ (self, text): self._text = ustr(text) assert(text is not None) self._markers = [] def add_marker (self, marker): self._markers.append(marker) def get_markers (self): return self._markers def get_text (self): return self._text def format (self): ret = '' markers = {} for marker in self._markers: if marker not in markers: ret += '#. (itstool) comment: ' + marker + '\n' markers[marker] = marker if '\n' in self._text: doadd = False for line in self._text.split('\n'): if line != '': doadd = True if not doadd: continue ret += '#. %s\n' % line else: text = self._text while len(text) > 72: j = text.rfind(' ', 0, 72) if j == -1: j = text.find(' ') if j == -1: break ret += '#. %s\n' % text[:j] text = text[j+1:] ret += '#. %s\n' % text return ret class Placeholder (object): def __init__ (self, node): self.node = node self.name = ustr(node.name, 'utf-8') class Message (object): def __init__ (self): self._message = [] self._empty = True self._ctxt = None self._placeholders = [] self._sources = [] self._markers = [] self._id_values = [] self._locale_filter = None self._comments = [] self._preserve = False def __repr__(self): if self._empty: return "Empty message" return self.get_string() def escape (self, text): return text.replace('\\','\\\\').replace('"', "\\\"").replace("\n","\\n").replace("\t","\\t") def add_text (self, text): if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)): self._message.append('') if not isinstance(text, ustr_type): text = ustr(text, 'utf-8') self._message[-1] += text.replace('&', '&').replace('<', '<').replace('>', '>') if re.sub('\s+', ' ', text).strip() != '': self._empty = False def add_entity_ref (self, name): self._message.append('&' + name + ';') self._empty = False def add_placeholder (self, node): holder = Placeholder(node) self._placeholders.append(holder) self._message.append(holder) def get_placeholder (self, name): placeholder = 1 for holder in self._placeholders: holdername = '%s-%i' % (holder.name, placeholder) if holdername == ustr(name, 'utf-8'): return holder placeholder += 1 def add_start_tag (self, node): if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)): self._message.append('') if node.ns() is not None and node.ns().name is not None: self._message[-1] += ('<%s:%s' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8'))) else: self._message[-1] += ('<%s' % ustr(node.name, 'utf-8')) for prop in xml_attr_iter(node): name = prop.name if prop.ns() is not None: name = prop.ns().name + ':' + name atval = prop.content if not isinstance(atval, ustr_type): atval = ustr(atval, 'utf-8') atval = atval.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"') self._message += " %s=\"%s\"" % (name, atval) if node.children is not None: self._message[-1] += '>' else: self._message[-1] += '/>' def add_end_tag (self, node): if node.children is not None: if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)): self._message.append('') if node.ns() is not None and node.ns().name is not None: self._message[-1] += ('' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8'))) else: self._message[-1] += ('' % ustr(node.name, 'utf-8')) def is_empty (self): return self._empty def get_context (self): return self._ctxt def set_context (self, ctxt): self._ctxt = ctxt def add_source (self, source): if not isinstance(source, ustr_type): source = ustr(source, 'utf-8') self._sources.append(source) def get_sources (self): return self._sources def add_marker (self, marker): if not isinstance(marker, ustr_type): marker = ustr(marker, 'utf-8') self._markers.append(marker) def get_markers (self): return self._markers def add_id_value(self, id_value): self._id_values.append(id_value) def get_id_values(self): return self._id_values def add_comment (self, comment): if comment is not None: self._comments.append(comment) def get_comments (self): return self._comments def get_string (self): message = '' placeholder = 1 for msg in self._message: if isinstance(msg, string_types): message += msg elif isinstance(msg, Placeholder): message += '<_:%s-%i/>' % (msg.name, placeholder) placeholder += 1 if not self._preserve: message = re.sub('\s+', ' ', message).strip() return message def get_preserve_space (self): return self._preserve def set_preserve_space (self, preserve=True): self._preserve = preserve def get_locale_filter(self): return self._locale_filter def set_locale_filter(self, locale): self._locale_filter = locale def format (self): ret = '' markers = {} for marker in self._markers: if marker not in markers: ret += '#. (itstool) path: ' + marker + '\n' markers[marker] = marker for idvalue in self._id_values: ret += '#. (itstool) id: ' + idvalue + '\n' if self._locale_filter is not None: ret += '#. (itstool) ' + self._locale_filter[1] + ' locale: ' + self._locale_filter[0] + '\n' comments = [] commentsdict = {} for comment in self._comments: key = comment.get_text() if key in commentsdict: for marker in comment.get_markers(): commentsdict[key].add_marker(marker) else: comments.append(comment) commentsdict[key] = comment for i in range(len(comments)): if i != 0: ret += '#.\n' ret += comments[i].format() for source in self._sources: ret += '#: %s\n' % source if self._preserve: ret += '#, no-wrap\n' if self._ctxt is not None: ret += 'msgctxt "%s"\n' % self._ctxt message = self.get_string() if self._preserve: ret += 'msgid ""\n' lines = message.split('\n') for line, no in zip(lines, list(range(len(lines)))): if no == len(lines) - 1: ret += '"%s"\n' % self.escape(line) else: ret += '"%s\\n"\n' % self.escape(line) else: ret += 'msgid "%s"\n' % self.escape(message) ret += 'msgstr ""\n' return ret def xml_child_iter (node): child = node.children while child is not None: yield child child = child.next def xml_attr_iter (node): attr = node.get_properties() while attr is not None: yield attr attr = attr.next def xml_is_ns_name (node, ns, name): if node.type != 'element': return False return node.name == name and node.ns() is not None and node.ns().content == ns def xml_get_node_path(node): # The built-in nodePath() method only does numeric indexes # when necessary for disambiguation. For various reasons, # we prefer always using indexes. name = node.name if node.ns() is not None and node.ns().name is not None: name = node.ns().name + ':' + name if node.type == 'attribute': name = '@' + name name = '/' + name if node.type == 'element' and node.parent.type == 'element': count = 1 prev = node.previousElementSibling() while prev is not None: if prev.name == node.name: if prev.ns() is None: if node.ns() is None: count += 1 else: if node.ns() is not None: if prev.ns().name == node.ns().name: count += 1 prev = prev.previousElementSibling() name = '%s[%i]' % (name, count) if node.parent.type == 'element': name = xml_get_node_path(node.parent) + name return name def xml_error_catcher(doc, error): doc._xml_err += " %s" % error def fix_node_ns (node, nsdefs): childnsdefs = nsdefs.copy() nsdef = node.nsDefs() while nsdef is not None: nextnsdef = nsdef.next if nsdef.name in nsdefs and nsdefs[nsdef.name] == nsdef.content: node.removeNsDef(nsdef.content) else: childnsdefs[nsdef.name] = nsdef.content nsdef = nextnsdef for child in xml_child_iter(node): if child.type == 'element': fix_node_ns(child, childnsdefs) class LocNote (object): def __init__(self, locnote=None, locnoteref=None, locnotetype=None, space=False): self.locnote = locnote self.locnoteref = locnoteref self.locnotetype = locnotetype if self.locnotetype != 'alert': self.locnotetype = 'description' self._preserve_space=space def __repr__(self): if self.locnote is not None: if self._preserve_space: return self.locnote else: return re.sub('\s+', ' ', self.locnote).strip() elif self.locnoteref is not None: return '(itstool) link: ' + re.sub('\s+', ' ', self.locnoteref).strip() return '' class Document (object): def __init__ (self, filename, messages, load_dtd=False, keep_entities=False): self._xml_err = '' libxml2.registerErrorHandler(xml_error_catcher, self) try: ctxt = libxml2.createFileParserCtxt(filename) except: sys.stderr.write('Error: cannot open XML file %s\n' % filename) sys.exit(1) ctxt.lineNumbers(1) self._load_dtd = load_dtd self._keep_entities = keep_entities if load_dtd: ctxt.loadSubset(1) if keep_entities: ctxt.loadSubset(1) ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD) ctxt.replaceEntities(0) else: ctxt.replaceEntities(1) ctxt.parseDocument() self._filename = filename self._doc = ctxt.doc() self._localrules = [] def pre_process (node): for child in xml_child_iter(node): if xml_is_ns_name(child, 'http://www.w3.org/2001/XInclude', 'include'): if child.nsProp('parse', None) == 'text': child.xincludeProcessTree() elif xml_is_ns_name(child, NS_ITS, 'rules'): if child.hasNsProp('href', NS_XLINK): href = child.nsProp('href', NS_XLINK) fileref = os.path.join(os.path.dirname(filename), href) if not os.path.exists(fileref): if opts.itspath is not None: for pathdir in opts.itspath: fileref = os.path.join(pathdir, href) if os.path.exists(fileref): break if not os.path.exists(fileref): sys.stderr.write('Error: Could not locate ITS file %s\n' % href) sys.exit(1) hctxt = libxml2.createFileParserCtxt(fileref) hctxt.replaceEntities(1) hctxt.parseDocument() root = hctxt.doc().getRootElement() version = None if root.hasNsProp('version', None): version = root.nsProp('version', None) else: sys.stderr.write('Warning: ITS file %s missing version attribute\n' % os.path.basename(href)) if version is not None and version not in ('1.0', '2.0'): sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' % (os.path.basename(href), root.nsProp('version', None))) else: self._localrules.append(root) version = None if child.hasNsProp('version', None): version = child.nsProp('version', None) else: root = child.doc.getRootElement() if root.hasNsProp('version', NS_ITS): version = root.nsProp('version', NS_ITS) else: sys.stderr.write('Warning: Local ITS rules missing version attribute\n') if version is not None and version not in ('1.0', '2.0'): sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' % version) else: self._localrules.append(child) pre_process(child) pre_process(self._doc) try: self._check_errors() except libxml2.parserError as e: sys.stderr.write('Error: Could not parse document:\n%s\n' % ustr(e)) sys.exit(1) self._msgs = messages self._its_translate_nodes = {} self._its_within_text_nodes = {} self._its_locale_filters = {} self._its_id_values = {} self._its_loc_notes = {} self._its_preserve_space_nodes = {} self._itst_drop_nodes = {} self._itst_contexts = {} self._its_lang = {} self._itst_lang_attr = {} self._itst_credits = None self._its_externals = {} self._clear_cache() def __del__ (self): self._doc.freeDoc() def _check_errors(self): if self._xml_err: raise libxml2.parserError(self._xml_err) def _clear_cache(self): self._its_translate_nodes_cache = {} self._its_locale_filters_cache = {} self._its_loc_notes_cache = {} def get_its_params(self, rules): params = {} for child in xml_child_iter(rules): if xml_is_ns_name(child, NS_ITS, 'param'): params[child.nsProp('name', None)] = child.getContent() return params def register_its_params(self, xpath, params, userparams={}): for param in params: if param in userparams: xpath.xpathRegisterVariable(name, None, userparams[param]) else: xpath.xpathRegisterVariable(name, None, params[param]) def apply_its_rule(self, rule, xpath): self._clear_cache() if rule.type != 'element': return if xml_is_ns_name(rule, NS_ITS, 'translateRule'): if rule.nsProp('selector', None) is not None: for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): self._its_translate_nodes[node] = rule.nsProp('translate', None) elif xml_is_ns_name(rule, NS_ITS, 'withinTextRule'): if rule.nsProp('selector', None) is not None: for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): self._its_within_text_nodes[node] = rule.nsProp('withinText', None) elif xml_is_ns_name(rule, NS_ITST, 'preserveSpaceRule'): if rule.nsProp('selector', None) is not None: for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): val = rule.nsProp('preserveSpace', None) if val == 'yes': self._its_preserve_space_nodes[node] = 'preserve' elif xml_is_ns_name(rule, NS_ITS, 'preserveSpaceRule'): if rule.nsProp('selector', None) is not None: for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): self._its_preserve_space_nodes[node] = rule.nsProp('space', None) elif xml_is_ns_name(rule, NS_ITS, 'localeFilterRule'): if rule.nsProp('selector', None) is not None: if rule.hasNsProp('localeFilterList', None): lst = rule.nsProp('localeFilterList', None) else: lst = '*' if rule.hasNsProp('localeFilterType', None): typ = rule.nsProp('localeFilterType', None) else: typ = 'include' for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): self._its_locale_filters[node] = (lst, typ) elif xml_is_ns_name(rule, NS_ITST, 'dropRule'): if rule.nsProp('selector', None) is not None: for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): self._itst_drop_nodes[node] = rule.nsProp('drop', None) elif xml_is_ns_name(rule, NS_ITS, 'idValueRule'): sel = rule.nsProp('selector', None) idv = rule.nsProp('idValue', None) if sel is not None and idv is not None: for node in self._try_xpath_eval(xpath, sel): try: oldnode = xpath.contextNode() except: oldnode = None xpath.setContextNode(node) idvalue = self._try_xpath_eval(xpath, idv) if isinstance(idvalue, string_types): self._its_id_values[node] = idvalue else: for val in idvalue: self._its_id_values[node] = val.content break xpath.setContextNode(oldnode) pass elif xml_is_ns_name(rule, NS_ITST, 'contextRule'): if rule.nsProp('selector', None) is not None: for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): if rule.hasNsProp('context', None): self._itst_contexts[node] = rule.nsProp('context', None) elif rule.hasNsProp('contextPointer', None): try: oldnode = xpath.contextNode() except: oldnode = None xpath.setContextNode(node) ctxt = self._try_xpath_eval(xpath, rule.nsProp('contextPointer', None)) if isinstance(ctxt, string_types): self._itst_contexts[node] = ctxt else: for ctxt in ctxt: self._itst_contexts[node] = ctxt.content break xpath.setContextNode(oldnode) elif xml_is_ns_name(rule, NS_ITS, 'locNoteRule'): locnote = None notetype = rule.nsProp('locNoteType', None) for child in xml_child_iter(rule): if xml_is_ns_name(child, NS_ITS, 'locNote'): locnote = LocNote(locnote=child.content, locnotetype=notetype) break if locnote is None: if rule.hasNsProp('locNoteRef', None): locnote = LocNote(locnoteref=rule.nsProp('locNoteRef', None), locnotetype=notetype) if rule.nsProp('selector', None) is not None: for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): if locnote is not None: self._its_loc_notes.setdefault(node, []).append(locnote) else: if rule.hasNsProp('locNotePointer', None): sel = rule.nsProp('locNotePointer', None) ref = False elif rule.hasNsProp('locNoteRefPointer', None): sel = rule.nsProp('locNoteRefPointer', None) ref = True else: continue try: oldnode = xpath.contextNode() except: oldnode = None xpath.setContextNode(node) note = self._try_xpath_eval(xpath, sel) if isinstance(note, string_types): if ref: nodenote = LocNote(locnoteref=note, locnotetype=notetype) else: nodenote = LocNote(locnote=note, locnotetype=notetype) self._its_loc_notes.setdefault(node, []).append(nodenote) else: for note in note: if ref: nodenote = LocNote(locnoteref=note.content, locnotetype=notetype) else: nodenote = LocNote(locnote=note.content, locnotetype=notetype, space=self.get_preserve_space(note)) self._its_loc_notes.setdefault(node, []).append(nodenote) break xpath.setContextNode(oldnode) elif xml_is_ns_name(rule, NS_ITS, 'langRule'): if rule.nsProp('selector', None) is not None and rule.nsProp('langPointer', None) is not None: for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): try: oldnode = xpath.contextNode() except: oldnode = None xpath.setContextNode(node) res = self._try_xpath_eval(xpath, rule.nsProp('langPointer', None)) if len(res) > 0: self._its_lang[node] = res[0].content # We need to construct language attributes, not just read # language information. Technically, langPointer could be # any XPath expression. But if it looks like an attribute # accessor, just use the attribute name. if rule.nsProp('langPointer', None)[0] == '@': self._itst_lang_attr[node] = rule.nsProp('langPointer', None)[1:] xpath.setContextNode(oldnode) elif xml_is_ns_name(rule, NS_ITST, 'credits'): if rule.nsProp('appendTo', None) is not None: for node in self._try_xpath_eval(xpath, rule.nsProp('appendTo', None)): self._itst_credits = (node, rule) break elif (xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule') or xml_is_ns_name(rule, NS_ITST, 'externalRefRule')): sel = rule.nsProp('selector', None) if xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule'): ptr = rule.nsProp('externalResourceRefPointer', None) else: ptr = rule.nsProp('refPointer', None) if sel is not None and ptr is not None: for node in self._try_xpath_eval(xpath, sel): try: oldnode = xpath.contextNode() except: oldnode = None xpath.setContextNode(node) res = self._try_xpath_eval(xpath, ptr) if len(res) > 0: self._its_externals[node] = res[0].content xpath.setContextNode(oldnode) def apply_its_rules(self, builtins, userparams={}): self._clear_cache() if builtins: dirs = [] ddir = os.getenv('XDG_DATA_HOME', '') if ddir == '': ddir = os.path.join(os.path.expanduser('~'), '.local', 'share') dirs.append(ddir) ddir = os.getenv('XDG_DATA_DIRS', '') if ddir == '': if DATADIR not in ('/usr/local/share', '/usr/share'): ddir += DATADIR + ':' ddir += '/usr/local/share:/usr/share' dirs.extend(ddir.split(':')) ddone = {} for ddir in dirs: itsdir = os.path.join(ddir, 'itstool', 'its') if not os.path.exists(itsdir): continue for dfile in os.listdir(itsdir): if dfile.endswith('.its'): if not ddone.get(dfile, False): self.apply_its_file(os.path.join(itsdir, dfile), userparams=userparams) ddone[dfile] = True self.apply_local_its_rules(userparams=userparams) def apply_its_file(self, filename, userparams={}): self._clear_cache() doc = libxml2.parseFile(filename) root = doc.getRootElement() if not xml_is_ns_name(root, NS_ITS, 'rules'): return version = None if root.hasNsProp('version', None): version = root.nsProp('version', None) else: sys.stderr.write('Warning: ITS file %s missing version attribute\n' % os.path.basename(filename)) if version is not None and version not in ('1.0', '2.0'): sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' % (os.path.basename(filename), root.nsProp('version', None))) return matched = True for match in xml_child_iter(root): if xml_is_ns_name(match, NS_ITST, 'match'): matched = False xpath = self._doc.xpathNewContext() par = match nss = {} while par is not None: nsdef = par.nsDefs() while nsdef is not None: if nsdef.name is not None: if nsdef.name not in nss: nss[nsdef.name] = nsdef.content xpath.xpathRegisterNs(nsdef.name, nsdef.content) nsdef = nsdef.next par = par.parent if match.hasNsProp('selector', None): if len(self._try_xpath_eval(xpath, match.nsProp('selector', None))) > 0: matched = True break if matched == False: return params = self.get_its_params(root) for rule in xml_child_iter(root): xpath = self._doc.xpathNewContext() par = match nss = {} while par is not None: nsdef = par.nsDefs() while nsdef is not None: if nsdef.name is not None: if nsdef.name not in nss: nss[nsdef.name] = nsdef.content xpath.xpathRegisterNs(nsdef.name, nsdef.content) nsdef = nsdef.next par = par.parent self.register_its_params(xpath, params, userparams=userparams) self.apply_its_rule(rule, xpath) def apply_local_its_rules(self, userparams={}): self._clear_cache() for rules in self._localrules: def reg_ns(xpath, node): if node.parent is not None: reg_ns(xpath, node.parent) nsdef = node.nsDefs() while nsdef is not None: if nsdef.name is not None: xpath.xpathRegisterNs(nsdef.name, nsdef.content) nsdef = nsdef.next xpath = self._doc.xpathNewContext() reg_ns(xpath, rules) params = self.get_its_params(rules) self.register_its_params(xpath, params, userparams=userparams) for rule in xml_child_iter(rules): if rule.type != 'element': continue if rule.nsDefs() is not None: rule_xpath = self._doc.xpathNewContext() reg_ns(rule_xpath, rule) self.register_its_params(rule_xpath, params, userparams=userparams) else: rule_xpath = xpath self.apply_its_rule(rule, rule_xpath) def _append_credits(self, parent, node, trdata): if xml_is_ns_name(node, NS_ITST, 'for-each'): select = node.nsProp('select', None) if select == 'years': for year in trdata[2].split(','): for child in xml_child_iter(node): self._append_credits(parent, child, trdata + (year.strip(),)) elif xml_is_ns_name(node, NS_ITST, 'value-of'): select = node.nsProp('select', None) val = None if select == 'name': val = trdata[0] elif select == 'email': val = trdata[1] elif select == 'years': val = trdata[2] elif select == 'year' and len(trdata) == 4: val = trdata[3] if val is not None: if not PY3: val = val.encode('utf-8') parent.addContent(val) else: newnode = node.copyNode(2) parent.addChild(newnode) for child in xml_child_iter(node): self._append_credits(newnode, child, trdata) def merge_credits(self, translations, language, node): if self._itst_credits is None: return # Dear Python, please implement pgettext. # http://bugs.python.org/issue2504 # Sincerely, Shaun trans = translations.ugettext('_\x04translator-credits') if trans is None or trans == 'translator-credits': return regex = re.compile('(.*) \<(.*)\>, (.*)') for credit in trans.split('\n'): match = regex.match(credit) if not match: continue trdata = match.groups() for node in xml_child_iter(self._itst_credits[1]): self._append_credits(self._itst_credits[0], node, trdata) def join_translations(self, translations, node=None, strict=False): is_root = False if node is None: is_root = True self.generate_messages(comments=False) node = self._doc.getRootElement() if node is None or node.type != 'element': return if self.get_itst_drop(node) == 'yes': prev = node.prev node.unlinkNode() node.freeNode() if prev is not None and prev.isBlankNode(): prev.unlinkNode() prev.freeNode() return msg = self._msgs.get_message_by_node(node) if msg is None: self.translate_attrs(node, node) children = [child for child in xml_child_iter(node)] for child in children: self.join_translations(translations, node=child, strict=strict) else: prevnode = None if node.prev is not None and node.prev.type == 'text': prevtext = node.prev.content if re.sub('\s+', '', prevtext) == '': prevnode = node.prev for lang in sorted(list(translations.keys()), reverse=True): locale = self.get_its_locale_filter(node) lmatch = match_locale_list(locale[0], lang) if (locale[1] == 'include' and not lmatch) or (locale[1] == 'exclude' and lmatch): continue newnode = self.get_translated(node, translations[lang], strict=strict, lang=lang) if newnode != node: newnode.setProp('xml:lang', lang) node.addNextSibling(newnode) if prevnode is not None: node.addNextSibling(prevnode.copyNode(0)) if is_root: # Because of the way we create nodes and rewrite the document, # we end up with lots of redundant namespace definitions. We # kill them off in one fell swoop at the end. fix_node_ns(node, {}) self._check_errors() def merge_translations(self, translations, language, node=None, strict=False): is_root = False if node is None: is_root = True self.generate_messages(comments=False) node = self._doc.getRootElement() if node is None or node.type != 'element': return drop = False locale = self.get_its_locale_filter(node) if locale[1] == 'include': if locale[0] != '*': if not match_locale_list(locale[0], language): drop = True elif locale[1] == 'exclude': if match_locale_list(locale[0], language): drop = True if self.get_itst_drop(node) == 'yes' or drop: prev = node.prev node.unlinkNode() node.freeNode() if prev is not None and prev.isBlankNode(): prev.unlinkNode() prev.freeNode() return if is_root: self.merge_credits(translations, language, node) msg = self._msgs.get_message_by_node(node) if msg is None: self.translate_attrs(node, node) children = [child for child in xml_child_iter(node)] for child in children: self.merge_translations(translations, language, node=child, strict=strict) else: newnode = self.get_translated(node, translations, strict=strict, lang=language) if newnode != node: self.translate_attrs(node, newnode) node.replaceNode(newnode) if is_root: # Apply language attributes to untranslated nodes. We don't do # this before processing, because then these attributes would # be copied into the new nodes. We apply the attribute without # checking whether it was translated, because any that were will # just be floating around, unattached to a document. for lcnode in self._msgs.get_nodes_with_messages(): attr = self._itst_lang_attr.get(lcnode) if attr is None: continue origlang = None lcpar = lcnode while lcpar is not None: origlang = self._its_lang.get(lcpar) if origlang is not None: break lcpar = lcpar.parent if origlang is not None: lcnode.setProp(attr, origlang) # And then set the language attribute on the root node. if language is not None: attr = self._itst_lang_attr.get(node) if attr is not None: node.setProp(attr, language) # Because of the way we create nodes and rewrite the document, # we end up with lots of redundant namespace definitions. We # kill them off in one fell swoop at the end. fix_node_ns(node, {}) self._check_errors() def translate_attrs(self, oldnode, newnode): trans_attrs = [attr for attr in xml_attr_iter(oldnode) if self._its_translate_nodes.get(attr, 'no') == 'yes'] for attr in trans_attrs: srccontent = attr.get_content() if not PY3: srccontent = srccontent.decode('utf-8') newcontent = translations.ugettext(srccontent) if newcontent: if not PY3: newcontent = newcontent.encode('utf-8') newnode.setProp(attr.name, newcontent) def get_translated (self, node, translations, strict=False, lang=None): msg = self._msgs.get_message_by_node(node) if msg is None: return node msgstr = msg.get_string() # Dear Python, please implement pgettext. # http://bugs.python.org/issue2504 # Sincerely, Shaun if msg.get_context() is not None: msgstr = msg.get_context() + '\x04' + msgstr trans = translations.ugettext(msgstr) if trans is None: return node nss = {} def reg_ns(node, nss): if node.parent is not None: reg_ns(node.parent, nss) nsdef = node.nsDefs() while nsdef is not None: nss[nsdef.name] = nsdef.content nsdef = nsdef.next reg_ns(node, nss) nss['_'] = NS_BLANK try: blurb = node.doc.intSubset().serialize('utf-8') except Exception: blurb = '' blurb += '<' + ustr(node.name, 'utf-8') for nsname in list(nss.keys()): if nsname is None: blurb += ' xmlns="%s"' % nss[nsname] else: blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname]) blurb += '>%s' % (trans, ustr(node.name, 'utf-8')) if not PY3: blurb = blurb.encode('utf-8') ctxt = libxml2.createDocParserCtxt(blurb) if self._load_dtd: ctxt.loadSubset(1) if self._keep_entities: ctxt.loadSubset(1) ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD) ctxt.replaceEntities(0) else: ctxt.replaceEntities(1) ctxt.parseDocument() trnode = ctxt.doc().getRootElement() try: self._check_errors() except libxml2.parserError: if strict: raise else: sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % ( (lang + ' ') if lang is not None else '', msgstr.encode('utf-8'))) self._xml_err = '' return node def scan_node(node): children = [child for child in xml_child_iter(node)] for child in children: if child.type != 'element': continue if child.ns() is not None and child.ns().content == NS_BLANK: ph_node = msg.get_placeholder(child.name).node if self.has_child_elements(ph_node): self.merge_translations(translations, None, ph_node, strict=strict) newnode = ph_node.copyNode(1) newnode.setTreeDoc(self._doc) child.replaceNode(newnode) else: repl = self.get_translated(ph_node, translations, strict=strict, lang=lang) child.replaceNode(repl) scan_node(child) try: scan_node(trnode) except: if strict: raise else: sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % ( (lang + ' ') if lang is not None else '', msgstr.encode('utf-8'))) self._xml_err = '' ctxt.doc().freeDoc() return node retnode = node.copyNode(2) retnode.setTreeDoc(self._doc) for child in xml_child_iter(trnode): newnode = child.copyNode(1) newnode.setTreeDoc(self._doc) retnode.addChild(newnode) ctxt.doc().freeDoc() return retnode def generate_messages(self, comments=True): if self._itst_credits is not None: self._msgs.add_credits() for child in xml_child_iter(self._doc): if child.type == 'element': self.generate_message(child, None, comments=comments) break def generate_message(self, node, msg, comments=True, path=None): if node.type in ('text', 'cdata') and msg is not None: msg.add_text(node.content) return if node.type == 'entity_ref': msg.add_entity_ref(node.name); if node.type != 'element': return if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes': return if self._itst_drop_nodes.get(node, 'no') == 'yes': return locfil = self.get_its_locale_filter(node) if locfil == ('', 'include') or locfil == ('*', 'exclude'): return if path is None: path = '' translate = self.get_its_translate(node) withinText = False if translate == 'no': if msg is not None: msg.add_placeholder(node) is_unit = False msg = None else: is_unit = msg is None or self.is_translation_unit(node) if is_unit: if msg is not None: msg.add_placeholder(node) msg = Message() ctxt = None if node.hasNsProp('context', NS_ITST): ctxt = node.nsProp('context', NS_ITST) if ctxt is None: ctxt = self._itst_contexts.get(node) if ctxt is not None: msg.set_context(ctxt) idvalue = self.get_its_id_value(node) if idvalue is not None: basename = os.path.basename(self._filename) msg.add_id_value(basename + '#' + idvalue) if self.get_preserve_space(node): msg.set_preserve_space() if self.get_its_locale_filter(node) != ('*', 'include'): msg.set_locale_filter(self.get_its_locale_filter(node)) msg.add_source('%s:%i' % (self._doc.name, node.lineNo())) msg.add_marker('%s/%s' % (ustr(node.parent.name, 'utf-8'), ustr(node.name, 'utf-8'))) else: withinText = True msg.add_start_tag(node) if not withinText: # Add msg for translatable node attributes for attr in xml_attr_iter(node): if self._its_translate_nodes.get(attr, 'no') == 'yes': attr_msg = Message() if self.get_preserve_space(attr): attr_msg.set_preserve_space() attr_msg.add_source('%s:%i' % (self._doc.name, node.lineNo())) attr_msg.add_marker('%s/%s@%s' % (node.parent.name, node.name, attr.name)) attr_msg.add_text(attr.content) if comments: for locnote in self.get_its_loc_notes(attr): comment = Comment(locnote) comment.add_marker ('%s/%s@%s' % ( node.parent.name, node.name, attr.name)) attr_msg.add_comment(comment) self._msgs.add_message(attr_msg, attr) if comments and msg is not None: cnode = node while cnode is not None: hasnote = False for locnote in self.get_its_loc_notes(cnode, inherit=(not withinText)): comment = Comment(locnote) if withinText: comment.add_marker('.%s/%s' % (path, cnode.name)) msg.add_comment(comment) hasnote = True if hasnote or not is_unit: break cnode = cnode.parent self.generate_external_resource_message(node) for attr in xml_attr_iter(node): self.generate_external_resource_message(attr) idvalue = self.get_its_id_value(attr) if idvalue is not None: basename = os.path.basename(self._filename) msg.add_id_value(basename + '#' + idvalue) if withinText: path = path + '/' + node.name for child in xml_child_iter(node): self.generate_message(child, msg, comments=comments, path=path) if translate: if is_unit and not msg.is_empty(): self._msgs.add_message(msg, node) elif msg is not None: msg.add_end_tag(node) def generate_external_resource_message(self, node): if node not in self._its_externals: return resref = self._its_externals[node] if node.type == 'element': translate = self.get_its_translate(node) marker = '%s/%s' % (node.parent.name, node.name) else: translate = self.get_its_translate(node.parent) marker = '%s/%s/@%s' % (node.parent.parent.name, node.parent.name, node.name) if translate == 'no': return msg = Message() try: fullfile = os.path.join(os.path.dirname(self._filename), resref) filefp = open(fullfile, 'rb') filemd5 = hashlib.md5(filefp.read()).hexdigest() filefp.close() except Exception: filemd5 = '__failed__' txt = "external ref='%s' md5='%s'" % (resref, filemd5) msg.set_context('_') msg.add_text(txt) msg.add_source('%s:%i' % (self._doc.name, node.lineNo())) msg.add_marker(marker) msg.add_comment(Comment('This is a reference to an external file such as an image or' ' video. When the file changes, the md5 hash will change to' ' let you know you need to update your localized copy. The' ' msgstr is not used at all. Set it to whatever you like' ' once you have updated your copy of the file.')) self._msgs.add_message(msg, None) def is_translation_unit (self, node): return self.get_its_within_text(node) != 'yes' def has_child_elements(self, node): return len([child for child in xml_child_iter(node) if child.type=='element']) def get_preserve_space (self, node): while node.type in ('attribute', 'element'): if node.getSpacePreserve() == 1: return True if node in self._its_preserve_space_nodes: return (self._its_preserve_space_nodes[node] == 'preserve') node = node.parent return False def get_its_translate(self, node): if node in self._its_translate_nodes_cache: return self._its_translate_nodes_cache[node] val = None if node.hasNsProp('translate', NS_ITS): val = node.nsProp('translate', NS_ITS) elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('translate', None): val = node.nsProp('translate', None) elif node in self._its_translate_nodes: val = self._its_translate_nodes[node] if val is not None: self._its_translate_nodes_cache[node] = val return val if node.type == 'attribute': return 'no' if node.parent.type == 'element': parval = self.get_its_translate(node.parent) self._its_translate_nodes_cache[node] = parval return parval return 'yes' def get_its_within_text(self, node): if node.hasNsProp('withinText', NS_ITS): val = node.nsProp('withinText', NS_ITS) elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('withinText', None): val = node.nsProp('withinText', None) else: return self._its_within_text_nodes.get(node, 'no') if val in ('yes', 'nested'): return val return 'no' def get_its_locale_filter(self, node): if node in self._its_locale_filters_cache: return self._its_locale_filters_cache[node] if node.hasNsProp('localeFilterList', NS_ITS) or node.hasNsProp('localeFilterType', NS_ITS): if node.hasNsProp('localeFilterList', NS_ITS): lst = node.nsProp('localeFilterList', NS_ITS) else: lst = '*' if node.hasNsProp('localeFilterType', NS_ITS): typ = node.nsProp('localeFilterType', NS_ITS) else: typ = 'include' return (lst, typ) if (xml_is_ns_name(node, NS_ITS, 'span') and (node.hasNsProp('localeFilterList', None) or node.hasNsProp('localeFilterType', None))): if node.hasNsProp('localeFilterList', None): lst = node.nsProp('localeFilterList', None) else: lst = '*' if node.hasNsProp('localeFilterType', None): typ = node.nsProp('localeFilterType', None) else: typ = 'include' return (lst, typ) if node in self._its_locale_filters: return self._its_locale_filters[node] if node.parent.type == 'element': parval = self.get_its_locale_filter(node.parent) self._its_locale_filters_cache[node] = parval return parval return ('*', 'include') def get_itst_drop(self, node): if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes': return 'yes' if self._itst_drop_nodes.get(node, 'no') == 'yes': return 'yes' return 'no' def get_its_id_value(self, node): if node.hasNsProp('id', NS_XML): return node.nsProp('id', NS_XML) return self._its_id_values.get(node, None) def get_its_loc_notes(self, node, inherit=True): if node in self._its_loc_notes_cache: return self._its_loc_notes_cache[node] ret = [] if ( node.hasNsProp('locNote', NS_ITS) or node.hasNsProp('locNoteRef', NS_ITS) or node.hasNsProp('locNoteType', NS_ITS) ): notetype = node.nsProp('locNoteType', NS_ITS) if node.hasNsProp('locNote', NS_ITS): ret.append(LocNote(locnote=node.nsProp('locNote', NS_ITS), locnotetype=notetype)) elif node.hasNsProp('locNoteRef', NS_ITS): ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', NS_ITS), locnotetype=notetype)) elif xml_is_ns_name(node, NS_ITS, 'span'): if ( node.hasNsProp('locNote', None) or node.hasNsProp('locNoteRef', None) or node.hasNsProp('locNoteType', None) ): notetype = node.nsProp('locNoteType', None) if node.hasNsProp('locNote', None): ret.append(LocNote(locnote=node.nsProp('locNote', None), locnotetype=notetype)) elif node.hasNsProp('locNoteRef', None): ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', None), locnotetype=notetype)) for locnote in reversed(self._its_loc_notes.get(node, [])): ret.append(locnote) if (len(ret) == 0 and inherit and node.type != 'attribute' and node.parent is not None and node.parent.type == 'element'): parval = self.get_its_loc_notes(node.parent) self._its_loc_notes_cache[node] = parval return parval self._its_loc_notes_cache[node] = ret return ret def output_test_data(self, category, out, node=None): if node is None: node = self._doc.getRootElement() compval = '' if category == 'translate': compval = 'translate="%s"' % self.get_its_translate(node) elif category == 'withinText': if node.type != 'attribute': compval = 'withinText="%s"' % self.get_its_within_text(node) elif category == 'localeFilter': compval = 'localeFilterList="%s"\tlocaleFilterType="%s"' % self.get_its_locale_filter(node) elif category == 'locNote': val = self.get_its_loc_notes(node) if len(val) > 0: if val[0].locnote is not None: compval = 'locNote="%s"\tlocNoteType="%s"' % (ustr(val[0]), val[0].locnotetype) elif val[0].locnoteref is not None: compval = 'locNoteRef="%s"\tlocNoteType="%s"' % (val[0].locnoteref, val[0].locnotetype) elif category == 'externalResourceRef': val = self._its_externals.get(node, '') if val != '': compval = 'externalResourceRef="%s"' % val elif category == 'idValue': val = self.get_its_id_value(node) if val is not None: compval = 'idValue="%s"' % val elif category == 'preserveSpace': if self.get_preserve_space(node): compval = 'space="preserve"' else: compval = 'space="default"' else: sys.stderr.write('Error: Unrecognized category %s\n' % category) sys.exit(1) if compval != '': out.write('%s\t%s\r\n' % (xml_get_node_path(node), compval)) else: out.write('%s\r\n' % (xml_get_node_path(node))) for attr in sorted(xml_attr_iter(node), key=ustr): self.output_test_data(category, out, attr) for child in xml_child_iter(node): if child.type == 'element': self.output_test_data(category, out, child) @staticmethod def _try_xpath_eval (xpath, expr): try: return xpath.xpathEval(expr) except: sys.stderr.write('Warning: Invalid XPath: %s\n' % expr) return [] def match_locale_list(extranges, locale): if extranges.strip() == '': return False for extrange in [extrange.strip() for extrange in extranges.split(',')]: if match_locale(extrange, locale): return True return False def match_locale(extrange, locale): # Extended filtering for extended language ranges as # defined by RFC4647, part of BCP47. # http://tools.ietf.org/html/rfc4647#section-3.3.2 rangelist = [x.lower() for x in extrange.split('-')] localelist = [x.lower() for x in locale.split('-')] if rangelist[0] not in ('*', localelist[0]): return False rangei = localei = 0 while rangei < len(rangelist): if rangelist[rangei] == '*': rangei += 1 continue if localei >= len(localelist): return False if rangelist[rangei] in ('*', localelist[localei]): rangei += 1 localei += 1 continue if len(localelist[localei]) == 1: return False localei += 1 return True _locale_pattern = re.compile('([a-zA-Z0-9-]+)(_[A-Za-z0-9]+)?(@[A-Za-z0-9]+)?(\.[A-Za-z0-9]+)?') def convert_locale (locale): # Automatically convert POSIX-style locales to BCP47 match = _locale_pattern.match(locale) if match is None: return locale ret = match.group(1).lower() variant = match.group(3) if variant == '@cyrillic': ret += '-Cyrl' variant = None if variant == '@devanagari': ret += '-Deva' variant = None elif variant == '@latin': ret += '-Latn' variant = None elif variant == '@shaw': ret += '-Shaw' variant = None if match.group(2) is not None: ret += '-' + match.group(2)[1:].upper() if variant is not None and variant != '@euro': ret += '-' + variant[1:].lower() return ret #def main(): if __name__ == '__main__': options = optparse.OptionParser() options.set_usage('\n itstool [OPTIONS] [XMLFILES]\n' + ' itstool -m [OPTIONS] [XMLFILES]\n' + ' itstool -j [OPTIONS] [MOFILES]') options.add_option('-i', '--its', action='append', dest='itsfile', metavar='ITS', help='Load the ITS rules in the file ITS (can specify multiple times)') options.add_option('-l', '--lang', dest='lang', default=None, metavar='LANGUAGE', help='Explicitly set the language code for output file') options.add_option('-j', '--join', dest='join', metavar='FILE', help='Join multiple MO files with the XML file FILE and output XML file') options.add_option('-m', '--merge', dest='merge', metavar='FILE', help='Merge from a PO or MO file FILE and output XML files') options.add_option('-n', '--no-builtins', action='store_true', dest='nobuiltins', default=False, help='Do not apply the built-in ITS rules') options.add_option('-o', '--output', dest='output', default=None, metavar='OUT', help='Output PO files to file OUT or XML files in directory OUT') options.add_option('--path', action='append', dest='itspath', default=None, metavar='PATHS', help='Extra path where ITS files may be found (can specify multiple times)') options.add_option('-s', '--strict', action='store_true', dest='strict', default=False, help='Exit with error when PO files contain broken XML') options.add_option('-d', '--load-dtd', action='store_true', dest='load_dtd', default=False, help='Load external DTDs used by input XML') options.add_option('-k', '--keep-entities', action='store_true', dest='keep_entities', default=False, help='Keep entity reference unexpanded') options.add_option('-p', '--param', action='append', dest='userparams', default=[], nargs=2, metavar='NAME VALUE', help='Define the ITS parameter NAME to the value VALUE (can specify multiple times)') options.add_option('-t', '--test', dest='test', default=None, metavar='CATEGORY', help='Generate conformance test output for CATEGORY') options.add_option('-v', '--version', action='store_true', dest='version', default=False, help='Print itstool version and exit') (opts, args) = options.parse_args(sys.argv) if opts.version: print('itstool %s' % VERSION) sys.exit(0) userparams = {} for name, value in opts.userparams: userparams[name] = value if opts.merge is None and opts.join is None: messages = MessageList() for filename in args[1:]: doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities) doc.apply_its_rules(not(opts.nobuiltins), userparams=userparams) if opts.itsfile is not None: for itsfile in opts.itsfile: doc.apply_its_file(itsfile, userparams=userparams) if opts.test is None: doc.generate_messages() if opts.output is None or opts.output == '-': out = sys.stdout else: try: out = io.open(opts.output, 'wt', encoding='utf-8') except: sys.stderr.write('Error: Cannot write to file %s\n' % opts.output) sys.exit(1) if opts.test is not None: doc.output_test_data(opts.test, out) else: messages.output(out) out.flush() elif opts.merge is not None: try: translations = gettext.GNUTranslations(open(opts.merge, 'rb')) except: sys.stderr.write('Error: cannot open mo file %s\n' % opts.merge) sys.exit(1) if PY3: translations.ugettext = translations.gettext translations.add_fallback(NoneTranslations()) if opts.lang is None: opts.lang = convert_locale(os.path.splitext(os.path.basename(opts.merge))[0]) if opts.output is None: out = './' elif os.path.isdir(opts.output): out = opts.output elif len(args) == 2: if opts.output == '-': out = sys.stdout else: out = open(opts.output, 'wb') else: sys.stderr.write('Error: Non-directory output for multiple files\n') sys.exit(1) for filename in args[1:]: messages = MessageList() doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities) doc.apply_its_rules(not(opts.nobuiltins), userparams=userparams) if opts.itsfile is not None: for itsfile in opts.itsfile: doc.apply_its_file(itsfile, userparams=userparams) try: doc.merge_translations(translations, opts.lang, strict=opts.strict) except Exception as e: raise sys.stderr.write('Error: Could not merge translations:\n%s\n' % ustr(e)) sys.exit(1) serialized = doc._doc.serialize('utf-8') if PY3: # For some reason, under py3, our serialized data is returns as a str. # Let's encode it to bytes serialized = serialized.encode('utf-8') fout = out fout_is_str = isinstance(fout, string_types) if fout_is_str: fout = open(os.path.join(fout, os.path.basename(filename)), 'wb') fout.write(serialized) fout.flush() if fout_is_str: fout.close() elif opts.join is not None: translations = {} for filename in args[1:]: try: thistr = gettext.GNUTranslations(open(filename, 'rb')) except: sys.stderr.write('Error: cannot open mo file %s\n' % filename) sys.exit(1) thistr.add_fallback(NoneTranslations()) if PY3: thistr.ugettext = thistr.gettext lang = convert_locale(os.path.splitext(os.path.basename(filename))[0]) translations[lang] = thistr if opts.output is None: out = sys.stdout elif os.path.isdir(opts.output): out = open(os.path.join(opts.output, os.path.basename(filename)), 'wb') else: out = open(opts.output, 'wb') messages = MessageList() doc = Document(opts.join, messages) doc.apply_its_rules(not(opts.nobuiltins), userparams=userparams) if opts.itsfile is not None: for itsfile in opts.itsfile: doc.apply_its_file(itsfile, userparams=userparams) doc.join_translations(translations, strict=opts.strict) serialized = doc._doc.serialize('utf-8') if PY3: # For some reason, under py3, our serialized data is returns as a str. # Let's encode it to bytes serialized = serialized.encode('utf-8') out.write(serialized) out.flush() #if __name__ == '__main__': # if os.getenv('ITSTOOL_PROFILE') is not None: # import cProfile # cProfile.run('main()') # else: # main()