#!/usr/bin/python -s
#
# Copyright (c) 2010-2011 Shaun McCance
#
# ITS Tool program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# ITS Tool is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with ITS Tool; if not, write to the Free Software Foundation, 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
#
"""Extract translatable messages from XML files using ITS rules and merge
translated messages back into XML.

NOTE: this module targets Python 2 (it relies on ``unicode``, ``basestring``
and ``GNUTranslations.ugettext``) and on the libxml2 Python bindings.
"""

import gettext
import hashlib
import libxml2
import optparse
import os
import os.path
import re
import sys
import time
from xml.sax.saxutils import escape as _xml_escape

VERSION = "@VERSION@"
DATADIR = "@DATADIR@"

NS_ITS = 'http://www.w3.org/2005/11/its'
NS_ITST = 'http://itstool.org/extensions/'
NS_BLANK = 'http://itstool.org/extensions/blank/'
NS_XLINK = 'http://www.w3.org/1999/xlink'

_WHITESPACE = re.compile(r'\s+')


def _as_unicode(text):
    """Return text as a unicode object, decoding byte strings as UTF-8."""
    if isinstance(text, unicode):
        return text
    return unicode(text, 'utf-8')


def _normalize_space(text):
    """Collapse every whitespace run in text to one space and strip the ends."""
    return _WHITESPACE.sub(' ', text).strip()


class NoneTranslations:
    """Fallback catalog whose lookups all return None.

    Installed with ``add_fallback`` so that a missing translation can be
    told apart from a translation equal to the source string.
    """

    def gettext(self, message):
        return None

    def lgettext(self, message):
        return None

    def ngettext(self, msgid1, msgid2, n):
        return None

    def lngettext(self, msgid1, msgid2, n):
        return None

    def ugettext(self, message):
        return None

    def ungettext(self, msgid1, msgid2, n):
        return None


class MessageList(object):
    """Ordered collection of Message objects, optionally indexed by XML node."""

    def __init__(self):
        self._messages = []
        self._by_node = {}
        self._has_credits = False

    def add_message(self, message, node):
        """Append message; when node is not None, also index it by node."""
        self._messages.append(message)
        if node is not None:
            self._by_node[node] = message

    def add_credits(self):
        """Add the translator-credits message (context '_') at most once."""
        if self._has_credits:
            return
        msg = Message()
        msg.set_context('_')
        msg.add_text('translator-credits')
        # The <EMAIL> part matches the pattern Document.merge_credits parses.
        msg.add_comment(Comment('Put one translator per line, in the form '
                                'NAME <EMAIL>, YEAR1, YEAR2'))
        self._messages.append(msg)
        self._has_credits = True

    def get_message_by_node(self, node):
        """Return the message registered for node, or None."""
        return self._by_node.get(node, None)

    def get_nodes_with_messages(self):
        """Return the nodes that have a message registered."""
        return self._by_node.keys()

    def output(self, out):
        """Write all messages to out as a PO template.

        Messages sharing the same (context, string) key are folded into the
        first occurrence, which collects their sources, markers, comments
        and preserve-space flag.
        """
        msgs = []
        msgdict = {}
        for msg in self._messages:
            key = (msg.get_context(), msg.get_string())
            if key in msgdict:
                first = msgdict[key]
                for source in msg.get_sources():
                    first.add_source(source)
                for marker in msg.get_markers():
                    first.add_marker(marker)
                for comment in msg.get_comments():
                    first.add_comment(comment)
                if msg.get_preserve_space():
                    first.set_preserve_space()
            else:
                msgs.append(msg)
                msgdict[key] = msg
        out.write('msgid ""\n')
        out.write('msgstr ""\n')
        out.write('"Project-Id-Version: PACKAGE VERSION\\n"\n')
        out.write('"POT-Creation-Date: %s\\n"\n' % time.strftime("%Y-%m-%d %H:%M%z"))
        out.write('"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\n')
        out.write('"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"\n')
        out.write('"Language-Team: LANGUAGE <LL@li.org>\\n"\n')
        out.write('"MIME-Version: 1.0\\n"\n')
        out.write('"Content-Type: text/plain; charset=UTF-8\\n"\n')
        out.write('"Content-Transfer-Encoding: 8bit\\n"\n')
        out.write('\n')
        for msg in msgs:
            out.write(msg.format().encode('utf-8'))
            out.write('\n')


class Comment(object):
    """A translator comment, with the markers of the nodes it came from."""

    def __init__(self, text):
        assert(text is not None)
        # Stored as unicode so format() can mix it with unicode templates
        # even when the text holds non-ASCII UTF-8 bytes.
        self._text = _as_unicode(text)
        self._markers = []

    def add_marker(self, marker):
        self._markers.append(_as_unicode(marker))

    def get_markers(self):
        return self._markers

    def get_text(self):
        return self._text

    def format(self):
        """Return the comment as '#. ' lines for a PO file.

        Text containing newlines is emitted line by line, skipping leading
        empty lines. Other text is wrapped at spaces so that each emitted
        chunk is at most 72 characters where a space allows it.
        """
        ret = u''
        seen = set()
        for marker in self._markers:
            if marker not in seen:
                ret += u'#. (itstool) comment: ' + marker + u'\n'
                seen.add(marker)
        if '\n' in self._text:
            doadd = False
            for line in self._text.split('\n'):
                if line != '':
                    doadd = True
                if not doadd:
                    continue
                ret += u'#. %s\n' % line
        else:
            text = self._text
            while len(text) > 72:
                j = text.rfind(' ', 0, 72)
                if j == -1:
                    j = text.find(' ')
                if j == -1:
                    break
                ret += u'#. %s\n' % text[:j]
                text = text[j+1:]
            ret += u'#. %s\n' % text
        return ret


class Message(object):
    """One translatable message: text chunks interleaved with placeholders."""

    class Placeholder(object):
        """Stands in for an element that is not part of the message text."""

        def __init__(self, node):
            self.node = node
            self.name = unicode(node.name, 'utf-8')

    def __init__(self):
        self._message = []
        self._empty = True
        self._ctxt = None
        self._placeholders = []
        self._sources = []
        self._markers = []
        self._comments = []
        self._preserve = False

    def __repr__(self):
        if self._empty:
            return "Empty message"
        return self.get_string().encode('utf-8')

    def _open_text_chunk(self):
        """Make sure the last chunk of the message is a string."""
        if len(self._message) == 0 or not isinstance(self._message[-1], basestring):
            self._message.append(u'')

    def escape(self, text):
        """Escape backslash, double quote, newline and tab for a PO string."""
        return (text.replace('\\', '\\\\')
                    .replace('"', "\\\"")
                    .replace("\n", "\\n")
                    .replace("\t", "\\t"))

    def add_text(self, text):
        """Append character data, escaping XML special characters."""
        self._open_text_chunk()
        text = _as_unicode(text)
        self._message[-1] += _xml_escape(text)
        if _normalize_space(text) != '':
            self._empty = False

    def add_placeholder(self, node):
        """Append a placeholder for node."""
        holder = Message.Placeholder(node)
        self._placeholders.append(holder)
        self._message.append(holder)

    def get_placeholder(self, name):
        """Return the placeholder called NAME-INDEX, or None if there is none."""
        wanted = _as_unicode(name)
        for index, holder in enumerate(self._placeholders):
            if u'%s-%i' % (holder.name, index + 1) == wanted:
                return holder
        return None

    def add_start_tag(self, node):
        """Append the start tag of node, with attributes, to the message."""
        self._open_text_chunk()
        self._message[-1] += u'<%s' % _as_unicode(node.name)
        if node.properties is not None:
            for prop in node.properties:
                if prop.type == 'attribute':
                    name = _as_unicode(prop.name)
                    if prop.ns() is not None:
                        name = _as_unicode(prop.ns().name) + u':' + name
                    atval = _xml_escape(_as_unicode(prop.content), {u'"': u'&quot;'})
                    # Append to the current text chunk. Using "+=" on the
                    # list itself would extend it one character at a time.
                    self._message[-1] += u' %s="%s"' % (name, atval)
        if node.children is not None:
            self._message[-1] += u'>'
        else:
            self._message[-1] += u'/>'

    def add_end_tag(self, node):
        """Append the end tag of node, unless it was written as empty."""
        if node.children is not None:
            self._open_text_chunk()
            self._message[-1] += u'</%s>' % _as_unicode(node.name)

    def is_empty(self):
        return self._empty

    def get_context(self):
        return self._ctxt

    def set_context(self, ctxt):
        if ctxt is not None:
            ctxt = _as_unicode(ctxt)
        self._ctxt = ctxt

    def add_source(self, source):
        self._sources.append(_as_unicode(source))

    def get_sources(self):
        return self._sources

    def add_marker(self, marker):
        self._markers.append(_as_unicode(marker))

    def get_markers(self):
        return self._markers

    def add_comment(self, comment):
        if comment is not None:
            self._comments.append(comment)

    def get_comments(self):
        return self._comments

    def get_string(self):
        """Return the message text with placeholders as <_:NAME-INDEX/>.

        Whitespace is normalized unless preserve-space is set.
        """
        message = u''
        placeholder = 1
        for msg in self._message:
            if isinstance(msg, basestring):
                message += msg
            elif isinstance(msg, Message.Placeholder):
                message += u'<_:%s-%i/>' % (msg.name, placeholder)
                placeholder += 1
        if not self._preserve:
            message = _normalize_space(message)
        return message

    def get_preserve_space(self):
        return self._preserve

    def set_preserve_space(self, preserve=True):
        self._preserve = preserve

    def format(self):
        """Return this message as a PO entry with an empty msgstr."""
        ret = u''
        seen = set()
        for marker in self._markers:
            if marker not in seen:
                ret += u'#. (itstool) path: ' + marker + u'\n'
                seen.add(marker)
        # Comments with identical text are folded into the first one, which
        # takes over their markers.
        comments = []
        commentsdict = {}
        for comment in self._comments:
            key = comment.get_text()
            if key in commentsdict:
                for marker in comment.get_markers():
                    commentsdict[key].add_marker(marker)
            else:
                comments.append(comment)
                commentsdict[key] = comment
        for index, comment in enumerate(comments):
            if index != 0:
                ret += u'#.\n'
            ret += comment.format()
        for source in self._sources:
            ret += u'#: %s\n' % source
        if self._preserve:
            ret += u'#, no-wrap\n'
        if self._ctxt is not None:
            ret += u'msgctxt "%s"\n' % self.escape(self._ctxt)
        message = self.get_string()
        if self._preserve:
            ret += u'msgid ""\n'
            lines = message.split('\n')
            last = len(lines) - 1
            for no, line in enumerate(lines):
                if no == last:
                    ret += u'"%s"\n' % self.escape(line)
                else:
                    ret += u'"%s\\n"\n' % self.escape(line)
        else:
            ret += u'msgid "%s"\n' % self.escape(message)
        ret += u'msgstr ""\n'
        return ret


def xml_child_iter(node):
    """Yield the direct children of node."""
    child = node.children
    while child is not None:
        yield child
        child = child.next


def xml_attr_iter(node):
    """Yield the attributes of node."""
    attr = node.get_properties()
    while attr is not None:
        yield attr
        attr = attr.next


def xml_is_ns_name(node, ns, name):
    """Return True if node is an element called name in namespace ns."""
    if node.type != 'element':
        return False
    return node.name == name and node.ns() is not None and node.ns().content == ns


def xml_error_catcher(doc, error):
    """libxml2 error handler: accumulate the error text on doc."""
    doc._xml_err += " %s" % error


def fix_node_ns(node, nsdefs):
    """Remove namespace definitions that repeat one inherited from an ancestor.

    nsdefs maps prefix to URI for the definitions in scope at node.
    """
    childnsdefs = nsdefs.copy()
    nsdef = node.nsDefs()
    while nsdef is not None:
        # Fetch the successor first: the definition may be removed below.
        nextnsdef = nsdef.next
        if nsdef.name in nsdefs and nsdefs[nsdef.name] == nsdef.content:
            node.removeNsDef(nsdef.content)
        else:
            childnsdefs[nsdef.name] = nsdef.content
        nsdef = nextnsdef
    for child in xml_child_iter(node):
        if child.type == 'element':
            fix_node_ns(child, childnsdefs)


class Document(object):
    """An XML document together with the ITS data categories applied to it."""

    # Rules that simply map the selected nodes to one attribute value:
    # (namespace, element name, attribute name, name of the target dict).
    _SIMPLE_RULES = (
        (NS_ITS, 'translateRule', 'translate', '_its_translate_nodes'),
        (NS_ITS, 'withinTextRule', 'withinText', '_its_within_text_nodes'),
        (NS_ITST, 'preserveSpaceRule', 'preserveSpace', '_itst_preserve_space_nodes'),
        (NS_ITST, 'dropRule', 'drop', '_itst_drop_nodes'),
    )

    def __init__(self, filename, messages):
        """Parse filename and collect the ITS rules embedded in it.

        Exits the process with status 1 if the file cannot be opened or
        parsed. messages is the MessageList that receives the messages.
        """
        self._xml_err = ''
        libxml2.registerErrorHandler(xml_error_catcher, self)
        try:
            ctxt = libxml2.createFileParserCtxt(filename)
        except Exception:
            sys.stderr.write('Error: cannot open XML file %s\n' % filename)
            sys.exit(1)
        ctxt.lineNumbers(1)
        ctxt.replaceEntities(1)
        ctxt.parseDocument()
        self._filename = filename
        self._doc = ctxt.doc()
        self._localrules = []
        self._pre_process(self._doc)
        try:
            self._check_errors()
        except libxml2.parserError as e:
            sys.stderr.write('Error: Could not parse document:\n%s\n' % str(e))
            sys.exit(1)
        self._msgs = messages
        self._its_translate_nodes = {}
        self._its_within_text_nodes = {}
        self._its_loc_notes = {}
        self._itst_preserve_space_nodes = {}
        self._itst_drop_nodes = {}
        self._itst_contexts = {}
        self._its_lang = {}
        self._itst_lang_attr = {}
        self._itst_credits = None
        self._itst_externals = []

    def _pre_process(self, node):
        """Expand text XIncludes and collect its:rules elements below node."""
        for child in xml_child_iter(node):
            if xml_is_ns_name(child, 'http://www.w3.org/2001/XInclude', 'include'):
                if child.nsProp('parse', None) == 'text':
                    child.xincludeProcessTree()
            elif xml_is_ns_name(child, NS_ITS, 'rules'):
                if child.hasNsProp('href', NS_XLINK):
                    self._load_linked_rules(child.nsProp('href', NS_XLINK))
                version = None
                if child.hasNsProp('version', None):
                    version = child.nsProp('version', None)
                else:
                    root = child.doc.getRootElement()
                    if root.hasNsProp('version', NS_ITS):
                        version = root.nsProp('version', NS_ITS)
                    else:
                        sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
                if version is not None and version != '1.0':
                    sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
                                     version)
                else:
                    self._localrules.append(child)
            self._pre_process(child)

    def _load_linked_rules(self, href):
        """Parse the rules file referenced by an its:rules xlink:href."""
        href = os.path.join(os.path.dirname(self._filename), href)
        hctxt = libxml2.createFileParserCtxt(href)
        hctxt.replaceEntities(1)
        hctxt.parseDocument()
        root = hctxt.doc().getRootElement()
        version = None
        if root.hasNsProp('version', None):
            version = root.nsProp('version', None)
        else:
            sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
                             os.path.basename(href))
        if version is not None and version != '1.0':
            sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
                             (os.path.basename(href), root.nsProp('version', None)))
        else:
            self._localrules.append(root)

    def _check_errors(self):
        """Raise libxml2.parserError if the error handler recorded anything."""
        if self._xml_err:
            raise libxml2.parserError(self._xml_err)

    def _new_xpath_context(self, node):
        """Return an XPath context on this document with the prefixed
        namespaces in scope at node registered (innermost definition wins)."""
        xpath = self._doc.xpathNewContext()
        seen = set()
        par = node
        while par is not None:
            nsdef = par.nsDefs()
            while nsdef is not None:
                if nsdef.name is not None and nsdef.name not in seen:
                    seen.add(nsdef.name)
                    xpath.xpathRegisterNs(nsdef.name, nsdef.content)
                nsdef = nsdef.next
            par = par.parent
        return xpath

    def _eval_at_node(self, xpath, node, expr):
        """Evaluate expr with node as context node, then restore the old one."""
        try:
            oldnode = xpath.contextNode()
        except Exception:
            oldnode = None
        xpath.setContextNode(node)
        try:
            return self._try_xpath_eval(xpath, expr)
        finally:
            xpath.setContextNode(oldnode)

    def apply_its_rule(self, rule, xpath):
        """Apply one ITS rule element, evaluating its selectors with xpath."""
        if rule.type != 'element':
            return
        selector = rule.nsProp('selector', None)
        for ns, name, attr, table in self._SIMPLE_RULES:
            if xml_is_ns_name(rule, ns, name):
                if selector is not None:
                    value = rule.nsProp(attr, None)
                    nodes = getattr(self, table)
                    for node in self._try_xpath_eval(xpath, selector):
                        nodes[node] = value
                return
        if xml_is_ns_name(rule, NS_ITST, 'contextRule'):
            self._apply_context_rule(rule, xpath, selector)
        elif xml_is_ns_name(rule, NS_ITS, 'locNoteRule'):
            self._apply_loc_note_rule(rule, xpath, selector)
        elif xml_is_ns_name(rule, NS_ITS, 'langRule'):
            self._apply_lang_rule(rule, xpath, selector)
        elif xml_is_ns_name(rule, NS_ITST, 'credits'):
            if rule.nsProp('appendTo', None) is not None:
                # Only the first node matched by appendTo is used.
                for node in self._try_xpath_eval(xpath, rule.nsProp('appendTo', None)):
                    self._itst_credits = (node, rule)
                    break
        elif xml_is_ns_name(rule, NS_ITST, 'externalRefRule'):
            self._apply_external_ref_rule(rule, xpath, selector)

    def _apply_context_rule(self, rule, xpath, selector):
        """Record the message context given by an itst:contextRule."""
        if selector is None:
            return
        for node in self._try_xpath_eval(xpath, selector):
            if rule.hasNsProp('context', None):
                self._itst_contexts[node] = rule.nsProp('context', None)
            elif rule.hasNsProp('contextPointer', None):
                ctxt = self._eval_at_node(xpath, node, rule.nsProp('contextPointer', None))
                if isinstance(ctxt, basestring):
                    self._itst_contexts[node] = ctxt
                else:
                    for ctxtnode in ctxt:
                        self._itst_contexts[node] = ctxtnode.content
                        break

    def _apply_loc_note_rule(self, rule, xpath, selector):
        """Record the localization notes given by an its:locNoteRule."""
        locnote = None
        for child in xml_child_iter(rule):
            if xml_is_ns_name(child, NS_ITS, 'locNote'):
                locnote = _normalize_space(child.content)
                break
        if locnote is None:
            if rule.hasNsProp('locNoteRef', None):
                locnote = '(itstool) link: ' + _normalize_space(rule.nsProp('locNoteRef', None))
        if selector is None:
            return
        nodes = self._try_xpath_eval(xpath, selector)
        if locnote is not None:
            for node in nodes:
                self._its_loc_notes.setdefault(node, []).append(locnote)
            return
        if rule.hasNsProp('locNotePointer', None):
            sel = rule.nsProp('locNotePointer', None)
            ref = False
        elif rule.hasNsProp('locNoteRefPointer', None):
            sel = rule.nsProp('locNoteRefPointer', None)
            ref = True
        else:
            return
        for node in nodes:
            note = self._eval_at_node(xpath, node, sel)
            if isinstance(note, basestring):
                self._its_loc_notes.setdefault(node, []).append(note)
                continue
            # Only the first node the pointer selects is used.
            for notenode in note:
                if self.get_preserve_space(notenode):
                    cont = notenode.content
                else:
                    cont = _normalize_space(notenode.content)
                if ref:
                    cont = '(itstool) link: ' + cont
                self._its_loc_notes.setdefault(node, []).append(cont)
                break

    def _apply_lang_rule(self, rule, xpath, selector):
        """Record language information given by an its:langRule."""
        pointer = rule.nsProp('langPointer', None)
        if selector is None or pointer is None:
            return
        for node in self._try_xpath_eval(xpath, selector):
            res = self._eval_at_node(xpath, node, pointer)
            if len(res) > 0:
                self._its_lang[node] = res[0].content
            # We need to construct language attributes, not just read
            # language information. Technically, langPointer could be
            # any XPath expression. But if it looks like an attribute
            # accessor, just use the attribute name.
            if pointer.startswith('@'):
                self._itst_lang_attr[node] = pointer[1:]

    def _apply_external_ref_rule(self, rule, xpath, selector):
        """Record external file references given by an itst:externalRefRule."""
        pointer = rule.nsProp('refPointer', None)
        if selector is None or pointer is None:
            return
        for node in self._try_xpath_eval(xpath, selector):
            res = self._eval_at_node(xpath, node, pointer)
            if len(res) > 0:
                self._itst_externals.append((node, res[0].content))

    def apply_its_rules(self):
        """Apply the installed .its files, then the document's local rules.

        Installed files are looked up in itstool/its under XDG_DATA_HOME and
        each XDG_DATA_DIRS entry; a file name seen in an earlier directory
        is not applied again.
        """
        dirs = []
        ddir = os.getenv('XDG_DATA_HOME', '')
        if ddir == '':
            ddir = os.path.join(os.path.expanduser('~'), '.local', 'share')
        dirs.append(ddir)
        ddir = os.getenv('XDG_DATA_DIRS', '')
        if ddir == '':
            if DATADIR not in ('/usr/local/share', '/usr/share'):
                ddir += DATADIR + ':'
            ddir += '/usr/local/share:/usr/share'
        dirs.extend(ddir.split(':'))
        ddone = set()
        for ddir in dirs:
            itsdir = os.path.join(ddir, 'itstool', 'its')
            if not os.path.isdir(itsdir):
                continue
            for dfile in os.listdir(itsdir):
                if dfile.endswith('.its') and dfile not in ddone:
                    self.apply_its_file(os.path.join(itsdir, dfile))
                    ddone.add(dfile)
        self.apply_local_its_rules()

    def apply_its_file(self, filename):
        """Apply the rules of the ITS file filename to this document.

        The file is skipped if its root is not its:rules, if its version is
        not 1.0, or if it contains itst:match elements and none of their
        selectors matches anything in this document.
        """
        doc = libxml2.parseFile(filename)
        root = doc.getRootElement()
        if not xml_is_ns_name(root, NS_ITS, 'rules'):
            return
        version = None
        if root.hasNsProp('version', None):
            version = root.nsProp('version', None)
        else:
            sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
                             os.path.basename(filename))
        if version is not None and version != '1.0':
            sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
                             (os.path.basename(filename), root.nsProp('version', None)))
            return
        matched = True
        for match in xml_child_iter(root):
            if xml_is_ns_name(match, NS_ITST, 'match'):
                matched = False
                xpath = self._new_xpath_context(match)
                if match.hasNsProp('selector', None):
                    if len(self._try_xpath_eval(xpath, match.nsProp('selector', None))) > 0:
                        matched = True
                        break
        if not matched:
            return
        for rule in xml_child_iter(root):
            if rule.type != 'element':
                continue
            # Namespaces come from the rule itself and its ancestors, not
            # from whichever node the match loop above happened to end on.
            self.apply_its_rule(rule, self._new_xpath_context(rule))

    def apply_local_its_rules(self):
        """Apply the its:rules elements collected while parsing the document."""
        for rules in self._localrules:
            shared = self._new_xpath_context(rules)
            for rule in xml_child_iter(rules):
                if rule.type != 'element':
                    continue
                if rule.nsDefs() is not None:
                    rule_xpath = self._new_xpath_context(rule)
                else:
                    rule_xpath = shared
                self.apply_its_rule(rule, rule_xpath)

    def _append_credits(self, parent, node, trdata):
        """Instantiate the credits template node under parent.

        trdata is (name, email, years), with a fourth item holding the
        current year inside an itst:for-each over the years.
        """
        if xml_is_ns_name(node, NS_ITST, 'for-each'):
            select = node.nsProp('select', None)
            if select == 'years':
                for year in trdata[2].split(','):
                    for child in xml_child_iter(node):
                        self._append_credits(parent, child, trdata + (year.strip(),))
        elif xml_is_ns_name(node, NS_ITST, 'value-of'):
            select = node.nsProp('select', None)
            val = None
            if select == 'name':
                val = trdata[0]
            elif select == 'email':
                val = trdata[1]
            elif select == 'years':
                val = trdata[2]
            elif select == 'year' and len(trdata) == 4:
                val = trdata[3]
            if val is not None:
                val = val.encode('utf-8')
                parent.addContent(val)
        else:
            newnode = node.copyNode(2)
            parent.addChild(newnode)
            for child in xml_child_iter(node):
                self._append_credits(newnode, child, trdata)

    def merge_credits(self, translations, language, node):
        """Append translator credits from translations to the credits node."""
        if self._itst_credits is None:
            return
        # Dear Python, please implement pgettext.
        # http://bugs.python.org/issue2504
        # Sincerely, Shaun
        trans = translations.ugettext('_\x04translator-credits')
        if trans is None or trans == 'translator-credits':
            return
        regex = re.compile(r'(.*) \<(.*)\>, (.*)')
        target, template = self._itst_credits
        for credit in trans.split('\n'):
            match = regex.match(credit)
            if not match:
                continue
            trdata = match.groups()
            for tmplnode in xml_child_iter(template):
                self._append_credits(target, tmplnode, trdata)

    def _is_dropped(self, node):
        """Return True if node is marked to be dropped from the output."""
        if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
            return True
        return self._itst_drop_nodes.get(node, 'no') == 'yes'

    def _drop_node(self, node):
        """Remove node, and the blank node before it if there is one."""
        prev = node.prev
        node.unlinkNode()
        node.freeNode()
        # A dropped first child has no previous sibling.
        if prev is not None and prev.isBlankNode():
            prev.unlinkNode()
            prev.freeNode()

    def join_translations(self, translations, node=None, strict=False):
        """Insert, after each translatable unit, one translated copy per
        language, tagged with xml:lang.

        translations maps language codes to translation catalogs.
        """
        is_root = False
        if node is None:
            is_root = True
            self.generate_messages(comments=False)
            node = self._doc.getRootElement()
        if node is None or node.type != 'element':
            return
        if self._is_dropped(node):
            self._drop_node(node)
            return
        msg = self._msgs.get_message_by_node(node)
        if msg is None:
            # Attributes are not translated here: this method holds one
            # catalog per language, while an attribute holds one value.
            children = [child for child in xml_child_iter(node)]
            for child in children:
                self.join_translations(translations, node=child, strict=strict)
        else:
            prevnode = None
            if node.prev is not None and node.prev.type == 'text':
                prevtext = node.prev.content
                if _WHITESPACE.sub('', prevtext) == '':
                    prevnode = node.prev
            for lang in sorted(translations.keys(), reverse=True):
                newnode = self.get_translated(node, translations[lang], strict=strict, lang=lang)
                if newnode != node:
                    newnode.setProp('xml:lang', lang)
                    node.addNextSibling(newnode)
                    if prevnode is not None:
                        node.addNextSibling(prevnode.copyNode(0))
        if is_root:
            # Because of the way we create nodes and rewrite the document,
            # we end up with lots of redundant namespace definitions. We
            # kill them off in one fell swoop at the end.
            fix_node_ns(node, {})
            self._check_errors()

    def merge_translations(self, translations, language, node=None, strict=False):
        """Replace each translatable unit with its translation.

        translations is a single catalog. language, when not None, is set
        on the root element using the attribute named by a langRule.
        """
        is_root = False
        if node is None:
            is_root = True
            self.generate_messages(comments=False)
            node = self._doc.getRootElement()
        if node is None or node.type != 'element':
            return
        if self._is_dropped(node):
            self._drop_node(node)
            return
        if is_root:
            self.merge_credits(translations, language, node)
        msg = self._msgs.get_message_by_node(node)
        if msg is None:
            self.translate_attrs(node, node, translations)
            children = [child for child in xml_child_iter(node)]
            for child in children:
                self.merge_translations(translations, language, node=child, strict=strict)
        else:
            newnode = self.get_translated(node, translations, strict=strict, lang=language)
            if newnode != node:
                self.translate_attrs(node, newnode, translations)
                node.replaceNode(newnode)
        if is_root:
            # Apply language attributes to untranslated nodes. We don't do
            # this before processing, because then these attributes would
            # be copied into the new nodes. We apply the attribute without
            # checking whether it was translated, because any that were will
            # just be floating around, unattached to a document.
            for lcnode in self._msgs.get_nodes_with_messages():
                attr = self._itst_lang_attr.get(lcnode)
                if attr is None:
                    continue
                origlang = None
                lcpar = lcnode
                while lcpar is not None:
                    origlang = self._its_lang.get(lcpar)
                    if origlang is not None:
                        break
                    lcpar = lcpar.parent
                if origlang is not None:
                    lcnode.setProp(attr, origlang)
            # And then set the language attribute on the root node.
            if language is not None:
                attr = self._itst_lang_attr.get(node)
                if attr is not None:
                    node.setProp(attr, language)
            # Because of the way we create nodes and rewrite the document,
            # we end up with lots of redundant namespace definitions. We
            # kill them off in one fell swoop at the end.
            fix_node_ns(node, {})
            self._check_errors()

    def translate_attrs(self, oldnode, newnode, translations=None):
        """Set translated values of oldnode's translatable attributes on newnode.

        translations is the catalog to use. When it is omitted, the
        module-level name ``translations`` is used if it exists, which is
        what this method relied on before it took the argument.
        """
        if translations is None:
            translations = globals().get('translations')
        if translations is None:
            return
        trans_attrs = [attr for attr in xml_attr_iter(oldnode)
                       if self._its_translate_nodes.get(attr, 'no') == 'yes']
        for attr in trans_attrs:
            # Catalog keys are unicode; libxml2 hands out UTF-8 byte strings.
            newcontent = translations.ugettext(_as_unicode(attr.get_content()))
            if newcontent:
                if isinstance(newcontent, unicode):
                    newcontent = newcontent.encode('utf-8')
                newnode.setProp(attr.name, newcontent)

    def get_translated(self, node, translations, strict=False, lang=None):
        """Return a translated copy of node, or node itself.

        node is returned unchanged when it has no message, when there is no
        translation, or when the translation is not well-formed XML and
        strict is false (a warning is written). With strict true the
        libxml2.parserError propagates.
        """
        msg = self._msgs.get_message_by_node(node)
        if msg is None:
            return node
        msgstr = msg.get_string()
        # Dear Python, please implement pgettext.
        # http://bugs.python.org/issue2504
        # Sincerely, Shaun
        if msg.get_context() is not None:
            msgstr = msg.get_context() + '\x04' + msgstr
        trans = translations.ugettext(msgstr)
        if trans is None:
            return node
        # Collect the namespaces in scope, outermost first so that inner
        # definitions override outer ones.
        nss = {}
        ancestors = []
        par = node
        while par is not None:
            ancestors.append(par)
            par = par.parent
        for par in reversed(ancestors):
            nsdef = par.nsDefs()
            while nsdef is not None:
                nss[nsdef.name] = nsdef.content
                nsdef = nsdef.next
        nss['_'] = NS_BLANK
        # Wrap the translation in a copy of the element's start and end tag
        # so it can be parsed as a document.
        blurb = '<' + node.name
        for nsname in nss.keys():
            if nsname is None:
                blurb += ' xmlns="%s"' % nss[nsname]
            else:
                blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname])
        blurb += '>%s</%s>' % (trans.encode('utf-8'), node.name)
        ctxt = libxml2.createDocParserCtxt(blurb)
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        trnode = ctxt.doc().getRootElement()
        try:
            self._check_errors()
        except libxml2.parserError:
            if strict:
                raise
            sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
                (lang + ' ') if lang is not None else '',
                msgstr.encode('utf-8')))
            self._xml_err = ''
            return node

        def scan_node(parent):
            # Swap each <_:NAME-INDEX/> placeholder for the (translated)
            # original node it stands for.
            children = [child for child in xml_child_iter(parent)]
            for child in children:
                if child.type != 'element':
                    continue
                if child.ns() is not None and child.ns().content == NS_BLANK:
                    ph_node = msg.get_placeholder(child.name).node
                    if self.has_child_elements(ph_node):
                        self.merge_translations(translations, None, ph_node, strict=strict)
                        child.replaceNode(ph_node)
                    else:
                        repl = self.get_translated(ph_node, translations, strict=strict, lang=lang)
                        child.replaceNode(repl)
                scan_node(child)

        scan_node(trnode)
        retnode = node.copyNode(2)
        for child in xml_child_iter(trnode):
            retnode.addChild(child.copyNode(1))
        return retnode

    def generate_messages(self, comments=True):
        """Fill the message list from the credits, external references and
        the document's root element."""
        if self._itst_credits is not None:
            self._msgs.add_credits()
        for ext in self._itst_externals:
            translate = None
            node = ext[0]
            while node is not None:
                translate = self.get_its_translate(node)
                if translate is not None:
                    break
                node = node.parent
            if translate == 'no':
                continue
            msg = Message()
            fullfile = os.path.join(os.path.dirname(self._filename), ext[1])
            try:
                with open(fullfile, 'rb') as filefp:
                    filemd5 = hashlib.md5(filefp.read()).hexdigest()
            except (IOError, OSError):
                filemd5 = '__failed__'
            txt = "external ref='%s' md5='%s'" % (ext[1], filemd5)
            msg.set_context('_')
            msg.add_text(txt)
            msg.add_source('%s:%i' % (self._doc.name, ext[0].lineNo()))
            msg.add_marker(ext[0].name)
            msg.add_comment(Comment('This is a reference to an external file such as an image or'
                                    ' video. When the file changes, the md5 hash will change to'
                                    ' let you know you need to update your localized copy. The'
                                    ' msgstr is not used at all. Set it to whatever you like'
                                    ' once you have updated your copy of the file.'))
            self._msgs.add_message(msg, None)
        self._in_translatable = True
        for child in xml_child_iter(self._doc):
            if child.type == 'element':
                self.generate_message(child, None, comments=comments)
                break

    def generate_message(self, node, msg, comments=True, path=None):
        """Add node to the enclosing message msg, or start a new message.

        msg is None when no message is open. path is the chain of
        within-text element names leading to node, used in comment markers.
        """
        if node.type in ('text', 'cdata') and msg is not None:
            msg.add_text(node.content)
            return
        if node.type != 'element':
            return
        if self._is_dropped(node):
            return
        if path is None:
            path = ''
        translate = self.get_its_translate(node)
        if translate is None:
            if self._in_translatable:
                translate = 'yes'
            else:
                translate = 'no'
        withinText = False
        if translate == 'no':
            if msg is not None:
                msg.add_placeholder(node)
            is_unit = False
            msg = None
        else:
            is_unit = msg is None or self.is_translation_unit(node)
            if is_unit:
                if msg is not None:
                    msg.add_placeholder(node)
                msg = Message()
                ctxt = None
                if node.hasNsProp('context', NS_ITST):
                    ctxt = node.nsProp('context', NS_ITST)
                if ctxt is None:
                    ctxt = self._itst_contexts.get(node)
                if ctxt is not None:
                    msg.set_context(ctxt)
                if self.get_preserve_space(node):
                    msg.set_preserve_space()
                msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
                msg.add_marker('%s/%s' % (node.parent.name, node.name))
            else:
                withinText = True
                msg.add_start_tag(node)
        if not withinText:
            # Add msg for translatable node attributes
            for attr in xml_attr_iter(node):
                if self._its_translate_nodes.get(attr, 'no') == 'yes':
                    attr_marker = '%s/%s@%s' % (node.parent.name, node.name, attr.name)
                    attr_msg = Message()
                    attr_msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
                    attr_msg.add_marker(attr_marker)
                    attr_msg.add_text(attr.content)
                    if comments:
                        for locnote in self.get_its_loc_notes(attr):
                            comment = Comment(locnote)
                            comment.add_marker(attr_marker)
                            attr_msg.add_comment(comment)
                    self._msgs.add_message(attr_msg, attr)
        if comments and msg is not None:
            # For a unit, use the notes of the nearest ancestor-or-self that
            # has any; for within-text elements, only the element's own.
            cnode = node
            while cnode is not None:
                hasnote = False
                for locnote in self.get_its_loc_notes(cnode):
                    comment = Comment(locnote)
                    if withinText:
                        comment.add_marker('.%s/%s' % (path, cnode.name))
                    msg.add_comment(comment)
                    hasnote = True
                if hasnote or not is_unit:
                    break
                cnode = cnode.parent
        in_translatable = self._in_translatable
        self._in_translatable = (translate == 'yes')
        if withinText:
            path = path + '/' + node.name
        for child in xml_child_iter(node):
            self.generate_message(child, msg, comments=comments, path=path)
        self._in_translatable = in_translatable
        if translate:
            if is_unit and not msg.is_empty():
                self._msgs.add_message(msg, node)
            elif msg is not None:
                msg.add_end_tag(node)

    def is_translation_unit(self, node):
        """Return True unless node is marked as within text."""
        return self.get_its_within_text(node) != 'yes'

    def has_child_elements(self, node):
        """Return the number of element children of node."""
        return len([child for child in xml_child_iter(node) if child.type == 'element'])

    def get_preserve_space(self, node):
        """Return True if whitespace in node is to be preserved."""
        if node.getSpacePreserve() == 1:
            return True
        while node.type == 'element':
            if node in self._itst_preserve_space_nodes:
                return self._itst_preserve_space_nodes[node] == 'yes'
            node = node.parent
        return False

    def get_its_translate(self, node):
        """Return the translate value set on node, or None if none is set."""
        if node.hasNsProp('translate', NS_ITS):
            return node.nsProp('translate', NS_ITS)
        if xml_is_ns_name(node, NS_ITS, 'span'):
            if node.hasNsProp('translate', None):
                return node.nsProp('translate', None)
        if node in self._its_translate_nodes:
            return self._its_translate_nodes[node]
        return None

    def get_its_within_text(self, node):
        """Return the withinText value for node, 'no' by default."""
        return self._its_within_text_nodes.get(node, 'no')

    def get_its_loc_notes(self, node):
        """Return the localization notes for node, local attributes first."""
        ret = []
        if node.hasNsProp('locNote', NS_ITS):
            ret.append(_normalize_space(node.nsProp('locNote', NS_ITS)))
        if node.hasNsProp('locNoteRef', NS_ITS):
            ret.append('(itstool) link: ' + _normalize_space(node.nsProp('locNoteRef', NS_ITS)))
        if xml_is_ns_name(node, NS_ITS, 'span'):
            if node.hasNsProp('locNote', None):
                ret.append(_normalize_space(node.nsProp('locNote', None)))
            if node.hasNsProp('locNoteRef', None):
                ret.append('(itstool) link: ' + _normalize_space(node.nsProp('locNoteRef', None)))
        ret.extend(self._its_loc_notes.get(node, []))
        return ret

    @staticmethod
    def _try_xpath_eval(xpath, expr):
        """Evaluate expr; on failure write a warning and return []."""
        try:
            return xpath.xpathEval(expr)
        except Exception:
            sys.stderr.write('Warning: Invalid XPath: %s\n' % expr)
            return []


_locale_pattern = re.compile(r'([a-zA-Z0-9-]+)(_[A-Za-z0-9]+)?(@[A-Za-z0-9]+)?(\.[A-Za-z0-9]+)?')

# POSIX locale modifiers that name a script, with the BCP47 script subtag.
_LOCALE_SCRIPTS = {
    '@cyrillic': 'Cyrl',
    '@devanagari': 'Deva',
    '@latin': 'Latn',
    '@shaw': 'Shaw',
}


def convert_locale(locale):
    """Convert a POSIX-style locale name to a BCP47 language tag.

    For example 'sr_RS@latin' becomes 'sr-Latn-RS'. An '@euro' modifier is
    dropped; any other unknown modifier is appended in lower case.
    """
    match = _locale_pattern.match(locale)
    if match is None:
        return locale
    ret = match.group(1).lower()
    variant = match.group(3)
    if variant in _LOCALE_SCRIPTS:
        ret += '-' + _LOCALE_SCRIPTS[variant]
        variant = None
    if match.group(2) is not None:
        ret += '-' + match.group(2)[1:].upper()
    if variant is not None and variant != '@euro':
        ret += '-' + variant[1:].lower()
    return ret


def _load_translations(path):
    """Load the MO file at path with a NoneTranslations fallback.

    Exits the process with status 1 if the file cannot be read.
    """
    try:
        with open(path, 'rb') as mofile:
            catalog = gettext.GNUTranslations(mofile)
    except Exception:
        sys.stderr.write('Error: cannot open mo file %s\n' % path)
        sys.exit(1)
    catalog.add_fallback(NoneTranslations())
    return catalog


def _apply_its_options(doc, itsfiles):
    """Apply the ITS files given with -i, if any, to doc."""
    if itsfiles is not None:
        for itsfile in itsfiles:
            doc.apply_its_file(itsfile)


def _close_output(out):
    """Close out unless it is standard output."""
    if out is not sys.stdout:
        out.close()


if __name__ == '__main__':
    options = optparse.OptionParser()
    options.set_usage('\n  itstool [OPTIONS] [XMLFILES]'
                      '\n  itstool -m FILE [OPTIONS] [XMLFILES]'
                      '\n  itstool -j FILE [OPTIONS] [MOFILES]')
    options.add_option('-i', '--its',
                       action='append',
                       dest='itsfile',
                       metavar='ITS',
                       help='load the ITS rules in the file ITS (can specify multiple times)')
    options.add_option('-l', '--lang',
                       dest='lang',
                       default=None,
                       metavar='LANGUAGE',
                       help='explicitly set the language code for output file')
    options.add_option('-j', '--join',
                       dest='join',
                       metavar='FILE',
                       help='join multiple MO files with the XML file FILE and output XML file')
    options.add_option('-m', '--merge',
                       dest='merge',
                       metavar='FILE',
                       help='merge from a PO or MO file FILE and output XML files')
    options.add_option('-o', '--output',
                       dest='output',
                       default=None,
                       metavar='OUT',
                       help='output PO files to file OUT or XML files in directory OUT')
    options.add_option('-s', '--strict',
                       action='store_true',
                       dest='strict',
                       default=False,
                       help='Exit with error when PO files contain broken XML')
    options.add_option('-v', '--version',
                       action='store_true',
                       dest='version',
                       default=False,
                       help='print itstool version and exit')
    (opts, args) = options.parse_args(sys.argv)

    if opts.version:
        print('itstool %s' % VERSION)
        sys.exit(0)

    if opts.merge is None and opts.join is None:
        # Extraction: XML files in, one PO template out.
        messages = MessageList()
        for filename in args[1:]:
            doc = Document(filename, messages)
            doc.apply_its_rules()
            _apply_its_options(doc, opts.itsfile)
            doc.generate_messages()
        if opts.output is None or opts.output == '-':
            out = sys.stdout
        else:
            try:
                out = open(opts.output, 'w')
            except (IOError, OSError):
                sys.stderr.write('Error: Cannot write to file %s\n' % opts.output)
                sys.exit(1)
        try:
            messages.output(out)
        finally:
            _close_output(out)
    elif opts.merge is not None:
        # Merge: one MO file plus XML files in, translated XML files out.
        translations = _load_translations(opts.merge)
        if opts.lang is None:
            opts.lang = convert_locale(os.path.splitext(os.path.basename(opts.merge))[0])
        # out is either a directory name (one file per input is created in
        # it) or an open file object.
        if opts.output is None:
            out = './'
        elif os.path.isdir(opts.output):
            out = opts.output
        elif len(args) == 2:
            if opts.output == '-':
                out = sys.stdout
            else:
                out = open(opts.output, 'w')
        else:
            sys.stderr.write('Error: Non-directory output for multiple files\n')
            sys.exit(1)
        for filename in args[1:]:
            messages = MessageList()
            doc = Document(filename, messages)
            doc.apply_its_rules()
            _apply_its_options(doc, opts.itsfile)
            try:
                doc.merge_translations(translations, opts.lang, strict=opts.strict)
            except Exception as e:
                sys.stderr.write('Error: Could not merge translations:\n%s\n' % str(e))
                sys.exit(1)
            serialized = doc._doc.serialize('utf-8')
            if isinstance(out, basestring):
                fout = open(os.path.join(out, os.path.basename(filename)), 'w')
                try:
                    fout.write(serialized)
                finally:
                    fout.close()
            else:
                out.write(serialized)
        if not isinstance(out, basestring):
            _close_output(out)
    elif opts.join is not None:
        # Join: MO files plus one XML file in, one multi-language XML out.
        translations = {}
        for filename in args[1:]:
            lang = convert_locale(os.path.splitext(os.path.basename(filename))[0])
            translations[lang] = _load_translations(filename)
        if opts.output is None:
            out = sys.stdout
        elif os.path.isdir(opts.output):
            # Name the result after the XML file being joined.
            out = open(os.path.join(opts.output, os.path.basename(opts.join)), 'w')
        else:
            out = open(opts.output, 'w')
        try:
            messages = MessageList()
            doc = Document(opts.join, messages)
            doc.apply_its_rules()
            _apply_its_options(doc, opts.itsfile)
            doc.join_translations(translations, strict=opts.strict)
            out.write(doc._doc.serialize('utf-8'))
        finally:
            _close_output(out)