diff options
author | Danilo Šegan <danilo@src.gnome.org> | 2006-12-26 19:48:14 +0000 |
---|---|---|
committer | Danilo Šegan <danilo@src.gnome.org> | 2006-12-26 19:48:14 +0000 |
commit | 3e42bf2151f46d22306902c149e038196c658fc2 (patch) | |
tree | fd171f17074d3403f785c4b0a1e04f1097423723 /xml2po | |
parent | e13963d4b77e3508055a4d1d61207ada5ea879e6 (diff) | |
download | gnome-doc-utils-3e42bf2151f46d22306902c149e038196c658fc2.tar.gz |
Added XHTML mode and attribute extraction support.
Diffstat (limited to 'xml2po')
-rw-r--r-- | xml2po/ChangeLog | 20 | ||||
-rw-r--r-- | xml2po/TODO | 6 | ||||
-rw-r--r-- | xml2po/modes/Makefile.am | 2 | ||||
-rw-r--r-- | xml2po/modes/docbook.py | 8 | ||||
-rw-r--r-- | xml2po/modes/empty.py | 6 | ||||
-rw-r--r-- | xml2po/modes/gs.py | 6 | ||||
-rw-r--r-- | xml2po/modes/xhtml.py | 64 | ||||
-rw-r--r-- | xml2po/tests/ChangeLog | 5 | ||||
-rwxr-xr-x | xml2po/tests/test.py | 1 | ||||
-rw-r--r-- | xml2po/tests/xhtml.po | 32 | ||||
-rw-r--r-- | xml2po/tests/xhtml.pot | 32 | ||||
-rw-r--r-- | xml2po/tests/xhtml.xml | 14 | ||||
-rw-r--r-- | xml2po/tests/xhtml.xml.out | 14 | ||||
-rwxr-xr-x | xml2po/xml2po.py | 51 |
14 files changed, 244 insertions, 17 deletions
diff --git a/xml2po/ChangeLog b/xml2po/ChangeLog index 7c216bf..5607bd6 100644 --- a/xml2po/ChangeLog +++ b/xml2po/ChangeLog @@ -1,5 +1,25 @@ 2006-12-26 Danilo Šegan <danilo@gnome.org> + Fix bug #343749. + Support for XHTML mode, partially by Claude Paroz <paroz@email.ch>. + + * TODO: Updated. + + * xml2po.py (processAttribute): Added. + (read_treatedattributes): Added. + (processElementTag): Use treated attributes. + + * modes/Makefile.am (common_DATA): Added xhtml.py. + + * modes/gs.py (gsXmlMode.getTreatedAttributes): + * modes/empty.py (emptyXmlMode.getTreatedAttributes): + * modes/docbook.py (docbookXmlMode.getTreatedAttributes): + Added new method to every other mode. + + * modes/xhtml.py: Added Claude's XHTML mode. + +2006-12-26 Danilo Šegan <danilo@gnome.org> + Fix bug #378073. * xml2po.py (autoNodeIsFinal): Really ignore ignored tags. diff --git a/xml2po/TODO b/xml2po/TODO index d27bbf9..5cfe928 100644 --- a/xml2po/TODO +++ b/xml2po/TODO @@ -26,9 +26,6 @@ TODO: o pipe the constructed .po file thru 'msgcat'; msgcat is meant as a .po file normalization tool. [Karl] - o add support for translating attributes (eg. imagine "title" [??] - attribute in IMG tag in XHTML) -- this should be easy enough - o support several different doctypes in the same run [??] eg. a mathml, docbook, documents all translated with one run @@ -39,6 +36,9 @@ TODO: DONE: + o [2006-12-26] add support for translating attributes (eg. imagine "title" + attribute in IMG tag in XHTML) -- this should be easy enough + o [BUG FIXED with at least libxml2 2.6.21] Merging with "-k" (keep-entities) option doesn't work correctly diff --git a/xml2po/modes/Makefile.am b/xml2po/modes/Makefile.am index 146c1b4..55eff17 100644 --- a/xml2po/modes/Makefile.am +++ b/xml2po/modes/Makefile.am @@ -1,4 +1,4 @@ commondir = $(datadir)/xml2po -common_DATA = docbook.py empty.py gs.py ubuntu.py +common_DATA = docbook.py empty.py gs.py ubuntu.py xhtml.py EXTRA_DIST = $(common_DATA) diff --git a/xml2po/modes/docbook.py b/xml2po/modes/docbook.py index 57e15f1..ceec906 100644 --- a/xml2po/modes/docbook.py +++ b/xml2po/modes/docbook.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>. +# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>. # # This file is part of xml2po. # @@ -49,7 +49,7 @@ class docbookXmlMode: 'segmentedlist', 'simplelist', 'calloutlist', 'varlistentry' ] self.objects = [ 'figure', 'textobject', 'imageobject', 'mediaobject', 'screenshot' ] - + def getIgnoredTags(self): "Returns array of tags to be ignored." return self.objects + self.lists @@ -73,6 +73,10 @@ class docbookXmlMode: 'userinput' ] + def getTreatedAttributes(self): + "Returns array of tag attributes which content is to be translated" + return [] + def getStringForTranslators(self): """Returns string which will be used to credit translators.""" return "translator-credits" diff --git a/xml2po/modes/empty.py b/xml2po/modes/empty.py index 9c53aa2..1c9f002 100644 --- a/xml2po/modes/empty.py +++ b/xml2po/modes/empty.py @@ -1,4 +1,4 @@ -# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>. +# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>. # # This file is part of xml2po. # @@ -34,6 +34,10 @@ class emptyXmlMode: "Returns array of tags in which spaces are to be preserved." return [] + def getTreatedAttributes(self): + "Returns array of tag attributes which content is to be translated" + return [] + def preProcessXml(self, doc, msg): "Preprocess a document and perhaps adds some messages." pass diff --git a/xml2po/modes/gs.py b/xml2po/modes/gs.py index 36aa550..9ba0307 100644 --- a/xml2po/modes/gs.py +++ b/xml2po/modes/gs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>. +# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>. # # This file is part of xml2po. # @@ -34,6 +34,10 @@ class gsXmlMode: "Returns array of tags in which spaces are to be preserved." return [] + def getTreatedAttributes(self): + "Returns array of tag attributes which content is to be translated" + return [] + def preProcessXml(self, doc, msg): "Preprocess a document and perhaps adds some messages." pass diff --git a/xml2po/modes/xhtml.py b/xml2po/modes/xhtml.py new file mode 100644 index 0000000..f4371e7 --- /dev/null +++ b/xml2po/modes/xhtml.py @@ -0,0 +1,64 @@ +# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>. +# Copyright (c) 2006 Claude Paroz <paroz@email.ch>. +# +# This file is part of xml2po. +# +# xml2po is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# xml2po is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with xml2po; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +# This implements special instructions for handling XHTML documents +# in a better way, particularly to extract some attributes in HTML tags + +class xhtmlXmlMode: + """Class for special handling of XHTML document types.""" + def getIgnoredTags(self): + "Returns array of tags to be ignored." + return [] + + def getFinalTags(self): + "Returns array of tags to be considered 'final'." + return ['p', 'li', 'pre'] + + def getTreatedAttributes(self): + "Returns array of tag attributes which content is to be translated" + return ['title','alt'] + + def getSpacePreserveTags(self): + "Returns array of tags in which spaces are to be preserved." + return ['pre'] + + def preProcessXml(self, doc, msg): + "Preprocess a document and perhaps adds some messages." + pass + + def postProcessXmlTranslation(self, doc, language, translators): + """Sets a language and translators in "doc" tree. + + "translators" is a string consisted of translator credits. + "language" is a simple string. + "doc" is a libxml2.xmlDoc instance.""" + pass + + def getStringForTranslators(self): + """Returns None or a string to be added to PO files. + + Common example is 'translator-credits'.""" + return None + + def getCommentForTranslators(self): + """Returns a comment to be added next to string for crediting translators. + + It should explain the format of the string provided by getStringForTranslators().""" + return None diff --git a/xml2po/tests/ChangeLog b/xml2po/tests/ChangeLog index 76c2045..c0d6191 100644 --- a/xml2po/tests/ChangeLog +++ b/xml2po/tests/ChangeLog @@ -1,5 +1,10 @@ 2006-12-26 Danilo Šegan <danilo@gnome.org> + * test.py: Added xhtml test. + * xhtml.xml, xhtml.pot, xhtml.po, xhtml.xml.out: Add XHTML mode test. + +2006-12-26 Danilo Šegan <danilo@gnome.org> + * reuse/reuse.xml: Add uncalled reuse test (call with "../../xml2po -r reuse.xml.out reuse.xml" to check that msgid "Introduction" is untranslated in output). diff --git a/xml2po/tests/test.py b/xml2po/tests/test.py index 02387e0..b5d6e88 100755 --- a/xml2po/tests/test.py +++ b/xml2po/tests/test.py @@ -10,6 +10,7 @@ SIMPLETESTS = { 'deep-finals.xml' : {}, 'keepents.xml': { "options" : "-k" }, 'adjacent-ents.xml': { "options" : "-k" }, 'ubuntu-mode.xml': { "options" : "-m ubuntu -k -l sr" }, + 'xhtml.xml': { "options" : "-m xhtml" }, } OTHERTESTS = [ ('relnotes', 'test.sh') ] diff --git a/xml2po/tests/xhtml.po b/xml2po/tests/xhtml.po new file mode 100644 index 0000000..cced111 --- /dev/null +++ b/xml2po/tests/xhtml.po @@ -0,0 +1,32 @@ +msgid "" +msgstr "" +"Project-Id-Version: xhtml test\n" +"POT-Creation-Date: 2006-12-26 20:18+0100\n" +"PO-Revision-Date: 2006-12-26 20:19+0100\n" +"Last-Translator: Данило Шеган\n" +"Language-Team: Српски\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: xhtml.xml:5(title) +msgid "Sample XHTML page" +msgstr "Пример XHTML стране" + +#: xhtml.xml:7(p) +msgid "This is just an example." +msgstr "Ово је само пример." + +#: xhtml.xml:10(img:title) +msgid "Lovely image" +msgstr "Дивна слика" + +#: xhtml.xml:10(img:alt) +msgid "Image of nothing" +msgstr "Слика ничега" + +#. Here nothing but a paragraph should be extracted. +#: xhtml.xml:13(p) +msgid "Enough for a test, if you ask <a href=\"mailto:danilo\" title=\"Danilo\">me personally</a>." +msgstr "Довољно за пробу, ако питате <a href=\"mailto:danilo\" title=\"Данила\">мене лично</a>." + diff --git a/xml2po/tests/xhtml.pot b/xml2po/tests/xhtml.pot new file mode 100644 index 0000000..3575759 --- /dev/null +++ b/xml2po/tests/xhtml.pot @@ -0,0 +1,32 @@ +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: xhtml.xml:4(title) +msgid "Sample XHTML page" +msgstr "" + +#: xhtml.xml:6(p) +msgid "This is just an example." +msgstr "" + +#: xhtml.xml:9(img:title) +msgid "Lovely image" +msgstr "" + +#: xhtml.xml:9(img:alt) +msgid "Image of nothing" +msgstr "" + +#. Here nothing but a paragraph should be extracted. +#: xhtml.xml:12(p) +msgid "Enough for a test, if you ask <a href=\"mailto:danilo\" title=\"Danilo\">me personally</a>." +msgstr "" + diff --git a/xml2po/tests/xhtml.xml b/xml2po/tests/xhtml.xml new file mode 100644 index 0000000..253afc1 --- /dev/null +++ b/xml2po/tests/xhtml.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head><title>Sample XHTML page</title></head> +<body> +<p>This is just an example.</p> + +<!-- Here both title and alt attributes should be extracted. --> +<img title="Lovely image" alt="Image of nothing" src="nothing.png" /> + +<!-- Here nothing but a paragraph should be extracted. --> +<p>Enough for a test, if you ask <a href="mailto:danilo" title="Danilo">me personally</a>.</p> +</body> +</html> diff --git a/xml2po/tests/xhtml.xml.out b/xml2po/tests/xhtml.xml.out new file mode 100644 index 0000000..4116ca0 --- /dev/null +++ b/xml2po/tests/xhtml.xml.out @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>Пример XHTML стране</title></head> +<body> +<p>Ово је само пример.</p> + +<!-- Here both title and alt attributes should be extracted. --> +<img title="Дивна слика" alt="Слика ничега" src="nothing.png" /> + +<!-- Here nothing but a paragraph should be extracted. --> +<p>Довољно за пробу, ако питате <a href="mailto:danilo" title="Данила">мене лично</a>.</p> +</body> +</html> diff --git a/xml2po/xml2po.py b/xml2po/xml2po.py index 0d1528e..1834fae 100755 --- a/xml2po/xml2po.py +++ b/xml2po/xml2po.py @@ -119,7 +119,7 @@ msgstr "" def outputAll(self, out): self.outputHeader(out) - + for k in self.messages: if k in self.comments: out.write("#. %s\n" % (self.comments[k].replace("\n","\n#. "))) @@ -195,7 +195,7 @@ def normalizeString(text, ignorewhitespace = 1): result = re.sub('^ ','', result) result = re.sub(' $','', result) - + return result def stringForEntity(node): @@ -286,14 +286,14 @@ def startTagForNode(node): nsprop = p.name params += " %s=\"%s\"" % (nsprop, myAttributeSerialize(p)) return result+params - + def endTagForNode(node): if not node: return 0 result = node.name return result - + def isFinalNode(node): if automatic: auto = autoNodeIsFinal(node) @@ -353,6 +353,9 @@ def getCommentForNode(node): else: return None +def replaceAttributeContentsWithText(node,text): + node.setContent(text) + def replaceNodeContentsWithText(node,text): """Replaces all subnodes of a node with contents of text treated as XML.""" @@ -367,7 +370,7 @@ def replaceNodeContentsWithText(node,text): tmp = tmp + dtd.serialize('utf-8') except libxml2.treeError: pass - + content = '<%s>%s</%s>' % (starttag, text, endtag) tmp = tmp + content.encode('utf-8') @@ -398,6 +401,7 @@ def replaceNodeContentsWithText(node,text): next = node.next node.replaceNode(newelem.copyNodeList()) node.next = next + else: # In practice, this happens with tags such as "<para> </para>" (only whitespace in between) pass @@ -421,7 +425,7 @@ def autoNodeIsFinal(node): return final -def worthOutputting(node): +def worthOutputting(node, noauto = 0): """Returns 1 if node is "worth outputting", otherwise 0. Node is "worth outputting", if none of the parents @@ -440,11 +444,32 @@ def worthOutputting(node): if not worth: return 0 - return autoNodeIsFinal(node) - + if noauto: + return worth + else: + return autoNodeIsFinal(node) + +def processAttribute(node, attr): + if not node or not attr or not worthOutputting(node=node, noauto=1): + return + + outtxt = attr.content + if mode=='merge': + translation = getTranslation(outtxt, 0) + replaceAttributeContentsWithText(attr, translation.encode('utf-8')) + else: + msg.outputMessage(outtxt, node.lineNo(), "", 0, + node.name + ":" + attr.name) + def processElementTag(node, replacements, restart = 0): """Process node with node.type == 'element'.""" if node.type == 'element': + # Translate attributes if needed + if node.properties and len(treated_attributes): + for p in node.properties: + if p.name in treated_attributes: + processAttribute(node, p) + outtxt = '' if restart: myrepl = [] @@ -553,7 +578,7 @@ def doSerialize(node): child = child.next return outtxt - + def read_finaltags(filelist): if CurrentXmlMode: return CurrentXmlMode.getFinalTags() @@ -571,6 +596,13 @@ def read_ignoredtags(filelist): 'varlistentry' ] return defaults +def read_treatedattributes(filelist): + if CurrentXmlMode: + return CurrentXmlMode.getTreatedAttributes() + else: + return [] + + def tryToUpdate(allargs, lang): # Remove "-u" and "--update-translation" print >>sys.stderr, "OVDI!" @@ -777,6 +809,7 @@ if mofile: ultimate_tags = read_finaltags(ultimate) ignored_tags = read_ignoredtags(ignored) +treated_attributes = read_treatedattributes(ignored) # I'm not particularly happy about making any of these global, # but I don't want to bother too much with it right now |