summary refs log tree commit diff
path: root/xml2po
diff options
context:
space:
mode:
author Danilo Šegan <danilo@src.gnome.org> 2006-12-26 19:48:14 +0000
committer Danilo Šegan <danilo@src.gnome.org> 2006-12-26 19:48:14 +0000
commit 3e42bf2151f46d22306902c149e038196c658fc2 (patch)
tree fd171f17074d3403f785c4b0a1e04f1097423723 /xml2po
parent e13963d4b77e3508055a4d1d61207ada5ea879e6 (diff)
download gnome-doc-utils-3e42bf2151f46d22306902c149e038196c658fc2.tar.gz
Added XHTML mode and attribute extraction support.
Diffstat (limited to 'xml2po')
-rw-r--r-- xml2po/ChangeLog 20
-rw-r--r-- xml2po/TODO 6
-rw-r--r-- xml2po/modes/Makefile.am 2
-rw-r--r-- xml2po/modes/docbook.py 8
-rw-r--r-- xml2po/modes/empty.py 6
-rw-r--r-- xml2po/modes/gs.py 6
-rw-r--r-- xml2po/modes/xhtml.py 64
-rw-r--r-- xml2po/tests/ChangeLog 5
-rwxr-xr-x xml2po/tests/test.py 1
-rw-r--r-- xml2po/tests/xhtml.po 32
-rw-r--r-- xml2po/tests/xhtml.pot 32
-rw-r--r-- xml2po/tests/xhtml.xml 14
-rw-r--r-- xml2po/tests/xhtml.xml.out 14
-rwxr-xr-x xml2po/xml2po.py 51
14 files changed, 244 insertions, 17 deletions
diff --git a/xml2po/ChangeLog b/xml2po/ChangeLog
index 7c216bf..5607bd6 100644
--- a/xml2po/ChangeLog
+++ b/xml2po/ChangeLog
@@ -1,5 +1,25 @@
2006-12-26 Danilo Šegan <danilo@gnome.org>
+ Fix bug #343749.
+ Support for XHTML mode, partially by Claude Paroz <paroz@email.ch>.
+
+ * TODO: Updated.
+
+ * xml2po.py (processAttribute): Added.
+ (read_treatedattributes): Added.
+ (processElementTag): Use treated attributes.
+
+ * modes/Makefile.am (common_DATA): Added xhtml.py.
+
+ * modes/gs.py (gsXmlMode.getTreatedAttributes):
+ * modes/empty.py (emptyXmlMode.getTreatedAttributes):
+ * modes/docbook.py (docbookXmlMode.getTreatedAttributes):
+ Added new method to every other mode.
+
+ * modes/xhtml.py: Added Claude's XHTML mode.
+
+2006-12-26 Danilo Šegan <danilo@gnome.org>
+
Fix bug #378073.
* xml2po.py (autoNodeIsFinal): Really ignore ignored tags.
diff --git a/xml2po/TODO b/xml2po/TODO
index d27bbf9..5cfe928 100644
--- a/xml2po/TODO
+++ b/xml2po/TODO
@@ -26,9 +26,6 @@ TODO:
o pipe the constructed .po file thru 'msgcat'; msgcat is meant as a .po
file normalization tool. [Karl]
- o add support for translating attributes (eg. imagine "title" [??]
- attribute in IMG tag in XHTML) -- this should be easy enough
-
o support several different doctypes in the same run [??]
eg. a mathml, docbook, documents all translated with one run
@@ -39,6 +36,9 @@ TODO:
DONE:
+ o [2006-12-26] add support for translating attributes (eg. imagine "title"
+ attribute in IMG tag in XHTML) -- this should be easy enough
+
o [BUG FIXED with at least libxml2 2.6.21]
Merging with "-k" (keep-entities) option doesn't work correctly
diff --git a/xml2po/modes/Makefile.am b/xml2po/modes/Makefile.am
index 146c1b4..55eff17 100644
--- a/xml2po/modes/Makefile.am
+++ b/xml2po/modes/Makefile.am
@@ -1,4 +1,4 @@
commondir = $(datadir)/xml2po
-common_DATA = docbook.py empty.py gs.py ubuntu.py
+common_DATA = docbook.py empty.py gs.py ubuntu.py xhtml.py
EXTRA_DIST = $(common_DATA)
diff --git a/xml2po/modes/docbook.py b/xml2po/modes/docbook.py
index 57e15f1..ceec906 100644
--- a/xml2po/modes/docbook.py
+++ b/xml2po/modes/docbook.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>.
#
# This file is part of xml2po.
#
@@ -49,7 +49,7 @@ class docbookXmlMode:
'segmentedlist', 'simplelist', 'calloutlist', 'varlistentry' ]
self.objects = [ 'figure', 'textobject', 'imageobject', 'mediaobject',
'screenshot' ]
-
+
def getIgnoredTags(self):
"Returns array of tags to be ignored."
return self.objects + self.lists
@@ -73,6 +73,10 @@ class docbookXmlMode:
'userinput'
]
+ def getTreatedAttributes(self):
+ "Returns array of tag attributes whose content is to be translated"
+ return []
+
def getStringForTranslators(self):
"""Returns string which will be used to credit translators."""
return "translator-credits"
diff --git a/xml2po/modes/empty.py b/xml2po/modes/empty.py
index 9c53aa2..1c9f002 100644
--- a/xml2po/modes/empty.py
+++ b/xml2po/modes/empty.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>.
#
# This file is part of xml2po.
#
@@ -34,6 +34,10 @@ class emptyXmlMode:
"Returns array of tags in which spaces are to be preserved."
return []
+ def getTreatedAttributes(self):
+ "Returns array of tag attributes whose content is to be translated"
+ return []
+
def preProcessXml(self, doc, msg):
"Preprocess a document and perhaps adds some messages."
pass
diff --git a/xml2po/modes/gs.py b/xml2po/modes/gs.py
index 36aa550..9ba0307 100644
--- a/xml2po/modes/gs.py
+++ b/xml2po/modes/gs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>.
#
# This file is part of xml2po.
#
@@ -34,6 +34,10 @@ class gsXmlMode:
"Returns array of tags in which spaces are to be preserved."
return []
+ def getTreatedAttributes(self):
+ "Returns array of tag attributes whose content is to be translated"
+ return []
+
def preProcessXml(self, doc, msg):
"Preprocess a document and perhaps adds some messages."
pass
diff --git a/xml2po/modes/xhtml.py b/xml2po/modes/xhtml.py
new file mode 100644
index 0000000..f4371e7
--- /dev/null
+++ b/xml2po/modes/xhtml.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>.
+# Copyright (c) 2006 Claude Paroz <paroz@email.ch>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+# This implements special instructions for handling XHTML documents
+# in a better way, particularly to extract some attributes in HTML tags
+
+class xhtmlXmlMode:
+ """Class for special handling of XHTML document types."""
+ def getIgnoredTags(self):
+ "Returns array of tags to be ignored."
+ return []
+
+ def getFinalTags(self):
+ "Returns array of tags to be considered 'final'."
+ return ['p', 'li', 'pre']
+
+ def getTreatedAttributes(self):
+ "Returns array of tag attributes whose content is to be translated"
+ return ['title','alt']
+
+ def getSpacePreserveTags(self):
+ "Returns array of tags in which spaces are to be preserved."
+ return ['pre']
+
+ def preProcessXml(self, doc, msg):
+ "Preprocess a document and perhaps adds some messages."
+ pass
+
+ def postProcessXmlTranslation(self, doc, language, translators):
+ """Sets a language and translators in "doc" tree.
+
+ "translators" is a string consisting of translator credits.
+ "language" is a simple string.
+ "doc" is a libxml2.xmlDoc instance."""
+ pass
+
+ def getStringForTranslators(self):
+ """Returns None or a string to be added to PO files.
+
+ Common example is 'translator-credits'."""
+ return None
+
+ def getCommentForTranslators(self):
+ """Returns a comment to be added next to string for crediting translators.
+
+ It should explain the format of the string provided by getStringForTranslators()."""
+ return None
diff --git a/xml2po/tests/ChangeLog b/xml2po/tests/ChangeLog
index 76c2045..c0d6191 100644
--- a/xml2po/tests/ChangeLog
+++ b/xml2po/tests/ChangeLog
@@ -1,5 +1,10 @@
2006-12-26 Danilo Šegan <danilo@gnome.org>
+ * test.py: Added xhtml test.
+ * xhtml.xml, xhtml.pot, xhtml.po, xhtml.xml.out: Add XHTML mode test.
+
+2006-12-26 Danilo Šegan <danilo@gnome.org>
+
* reuse/reuse.xml: Add uncalled reuse test (call with
"../../xml2po -r reuse.xml.out reuse.xml" to check that msgid
"Introduction" is untranslated in output).
diff --git a/xml2po/tests/test.py b/xml2po/tests/test.py
index 02387e0..b5d6e88 100755
--- a/xml2po/tests/test.py
+++ b/xml2po/tests/test.py
@@ -10,6 +10,7 @@ SIMPLETESTS = { 'deep-finals.xml' : {},
'keepents.xml': { "options" : "-k" },
'adjacent-ents.xml': { "options" : "-k" },
'ubuntu-mode.xml': { "options" : "-m ubuntu -k -l sr" },
+ 'xhtml.xml': { "options" : "-m xhtml" },
}
OTHERTESTS = [ ('relnotes', 'test.sh') ]
diff --git a/xml2po/tests/xhtml.po b/xml2po/tests/xhtml.po
new file mode 100644
index 0000000..cced111
--- /dev/null
+++ b/xml2po/tests/xhtml.po
@@ -0,0 +1,32 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: xhtml test\n"
+"POT-Creation-Date: 2006-12-26 20:18+0100\n"
+"PO-Revision-Date: 2006-12-26 20:19+0100\n"
+"Last-Translator: Данило Шеган\n"
+"Language-Team: Српски\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: xhtml.xml:5(title)
+msgid "Sample XHTML page"
+msgstr "Пример XHTML стране"
+
+#: xhtml.xml:7(p)
+msgid "This is just an example."
+msgstr "Ово је само пример."
+
+#: xhtml.xml:10(img:title)
+msgid "Lovely image"
+msgstr "Дивна слика"
+
+#: xhtml.xml:10(img:alt)
+msgid "Image of nothing"
+msgstr "Слика ничега"
+
+#. Here nothing but a paragraph should be extracted.
+#: xhtml.xml:13(p)
+msgid "Enough for a test, if you ask <a href=\"mailto:danilo\" title=\"Danilo\">me personally</a>."
+msgstr "Довољно за пробу, ако питате <a href=\"mailto:danilo\" title=\"Данила\">мене лично</a>."
+
diff --git a/xml2po/tests/xhtml.pot b/xml2po/tests/xhtml.pot
new file mode 100644
index 0000000..3575759
--- /dev/null
+++ b/xml2po/tests/xhtml.pot
@@ -0,0 +1,32 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: xhtml.xml:4(title)
+msgid "Sample XHTML page"
+msgstr ""
+
+#: xhtml.xml:6(p)
+msgid "This is just an example."
+msgstr ""
+
+#: xhtml.xml:9(img:title)
+msgid "Lovely image"
+msgstr ""
+
+#: xhtml.xml:9(img:alt)
+msgid "Image of nothing"
+msgstr ""
+
+#. Here nothing but a paragraph should be extracted.
+#: xhtml.xml:12(p)
+msgid "Enough for a test, if you ask <a href=\"mailto:danilo\" title=\"Danilo\">me personally</a>."
+msgstr ""
+
diff --git a/xml2po/tests/xhtml.xml b/xml2po/tests/xhtml.xml
new file mode 100644
index 0000000..253afc1
--- /dev/null
+++ b/xml2po/tests/xhtml.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Sample XHTML page</title></head>
+<body>
+<p>This is just an example.</p>
+
+<!-- Here both title and alt attributes should be extracted. -->
+<img title="Lovely image" alt="Image of nothing" src="nothing.png" />
+
+<!-- Here nothing but a paragraph should be extracted. -->
+<p>Enough for a test, if you ask <a href="mailto:danilo" title="Danilo">me personally</a>.</p>
+</body>
+</html>
diff --git a/xml2po/tests/xhtml.xml.out b/xml2po/tests/xhtml.xml.out
new file mode 100644
index 0000000..4116ca0
--- /dev/null
+++ b/xml2po/tests/xhtml.xml.out
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>Пример XHTML стране</title></head>
+<body>
+<p>Ово је само пример.</p>
+
+<!-- Here both title and alt attributes should be extracted. -->
+<img title="Дивна слика" alt="Слика ничега" src="nothing.png" />
+
+<!-- Here nothing but a paragraph should be extracted. -->
+<p>Довољно за пробу, ако питате <a href="mailto:danilo" title="Данила">мене лично</a>.</p>
+</body>
+</html>
diff --git a/xml2po/xml2po.py b/xml2po/xml2po.py
index 0d1528e..1834fae 100755
--- a/xml2po/xml2po.py
+++ b/xml2po/xml2po.py
@@ -119,7 +119,7 @@ msgstr ""
def outputAll(self, out):
self.outputHeader(out)
-
+
for k in self.messages:
if k in self.comments:
out.write("#. %s\n" % (self.comments[k].replace("\n","\n#. ")))
@@ -195,7 +195,7 @@ def normalizeString(text, ignorewhitespace = 1):
result = re.sub('^ ','', result)
result = re.sub(' $','', result)
-
+
return result
def stringForEntity(node):
@@ -286,14 +286,14 @@ def startTagForNode(node):
nsprop = p.name
params += " %s=\"%s\"" % (nsprop, myAttributeSerialize(p))
return result+params
-
+
def endTagForNode(node):
if not node:
return 0
result = node.name
return result
-
+
def isFinalNode(node):
if automatic:
auto = autoNodeIsFinal(node)
@@ -353,6 +353,9 @@ def getCommentForNode(node):
else:
return None
+def replaceAttributeContentsWithText(node,text):
+ node.setContent(text)
+
def replaceNodeContentsWithText(node,text):
"""Replaces all subnodes of a node with contents of text treated as XML."""
@@ -367,7 +370,7 @@ def replaceNodeContentsWithText(node,text):
tmp = tmp + dtd.serialize('utf-8')
except libxml2.treeError:
pass
-
+
content = '<%s>%s</%s>' % (starttag, text, endtag)
tmp = tmp + content.encode('utf-8')
@@ -398,6 +401,7 @@ def replaceNodeContentsWithText(node,text):
next = node.next
node.replaceNode(newelem.copyNodeList())
node.next = next
+
else:
# In practice, this happens with tags such as "<para> </para>" (only whitespace in between)
pass
@@ -421,7 +425,7 @@ def autoNodeIsFinal(node):
return final
-def worthOutputting(node):
+def worthOutputting(node, noauto = 0):
"""Returns 1 if node is "worth outputting", otherwise 0.
Node is "worth outputting", if none of the parents
@@ -440,11 +444,32 @@ def worthOutputting(node):
if not worth:
return 0
- return autoNodeIsFinal(node)
-
+ if noauto:
+ return worth
+ else:
+ return autoNodeIsFinal(node)
+
+def processAttribute(node, attr):
+ if not node or not attr or not worthOutputting(node=node, noauto=1):
+ return
+
+ outtxt = attr.content
+ if mode=='merge':
+ translation = getTranslation(outtxt, 0)
+ replaceAttributeContentsWithText(attr, translation.encode('utf-8'))
+ else:
+ msg.outputMessage(outtxt, node.lineNo(), "", 0,
+ node.name + ":" + attr.name)
+
def processElementTag(node, replacements, restart = 0):
"""Process node with node.type == 'element'."""
if node.type == 'element':
+ # Translate attributes if needed
+ if node.properties and len(treated_attributes):
+ for p in node.properties:
+ if p.name in treated_attributes:
+ processAttribute(node, p)
+
outtxt = ''
if restart:
myrepl = []
@@ -553,7 +578,7 @@ def doSerialize(node):
child = child.next
return outtxt
-
+
def read_finaltags(filelist):
if CurrentXmlMode:
return CurrentXmlMode.getFinalTags()
@@ -571,6 +596,13 @@ def read_ignoredtags(filelist):
'varlistentry' ]
return defaults
+def read_treatedattributes(filelist):
+ if CurrentXmlMode:
+ return CurrentXmlMode.getTreatedAttributes()
+ else:
+ return []
+
+
def tryToUpdate(allargs, lang):
# Remove "-u" and "--update-translation"
print >>sys.stderr, "OVDI!"
@@ -777,6 +809,7 @@ if mofile:
ultimate_tags = read_finaltags(ultimate)
ignored_tags = read_ignoredtags(ignored)
+treated_attributes = read_treatedattributes(ignored)
# I'm not particularly happy about making any of these global,
# but I don't want to bother too much with it right now