Added XHTML mode and attribute extraction support.

author: Danilo Šegan <danilo@src.gnome.org> 2006-12-26 19:48:14 +0000
committer: Danilo Šegan <danilo@src.gnome.org> 2006-12-26 19:48:14 +0000
commit: 3e42bf2151f46d22306902c149e038196c658fc2 (patch)
tree: fd171f17074d3403f785c4b0a1e04f1097423723 /xml2po
parent: e13963d4b77e3508055a4d1d61207ada5ea879e6 (diff)
download: gnome-doc-utils-3e42bf2151f46d22306902c149e038196c658fc2.tar.gz
14 files changed, 244 insertions, 17 deletions
diff --git a/xml2po/ChangeLog b/xml2po/ChangeLog
index 7c216bf..5607bd6 100644
--- a/xml2po/ChangeLog
+++ b/xml2po/ChangeLog
@@ -1,5 +1,25 @@
 2006-12-26  Danilo Šegan  <danilo@gnome.org>
 
+	Fix bug #343749.
+	Support for XHTML mode, partially by Claude Paroz <paroz@email.ch>.
+
+	* TODO: Updated.
+
+	* xml2po.py (processAttribute): Added.
+	(read_treatedattributes): Added.
+	(processElementTag): Use treated attributes.
+
+	* modes/Makefile.am (common_DATA): Added xhtml.py.
+
+	* modes/gs.py (gsXmlMode.getTreatedAttributes):
+	* modes/empty.py (emptyXmlMode.getTreatedAttributes):
+	* modes/docbook.py (docbookXmlMode.getTreatedAttributes):
+	Added new method to every other mode.
+
+	* modes/xhtml.py: Added Claude's XHTML mode.
+
+2006-12-26  Danilo Šegan  <danilo@gnome.org>
+
 	Fix bug #378073.
 
 	* xml2po.py (autoNodeIsFinal): Really ignore ignored tags.
diff --git a/xml2po/TODO b/xml2po/TODO
index d27bbf9..5cfe928 100644
--- a/xml2po/TODO
+++ b/xml2po/TODO
@@ -26,9 +26,6 @@ TODO:
  o pipe the constructed .po file thru 'msgcat'; msgcat is meant as a .po
    file normalization tool. [Karl]
 
- o add support for translating attributes (eg. imagine "title"            [??]
-   attribute in IMG tag in XHTML) -- this should be easy enough
-
  o support several different doctypes in the same run                     [??]
    eg. a mathml, docbook, documents all translated with one run
 
@@ -39,6 +36,9 @@ TODO:
 
 DONE:
 
+ o [2006-12-26] add support for translating attributes (eg. imagine "title"
+   attribute in IMG tag in XHTML) -- this should be easy enough
+
  o [BUG FIXED with at least libxml2 2.6.21] 
    Merging with "-k" (keep-entities) option doesn't work correctly 
 
diff --git a/xml2po/modes/Makefile.am b/xml2po/modes/Makefile.am
index 146c1b4..55eff17 100644
--- a/xml2po/modes/Makefile.am
+++ b/xml2po/modes/Makefile.am
@@ -1,4 +1,4 @@
 commondir = $(datadir)/xml2po
-common_DATA = docbook.py empty.py gs.py ubuntu.py
+common_DATA = docbook.py empty.py gs.py ubuntu.py xhtml.py
 
 EXTRA_DIST = $(common_DATA)
diff --git a/xml2po/modes/docbook.py b/xml2po/modes/docbook.py
index 57e15f1..ceec906 100644
--- a/xml2po/modes/docbook.py
+++ b/xml2po/modes/docbook.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>.
 #
 # This file is part of xml2po.
 #
@@ -49,7 +49,7 @@ class docbookXmlMode:
                       'segmentedlist', 'simplelist', 'calloutlist', 'varlistentry' ]
         self.objects = [ 'figure', 'textobject', 'imageobject', 'mediaobject',
                          'screenshot' ]
-        
+
     def getIgnoredTags(self):
         "Returns array of tags to be ignored."
         return  self.objects + self.lists
@@ -73,6 +73,10 @@ class docbookXmlMode:
             'userinput'
             ]
 
+    def getTreatedAttributes(self):
+        "Returns array of tag attributes whose content is to be translated"
+        return []
+
     def getStringForTranslators(self):
         """Returns string which will be used to credit translators."""
         return "translator-credits"
diff --git a/xml2po/modes/empty.py b/xml2po/modes/empty.py
index 9c53aa2..1c9f002 100644
--- a/xml2po/modes/empty.py
+++ b/xml2po/modes/empty.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>.
 #
 # This file is part of xml2po.
 #
@@ -34,6 +34,10 @@ class emptyXmlMode:
         "Returns array of tags in which spaces are to be preserved."
         return []
 
+    def getTreatedAttributes(self):
+        "Returns array of tag attributes whose content is to be translated"
+        return []
+
     def preProcessXml(self, doc, msg):
         "Preprocess a document and perhaps adds some messages."
         pass
diff --git a/xml2po/modes/gs.py b/xml2po/modes/gs.py
index 36aa550..9ba0307 100644
--- a/xml2po/modes/gs.py
+++ b/xml2po/modes/gs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>.
 #
 # This file is part of xml2po.
 #
@@ -34,6 +34,10 @@ class gsXmlMode:
         "Returns array of tags in which spaces are to be preserved."
         return []
 
+    def getTreatedAttributes(self):
+        "Returns array of tag attributes whose content is to be translated"
+        return []
+
     def preProcessXml(self, doc, msg):
         "Preprocess a document and perhaps adds some messages."
         pass
diff --git a/xml2po/modes/xhtml.py b/xml2po/modes/xhtml.py
new file mode 100644
index 0000000..f4371e7
--- /dev/null
+++ b/xml2po/modes/xhtml.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>.
+# Copyright (c) 2006 Claude Paroz <paroz@email.ch>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+# This implements special instructions for handling XHTML documents
+# in a better way, particularly to extract some attributes in HTML tags
+
+class xhtmlXmlMode:
+    """Class for special handling of XHTML document types."""
+    def getIgnoredTags(self):
+        "Returns array of tags to be ignored."
+        return []
+
+    def getFinalTags(self):
+        "Returns array of tags to be considered 'final'."
+        return ['p', 'li', 'pre']
+
+    def getTreatedAttributes(self):
+        "Returns array of tag attributes whose content is to be translated"
+        return ['title','alt']
+
+    def getSpacePreserveTags(self):
+        "Returns array of tags in which spaces are to be preserved."
+        return ['pre']
+
+    def preProcessXml(self, doc, msg):
+        "Preprocess a document and perhaps adds some messages."
+        pass
+
+    def postProcessXmlTranslation(self, doc, language, translators):
+        """Sets a language and translators in "doc" tree.
+
+        "translators" is a string consisting of translator credits.
+        "language" is a simple string.
+        "doc" is a libxml2.xmlDoc instance."""
+        pass
+
+    def getStringForTranslators(self):
+        """Returns None or a string to be added to PO files.
+
+        Common example is 'translator-credits'."""
+        return None
+
+    def getCommentForTranslators(self):
+        """Returns a comment to be added next to string for crediting translators.
+
+        It should explain the format of the string provided by getStringForTranslators()."""
+        return None
diff --git a/xml2po/tests/ChangeLog b/xml2po/tests/ChangeLog
index 76c2045..c0d6191 100644
--- a/xml2po/tests/ChangeLog
+++ b/xml2po/tests/ChangeLog
@@ -1,5 +1,10 @@
 2006-12-26  Danilo Šegan  <danilo@gnome.org>
 
+	* test.py: Added xhtml test.
+	* xhtml.xml, xhtml.pot, xhtml.po, xhtml.xml.out: Add XHTML mode test.
+
+2006-12-26  Danilo Šegan  <danilo@gnome.org>
+
 	* reuse/reuse.xml: Add uncalled reuse test (call with
 	"../../xml2po -r reuse.xml.out reuse.xml" to check that msgid
 	"Introduction" is untranslated in output).
diff --git a/xml2po/tests/test.py b/xml2po/tests/test.py
index 02387e0..b5d6e88 100755
--- a/xml2po/tests/test.py
+++ b/xml2po/tests/test.py
@@ -10,6 +10,7 @@ SIMPLETESTS = { 'deep-finals.xml' : {},
                 'keepents.xml': { "options" : "-k" },
                 'adjacent-ents.xml': { "options" : "-k" },
                 'ubuntu-mode.xml': { "options" : "-m ubuntu -k -l sr" },
+                'xhtml.xml': { "options" : "-m xhtml" },
                 }
 
 OTHERTESTS = [ ('relnotes', 'test.sh') ]
diff --git a/xml2po/tests/xhtml.po b/xml2po/tests/xhtml.po
new file mode 100644
index 0000000..cced111
--- /dev/null
+++ b/xml2po/tests/xhtml.po
@@ -0,0 +1,32 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: xhtml test\n"
+"POT-Creation-Date: 2006-12-26 20:18+0100\n"
+"PO-Revision-Date: 2006-12-26 20:19+0100\n"
+"Last-Translator: Данило Шеган\n"
+"Language-Team: Српски\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: xhtml.xml:5(title) 
+msgid "Sample XHTML page"
+msgstr "Пример XHTML стране"
+
+#: xhtml.xml:7(p) 
+msgid "This is just an example."
+msgstr "Ово је само пример."
+
+#: xhtml.xml:10(img:title) 
+msgid "Lovely image"
+msgstr "Дивна слика"
+
+#: xhtml.xml:10(img:alt) 
+msgid "Image of nothing"
+msgstr "Слика ничега"
+
+#. Here nothing but a paragraph should be extracted.
+#: xhtml.xml:13(p) 
+msgid "Enough for a test, if you ask <a href=\"mailto:danilo\" title=\"Danilo\">me personally</a>."
+msgstr "Довољно за пробу, ако питате <a href=\"mailto:danilo\" title=\"Данила\">мене лично</a>."
+
diff --git a/xml2po/tests/xhtml.pot b/xml2po/tests/xhtml.pot
new file mode 100644
index 0000000..3575759
--- /dev/null
+++ b/xml2po/tests/xhtml.pot
@@ -0,0 +1,32 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: xhtml.xml:4(title) 
+msgid "Sample XHTML page"
+msgstr ""
+
+#: xhtml.xml:6(p) 
+msgid "This is just an example."
+msgstr ""
+
+#: xhtml.xml:9(img:title) 
+msgid "Lovely image"
+msgstr ""
+
+#: xhtml.xml:9(img:alt) 
+msgid "Image of nothing"
+msgstr ""
+
+#. Here nothing but a paragraph should be extracted.
+#: xhtml.xml:12(p) 
+msgid "Enough for a test, if you ask <a href=\"mailto:danilo\" title=\"Danilo\">me personally</a>."
+msgstr ""
+
diff --git a/xml2po/tests/xhtml.xml b/xml2po/tests/xhtml.xml
new file mode 100644
index 0000000..253afc1
--- /dev/null
+++ b/xml2po/tests/xhtml.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Sample XHTML page</title></head>
+<body>
+<p>This is just an example.</p>
+
+<!-- Here both title and alt attributes should be extracted. -->
+<img title="Lovely image" alt="Image of nothing" src="nothing.png" />
+
+<!-- Here nothing but a paragraph should be extracted. -->
+<p>Enough for a test, if you ask <a href="mailto:danilo" title="Danilo">me personally</a>.</p>
+</body>
+</html>
diff --git a/xml2po/tests/xhtml.xml.out b/xml2po/tests/xhtml.xml.out
new file mode 100644
index 0000000..4116ca0
--- /dev/null
+++ b/xml2po/tests/xhtml.xml.out
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>Пример XHTML стране</title></head>
+<body>
+<p>Ово је само пример.</p>
+
+<!-- Here both title and alt attributes should be extracted. -->
+<img title="Дивна слика" alt="Слика ничега" src="nothing.png" />
+
+<!-- Here nothing but a paragraph should be extracted. -->
+<p>Довољно за пробу, ако питате <a href="mailto:danilo" title="Данила">мене лично</a>.</p>
+</body>
+</html>
diff --git a/xml2po/xml2po.py b/xml2po/xml2po.py
index 0d1528e..1834fae 100755
--- a/xml2po/xml2po.py
+++ b/xml2po/xml2po.py
@@ -119,7 +119,7 @@ msgstr ""
 
     def outputAll(self, out):
         self.outputHeader(out)
-        
+
         for k in self.messages:
             if k in self.comments:
                 out.write("#. %s\n" % (self.comments[k].replace("\n","\n#. ")))
@@ -195,7 +195,7 @@ def normalizeString(text, ignorewhitespace = 1):
 
     result = re.sub('^ ','', result)
     result = re.sub(' $','', result)
-    
+
     return result
 
 def stringForEntity(node):
@@ -286,14 +286,14 @@ def startTagForNode(node):
                     nsprop = p.name
                 params += " %s=\"%s\"" % (nsprop, myAttributeSerialize(p))
     return result+params
-        
+
 def endTagForNode(node):
     if not node:
         return 0
 
     result = node.name
     return result
-        
+
 def isFinalNode(node):
     if automatic:
         auto = autoNodeIsFinal(node)
@@ -353,6 +353,9 @@ def getCommentForNode(node):
     else:
         return None
 
+def replaceAttributeContentsWithText(node,text):
+    node.setContent(text)
+
 def replaceNodeContentsWithText(node,text):
     """Replaces all subnodes of a node with contents of text treated as XML."""
 
@@ -367,7 +370,7 @@ def replaceNodeContentsWithText(node,text):
             tmp = tmp + dtd.serialize('utf-8')
         except libxml2.treeError:
             pass
-            
+
         content = '<%s>%s</%s>' % (starttag, text, endtag)
         tmp = tmp + content.encode('utf-8')
 
@@ -398,6 +401,7 @@ def replaceNodeContentsWithText(node,text):
                 next = node.next
                 node.replaceNode(newelem.copyNodeList())
                 node.next = next
+
         else:
             # In practice, this happens with tags such as "<para>    </para>" (only whitespace in between)
             pass
@@ -421,7 +425,7 @@ def autoNodeIsFinal(node):
     return final
 
 
-def worthOutputting(node):
+def worthOutputting(node, noauto = 0):
     """Returns 1 if node is "worth outputting", otherwise 0.
 
     Node is "worth outputting", if none of the parents
@@ -440,11 +444,32 @@ def worthOutputting(node):
     if not worth:
         return 0
 
-    return autoNodeIsFinal(node)
-    
+    if noauto:
+        return worth
+    else:
+        return autoNodeIsFinal(node)
+
+def processAttribute(node, attr):
+    if not node or not attr or not worthOutputting(node=node, noauto=1):
+        return
+
+    outtxt = attr.content
+    if mode=='merge':
+        translation = getTranslation(outtxt, 0)
+        replaceAttributeContentsWithText(attr, translation.encode('utf-8'))
+    else:
+        msg.outputMessage(outtxt, node.lineNo(),  "", 0,
+                          node.name + ":" + attr.name)
+
 def processElementTag(node, replacements, restart = 0):
     """Process node with node.type == 'element'."""
     if node.type == 'element':
+        # Translate attributes if needed
+        if node.properties and len(treated_attributes):
+            for p in node.properties:
+                if p.name in treated_attributes:
+                    processAttribute(node, p)
+
         outtxt = ''
         if restart:
             myrepl = []
@@ -553,7 +578,7 @@ def doSerialize(node):
             child = child.next
         return outtxt
 
-    
+
 def read_finaltags(filelist):
     if CurrentXmlMode:
         return CurrentXmlMode.getFinalTags()
@@ -571,6 +596,13 @@ def read_ignoredtags(filelist):
                     'varlistentry' ]
         return defaults
 
+def read_treatedattributes(filelist):
+    if CurrentXmlMode:
+        return CurrentXmlMode.getTreatedAttributes()
+    else:
+        return []
+
+
 def tryToUpdate(allargs, lang):
     # Remove "-u" and "--update-translation"
     print >>sys.stderr, "OVDI!"
@@ -777,6 +809,7 @@ if mofile:
 
 ultimate_tags = read_finaltags(ultimate)
 ignored_tags = read_ignoredtags(ignored)
+treated_attributes = read_treatedattributes(ignored)
 
 # I'm not particularly happy about making any of these global,
 # but I don't want to bother too much with it right now
author	Danilo Šegan <danilo@src.gnome.org>	2006-12-26 19:48:14 +0000
committer	Danilo Šegan <danilo@src.gnome.org>	2006-12-26 19:48:14 +0000
commit	3e42bf2151f46d22306902c149e038196c658fc2 (patch)
tree	fd171f17074d3403f785c4b0a1e04f1097423723 /xml2po
parent	e13963d4b77e3508055a4d1d61207ada5ea879e6 (diff)
download	gnome-doc-utils-3e42bf2151f46d22306902c149e038196c658fc2.tar.gz