summaryrefslogtreecommitdiff
path: root/docutils/tools
diff options
context:
space:
mode:
author: wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2005-06-20 19:20:00 +0000
committer: wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2005-06-20 19:20:00 +0000
commit: 5339c5492b96fcdb60d05b66006c615edae20215 (patch)
tree: b294fb7565b9b8575f572591a2edb5abb436ac49 /docutils/tools
parent: 8823c435fe816c5b0108e870a6ffbf15ba129eb2 (diff)
download: docutils-5339c5492b96fcdb60d05b66006c615edae20215.tar.gz
moved create_unimap.py to tools/dev/
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@3534 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/tools')
-rwxr-xr-x docutils/tools/dev/create_unimap.py 80
1 files changed, 80 insertions, 0 deletions
diff --git a/docutils/tools/dev/create_unimap.py b/docutils/tools/dev/create_unimap.py
new file mode 100755
index 000000000..260913ed3
--- /dev/null
+++ b/docutils/tools/dev/create_unimap.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+# Author: Felix Wiemann
+# Contact: Felix_Wiemann@ososo.de
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This file has been placed in the public domain.
+
+# Call: create_unimap.py < unicode.xml > unicode_latex.py
+#
+# Get unicode.xml from
+# <http://www.w3.org/2003/entities/xml/unicode.xml>.
+
+from xml.dom import minidom
+import sys
+import pprint
+
+def w(s):
+    """Write `s` to standard output, UTF-8-encoding unicode strings."""
+    data = s
+    if isinstance(data, unicode):
+        # Unicode objects are converted to a UTF-8 byte string first;
+        # anything else is written unchanged.
+        data = data.encode('utf8')
+    sys.stdout.write(data)
+
+# Unicode character -> '{...}'-wrapped LaTeX code (non-math entries).
+text_map = {}
+# Unicode character -> '$...$'-wrapped LaTeX code (math-mode entries).
+math_map = {}
+
+class Visitor:
+
+    """Node visitor collecting LaTeX equivalents from unicode.xml."""
+
+    def visit_character(self, node):
+        """Record the LaTeX code found in a "character" node.
+
+        For every child of `node` named "latex", the character given by
+        the node's "dec" attribute is stored in `math_map` (when the
+        node's "mode" attribute is "math") or in `text_map` (otherwise).
+        """
+        for child in node.childNodes:
+            if child.nodeName != 'latex':
+                continue
+            dec = node.attributes['dec'].value
+            # Skipped cases:
+            # * "dec" contains '-': meaning unknown to the original
+            #   author (presumably a multi-code-point entry -- TODO
+            #   confirm); probably not needed.
+            # * code point below 128: the mapping would be wrong (maps
+            #   "-" to "$-$", which is too wide) and unnecessary (maps
+            #   "a" to "{a}").
+            if '-' in dec or int(dec) < 128:
+                continue
+            latex = child.childNodes[0].nodeValue.encode('ascii').strip()
+            if node.attributes['mode'].value == 'math':
+                target, template = math_map, '$%s$'
+            else:
+                target, template = text_map, '{%s}'
+            target[unichr(int(dec))] = template % latex
+
+def call_visitor(node, visitor=Visitor()):
+    """Walk the DOM tree rooted at `node`, dispatching to `visitor`.
+
+    For each node, ``visitor.visit_<name>(node)`` is called before its
+    children are processed and ``visitor.depart_<name>(node)`` after
+    them, whenever the visitor defines such a method.  ``<name>`` is
+    "Text" for `minidom.Text` instances; otherwise it is the node's
+    `nodeName` with "#" replaced by "_".
+
+    The default visitor is a single `Visitor` instance created when
+    this function is defined.
+    """
+    if isinstance(node, minidom.Text):
+        name = 'Text'
+    else:
+        name = node.nodeName.replace('#', '_')
+    if hasattr(visitor, 'visit_' + name):
+        getattr(visitor, 'visit_' + name)(node)
+    for child in node.childNodes:
+        # Hand the same visitor down; without it the recursion would
+        # fall back to the default Visitor for every descendant.
+        call_visitor(child, visitor)
+    if hasattr(visitor, 'depart_' + name):
+        getattr(visitor, 'depart_' + name)(node)
+
+# Parse unicode.xml from standard input and walk the whole DOM tree;
+# the visitor fills text_map and math_map as a side effect.
+document = minidom.parse(sys.stdin)
+call_visitor(document)
+
+# Merge in place: math_map itself becomes the combined table, and text
+# entries overwrite math entries for the same character.  The result
+# contains the text entries plus dollar-enclosed math entries for those
+# chars for which no text entry exists.
+unicode_map = math_map
+unicode_map.update(text_map)
+
+# Header of the generated module.  "Revision" and "Date" are
+# interpolated instead of written literally -- presumably so that
+# keyword expansion leaves this script's own source alone (unconfirmed).
+sys.stdout.write('\n'.join([
+    '# Author: Felix Wiemann',
+    '# Contact: Felix_Wiemann@ososo.de',
+    '# Revision: $%s$' % 'Revision',
+    '# Date: $%s$' % 'Date',
+    '# Copyright: This file has been placed in the public domain.',
+    '#',
+    '# This is a mapping of Unicode characters to LaTeX',
+    '# equivalents. The information has been extracted from',
+    '# <http://www.w3.org/2003/entities/xml/unicode.xml>.',
+    '# The extraction has been done by the "create_unimap.py"',
+    '# script written by Felix Wiemann.',
+    '',
+    ]) + '\n')
+print 'unicode_map = %s' % pprint.pformat(unicode_map, indent=0)