summary | refs | log | tree | commit | diff
path: root/tools/dev/create_unimap.py
diff options
context:
space:
mode:
author: wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2005-06-20 19:20:00 +0000
committer: wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2005-06-20 19:20:00 +0000
commit: 71c59a6b9569fc2aafc4d4e41c21cca04b29cd65 (patch)
tree: 80799f6b82a57e2ede2edba5da6e2ceb8d4965c5 /tools/dev/create_unimap.py
parent: fbc14fc4439455bc8726051604c4148ed14e29ee (diff)
download: docutils-71c59a6b9569fc2aafc4d4e41c21cca04b29cd65.tar.gz
moved create_unimap.py to tools/dev/
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3534 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'tools/dev/create_unimap.py')
-rwxr-xr-x tools/dev/create_unimap.py 80
1 files changed, 80 insertions, 0 deletions
diff --git a/tools/dev/create_unimap.py b/tools/dev/create_unimap.py
new file mode 100755
index 000000000..260913ed3
--- /dev/null
+++ b/tools/dev/create_unimap.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+# Author: Felix Wiemann
+# Contact: Felix_Wiemann@ososo.de
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This file has been placed in the public domain.
+
+# Call: create_unimap.py < unicode.xml > unicode_latex.py
+#
+# Get unicode.xml from
+# <http://www.w3.org/2003/entities/xml/unicode.xml>.
+
+from xml.dom import minidom
+import sys
+import pprint
+
+def w(s):
+    """Write `s` to standard output.
+
+    A `unicode` object is encoded as UTF-8 first; anything else is
+    handed to ``sys.stdout.write`` unchanged.
+    """
+    out = sys.stdout
+    if not isinstance(s, unicode):
+        out.write(s)
+        return
+    out.write(s.encode('utf8'))
+
+# Character -> LaTeX markup tables, filled in by Visitor.visit_character():
+# math_map holds the math-mode entries (stored as "$...$"), text_map holds
+# all other entries (stored as "{...}").
+math_map = dict()
+text_map = dict()
+
+class Visitor:
+
+    """Collect LaTeX replacements from the nodes of unicode.xml."""
+
+    def visit_character(self, node):
+        """Record the LaTeX code of a <character> element, if it has one.
+
+        Each <latex> child of `node` yields an entry keyed by the
+        character whose decimal code point is given in the ``dec``
+        attribute: in `math_map` (as ``$...$``) when the ``mode``
+        attribute is "math", otherwise in `text_map` (as ``{...}``).
+        """
+        for child in node.childNodes:
+            if child.nodeName != 'latex':
+                continue
+            dec = node.attributes['dec'].value
+            # Skipped entries:
+            # * "dec" values containing "-" -- presumably a sequence of
+            #   several code points (TODO confirm against unicode.xml);
+            #   they cannot be turned into a single character here.
+            # * Code points below 128 -- their entries are wrong ("-"
+            #   would become "$-$", which is too wide) and unnecessary
+            #   ("a" would become "{a}").
+            if '-' in dec or int(dec) < 128:
+                continue
+            latex = child.childNodes[0].nodeValue.encode('ascii').strip()
+            is_math = node.attributes['mode'].value == 'math'
+            char = unichr(int(dec))
+            if is_math:
+                math_map[char] = '$%s$' % latex
+            else:
+                text_map[char] = '{%s}' % latex
+
+def call_visitor(node, visitor=None):
+    """Walk the DOM tree rooted at `node`, dispatching to `visitor`.
+
+    For every node, ``visitor.visit_<name>(node)`` is called before the
+    node's children are processed and ``visitor.depart_<name>(node)``
+    afterwards, provided the visitor defines such a method.  <name> is
+    "Text" for `minidom.Text` nodes; for all other nodes it is the node
+    name with "#" replaced by "_" (so "#document" becomes "_document").
+
+    `visitor` defaults to a new `Visitor` instance.  The same visitor
+    object is used for the whole subtree.
+    """
+    if visitor is None:
+        visitor = Visitor()
+    if isinstance(node, minidom.Text):
+        name = 'Text'
+    else:
+        name = node.nodeName.replace('#', '_')
+    if hasattr(visitor, 'visit_' + name):
+        getattr(visitor, 'visit_' + name)(node)
+    for child in node.childNodes:
+        # Hand the visitor down explicitly; otherwise a caller-supplied
+        # visitor would only ever see the root node.
+        call_visitor(child, visitor)
+    if hasattr(visitor, 'depart_' + name):
+        getattr(visitor, 'depart_' + name)(node)
+
+# Read unicode.xml from standard input and fill text_map / math_map.
+document = minidom.parse(sys.stdin)
+call_visitor(document)
+
+# Merge the two tables.  unicode_map is the same dict object as math_map;
+# updating it with text_map lets a text entry replace the math entry for
+# the same character, so the dollar-enclosed math form only remains for
+# characters that have no text entry.
+unicode_map = math_map
+unicode_map.update(text_map)
+
+# Header of the generated module.  "Revision" and "Date" are substituted
+# in at run time rather than written out as literal keywords.
+_header_lines = (
+    '# Author: Felix Wiemann',
+    '# Contact: Felix_Wiemann@ososo.de',
+    '# Revision: $%s$' % 'Revision',
+    '# Date: $%s$' % 'Date',
+    '# Copyright: This file has been placed in the public domain.',
+    '#',
+    '# This is a mapping of Unicode characters to LaTeX',
+    '# equivalents. The information has been extracted from',
+    '# <http://www.w3.org/2003/entities/xml/unicode.xml>.',
+    '# The extraction has been done by the "create_unimap.py"',
+    '# script written by Felix Wiemann.',
+)
+for _line in _header_lines:
+    print _line
+print
+print 'unicode_map = %s' % pprint.pformat(unicode_map, indent=0)