diff options
| author | wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2005-06-20 19:20:00 +0000 |
|---|---|---|
| committer | wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2005-06-20 19:20:00 +0000 |
| commit | 5339c5492b96fcdb60d05b66006c615edae20215 (patch) | |
| tree | b294fb7565b9b8575f572591a2edb5abb436ac49 /docutils/tools | |
| parent | 8823c435fe816c5b0108e870a6ffbf15ba129eb2 (diff) | |
| download | docutils-5339c5492b96fcdb60d05b66006c615edae20215.tar.gz | |
moved create_unimap.py to tools/dev/
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@3534 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/tools')
| -rwxr-xr-x | docutils/tools/dev/create_unimap.py | 80 |
1 files changed, 80 insertions, 0 deletions
```diff
diff --git a/docutils/tools/dev/create_unimap.py b/docutils/tools/dev/create_unimap.py
new file mode 100755
index 000000000..260913ed3
--- /dev/null
+++ b/docutils/tools/dev/create_unimap.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+# Author: Felix Wiemann
+# Contact: Felix_Wiemann@ososo.de
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This file has been placed in the public domain.
+
+# Call: create_unimap.py < unicode.xml > unicode_latex.py
+#
+# Get unicode.xml from
+# <http://www.w3.org/2003/entities/xml/unicode.xml>.
+
+from xml.dom import minidom
+import sys
+import pprint
+
+def w(s):
+    if isinstance(s, unicode):
+        s = s.encode('utf8')
+    sys.stdout.write(s)
+
+text_map = {}
+math_map = {}
+
+class Visitor:
+
+    """Node visitor for contents of unicode.xml."""
+
+    def visit_character(self, node):
+        for n in node.childNodes:
+            if n.nodeName == 'latex':
+                code = node.attributes['dec'].value
+                if '-' in code:
+                    # I don't know what this means, but we probably
+                    # don't need it....
+                    continue
+                if int(code) < 128:
+                    # Wrong (maps "-" to "$-$", which is too wide) and
+                    # unnecessary (maps "a" to "{a}").
+                    continue
+                latex_code = n.childNodes[0].nodeValue.encode('ascii').strip()
+                if node.attributes['mode'].value == 'math':
+                    math_map[unichr(int(code))] = '$%s$' % latex_code
+                else:
+                    text_map[unichr(int(code))] = '{%s}' % latex_code
+
+def call_visitor(node, visitor=Visitor()):
+    if isinstance(node, minidom.Text):
+        name = 'Text'
+    else:
+        name = node.nodeName.replace('#', '_')
+    if hasattr(visitor, 'visit_' + name):
+        getattr(visitor, 'visit_' + name)(node)
+    for child in node.childNodes:
+        call_visitor(child)
+    if hasattr(visitor, 'depart_' + name):
+        getattr(visitor, 'depart_' + name)(node)
+
+document = minidom.parse(sys.stdin)
+call_visitor(document)
+
+unicode_map = math_map
+unicode_map.update(text_map)
+# Now unicode_map contains the text entries plus dollar-enclosed math
+# entries for those chars for which no text entry exists.
+
+print '# Author: Felix Wiemann'
+print '# Contact: Felix_Wiemann@ososo.de'
+print '# Revision: $%s$' % 'Revision'
+print '# Date: $%s$' % 'Date'
+print '# Copyright: This file has been placed in the public domain.'
+print '#'
+print '# This is a mapping of Unicode characters to LaTeX'
+print '# equivalents. The information has been extracted from'
+print '# <http://www.w3.org/2003/entities/xml/unicode.xml>.'
+print '# The extraction has been done by the "create_unimap.py"'
+print '# script written by Felix Wiemann.'
+print
+print 'unicode_map = %s' % pprint.pformat(unicode_map, indent=0)
```
