diff options
author | wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2006-01-09 20:44:25 +0000 |
---|---|---|
committer | wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2006-01-09 20:44:25 +0000 |
commit | d77fdfef70e08114f57cbef5d91707df8717ea9f (patch) | |
tree | 49444e3486c0c333cb7b33dfa721296c08ee4ece /tools/dev/create_unimap.py | |
parent | 53cd16ca6ca5f638cbe5956988e88f9339e355cf (diff) | |
parent | 3993c4097756e9885bcfbd07cb1cc1e4e95e50e4 (diff) | |
download | docutils-0.4.tar.gz |
Release 0.4: tagging released revisiondocutils-0.4
git-svn-id: http://svn.code.sf.net/p/docutils/code/tags/docutils-0.4@4268 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'tools/dev/create_unimap.py')
-rwxr-xr-x | tools/dev/create_unimap.py | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/tools/dev/create_unimap.py b/tools/dev/create_unimap.py new file mode 100755 index 000000000..1d1a2f8a0 --- /dev/null +++ b/tools/dev/create_unimap.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +# Author: Felix Wiemann +# Contact: Felix_Wiemann@ososo.de +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This file has been placed in the public domain. + +# Call: create_unimap.py < unicode.xml > unicode_latex.py +# +# Get unicode.xml from +# <http://www.w3.org/2003/entities/xml/unicode.xml>. + +from xml.dom import minidom +import sys +import pprint + +def w(s): + if isinstance(s, unicode): + s = s.encode('utf8') + sys.stdout.write(s) + +text_map = {} +math_map = {} + +class Visitor: + + """Node visitor for contents of unicode.xml.""" + + def visit_character(self, node): + for n in node.childNodes: + if n.nodeName == 'latex': + code = node.attributes['dec'].value + if '-' in code: + # I don't know what this means, but we probably + # don't need it.... + continue + if int(code) < 128: + # Wrong (maps "-" to "$-$", which is too wide) and + # unnecessary (maps "a" to "{a}"). + continue + latex_code = n.childNodes[0].nodeValue.encode('ascii').strip() + if node.attributes['mode'].value == 'math': + math_map[unichr(int(code))] = '$%s$' % latex_code + else: + text_map[unichr(int(code))] = '{%s}' % latex_code + +def call_visitor(node, visitor=Visitor()): + if isinstance(node, minidom.Text): + name = 'Text' + else: + name = node.nodeName.replace('#', '_') + if hasattr(visitor, 'visit_' + name): + getattr(visitor, 'visit_' + name)(node) + for child in node.childNodes: + call_visitor(child) + if hasattr(visitor, 'depart_' + name): + getattr(visitor, 'depart_' + name)(node) + +document = minidom.parse(sys.stdin) +call_visitor(document) + +unicode_map = math_map +unicode_map.update(text_map) +# Now unicode_map contains the text entries plus dollar-enclosed math +# entries for those chars for which no text entry exists. + +print '# Author: Felix Wiemann' +print '# Contact: Felix_Wiemann@ososo.de' +print '# Revision: $%s$' % 'Revision' +print '# Date: $%s$' % 'Date' +print '# Copyright: This file has been placed in the public domain.' +print +print '# This is a mapping of Unicode characters to LaTeX equivalents.' +print '# The information has been extracted from' +print '# <http://www.w3.org/2003/entities/xml/unicode.xml>, written by' +print '# David Carlisle and Sebastian Rahtz.' +print '#' +print '# The extraction has been done by the "create_unimap.py" script' +print '# located at <http://docutils.sf.net/tools/dev/create_unimap.py>.' +print +print 'unicode_map = %s' % pprint.pformat(unicode_map, indent=0) |