diff options
| author | wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2005-06-20 19:20:00 +0000 |
|---|---|---|
| committer | wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2005-06-20 19:20:00 +0000 |
| commit | 71c59a6b9569fc2aafc4d4e41c21cca04b29cd65 (patch) | |
| tree | 80799f6b82a57e2ede2edba5da6e2ceb8d4965c5 /tools/dev/create_unimap.py | |
| parent | fbc14fc4439455bc8726051604c4148ed14e29ee (diff) | |
| download | docutils-71c59a6b9569fc2aafc4d4e41c21cca04b29cd65.tar.gz | |
moved create_unimap.py to tools/dev/
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@3534 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'tools/dev/create_unimap.py')
| -rwxr-xr-x | tools/dev/create_unimap.py | 80 |
1 files changed, 80 insertions, 0 deletions
#!/usr/bin/env python3

# Author: Felix Wiemann
# Contact: Felix_Wiemann@ososo.de
# Revision: $Revision$
# Date: $Date$
# Copyright: This file has been placed in the public domain.

# Call: create_unimap.py < unicode.xml > unicode_latex.py
#
# Get unicode.xml from
# <http://www.w3.org/2003/entities/xml/unicode.xml>.

"""Generate a Unicode-to-LaTeX mapping module from unicode.xml.

The XML document is read from standard input; the source text of a
Python module defining ``unicode_map`` is written to standard output.
"""

from xml.dom import minidom
import sys
import pprint


def w(s):
    """Write `s` to standard output.

    `s` may be text or UTF-8 encoded bytes; bytes are decoded first.
    """
    if isinstance(s, bytes):
        s = s.decode('utf8')
    sys.stdout.write(s)


# Both maps are filled in by `Visitor.visit_character()`.  Keys are
# single characters, values are the wrapped LaTeX replacements.
text_map = {}
math_map = {}


class Visitor:

    """Node visitor for contents of unicode.xml."""

    def visit_character(self, node):
        """Record the LaTeX replacement(s) of one ``character`` element.

        Every ``latex`` child adds an entry to `math_map` (wrapped in
        ``$...$``) if the element's ``mode`` attribute is ``math``, and
        to `text_map` (wrapped in ``{...}``) otherwise.

        Raises KeyError if a processed element lacks the ``dec`` or
        ``mode`` attribute and UnicodeEncodeError if the LaTeX code is
        not pure ASCII.
        """
        for n in node.childNodes:
            if n.nodeName != 'latex':
                continue
            code = node.attributes['dec'].value
            if '-' in code:
                # I don't know what this means, but we probably
                # don't need it....
                continue
            if int(code) < 128:
                # Wrong (maps "-" to "$-$", which is too wide) and
                # unnecessary (maps "a" to "{a}").
                continue
            if not n.childNodes:
                # Empty <latex/> element: there is nothing to map to.
                continue
            raw = n.childNodes[0].nodeValue
            # Round-trip through ASCII so that unexpected non-ASCII
            # LaTeX code is reported instead of silently passed on.
            latex_code = raw.encode('ascii').decode('ascii').strip()
            if node.attributes['mode'].value == 'math':
                math_map[chr(int(code))] = '$%s$' % latex_code
            else:
                text_map[chr(int(code))] = '{%s}' % latex_code


def call_visitor(node, visitor=None):
    """Walk the DOM tree below `node`, dispatching to `visitor`.

    For each node, ``visitor.visit_<name>(node)`` is called before and
    ``visitor.depart_<name>(node)`` after its children are processed,
    provided the visitor defines such a method.  ``<name>`` is ``Text``
    for text nodes and the node name with ``#`` replaced by ``_``
    otherwise.

    If `visitor` is None, a fresh `Visitor` instance is used.  The same
    visitor object is handed down to all descendants.
    """
    if visitor is None:
        visitor = Visitor()
    if isinstance(node, minidom.Text):
        name = 'Text'
    else:
        name = node.nodeName.replace('#', '_')
    visit = getattr(visitor, 'visit_' + name, None)
    if visit is not None:
        visit(node)
    for child in node.childNodes:
        # Pass the visitor on explicitly; otherwise a caller-supplied
        # visitor would only ever see the root node.
        call_visitor(child, visitor)
    depart = getattr(visitor, 'depart_' + name, None)
    if depart is not None:
        depart(node)


def build_unicode_map():
    """Return the merged character map.

    The result contains the text entries plus dollar-enclosed math
    entries for those chars for which no text entry exists.  Neither
    `text_map` nor `math_map` is modified.
    """
    unicode_map = dict(math_map)
    unicode_map.update(text_map)
    return unicode_map


def format_module(unicode_map):
    """Return the source text of the generated mapping module.

    The returned text is pure ASCII: non-ASCII characters inside the
    string literals are written as backslash escapes, so the result
    does not depend on the encoding of the output stream.
    """
    header = [
        '# Author: Felix Wiemann',
        '# Contact: Felix_Wiemann@ososo.de',
        # The keywords are assembled at run time, presumably so that
        # keyword expansion leaves these two literals alone.
        '# Revision: $%s$' % 'Revision',
        '# Date: $%s$' % 'Date',
        '# Copyright: This file has been placed in the public domain.',
        '#',
        '# This is a mapping of Unicode characters to LaTeX',
        '# equivalents. The information has been extracted from',
        '# <http://www.w3.org/2003/entities/xml/unicode.xml>.',
        '# The extraction has been done by the "create_unimap.py"',
        '# script written by Felix Wiemann.',
    ]
    literal = pprint.pformat(unicode_map, indent=0)
    # repr() keeps printable non-ASCII characters as they are; turn
    # them into \x, \u and \U escapes, which are valid inside the
    # (non-raw) string literals produced by pformat().
    literal = literal.encode('ascii', 'backslashreplace').decode('ascii')
    return '%s\n\nunicode_map = %s\n' % ('\n'.join(header), literal)


def main():
    """Read unicode.xml from stdin and write the mapping module to stdout."""
    # Feed the parser bytes so that it honours the encoding declared in
    # the XML document rather than the locale's encoding.
    document = minidom.parse(getattr(sys.stdin, 'buffer', sys.stdin))
    call_visitor(document)
    w(format_module(build_unicode_map()))


if __name__ == '__main__':
    main()
