#!/usr/bin/env python3

# Author: Felix Wiemann
# Contact: Felix_Wiemann@ososo.de
# Revision: $Revision$
# Date: $Date$
# Copyright: This file has been placed in the public domain.

# Call: create_unimap.py < unicode.xml > unicode_latex.py
#
# Get unicode.xml from
# <http://www.w3.org/2003/entities/xml/unicode.xml>.
#
# NOTE(review): the URLs in this file were blank in the patch text it
# was rebuilt from (angle-bracketed text had been stripped).  They were
# restored to match the header of the published unicode_latex.py --
# confirm before regenerating that file.
#
# Provenance: reconstructed from two commits by wiemann dated
# Mon, 20 Jun 2005:
#   71c59a6b9569fc2aafc4d4e41c21cca04b29cd65 (docutils svn r3534)
#       "moved create_unimap.py to tools/dev/"
#   d3a60d28fa5ef090e66b3db54069811efc05afe6 (docutils svn r3535)
#       "changed comment at the top of unicode_latex.py"
# This is the state of tools/dev/create_unimap.py after the second
# commit, ported to Python 3.

"""Build the ``unicode_map`` table of LaTeX equivalents from unicode.xml.

The XML document is read from standard input and a Python module
defining ``unicode_map`` is written to standard output.
"""

from xml.dom import minidom
import sys
import pprint


def w(s):
    """Write *s* to standard output.

    *s* may be text or UTF-8 encoded bytes; bytes are decoded first
    because the Python 3 text stream only accepts ``str``.
    """
    if isinstance(s, bytes):
        s = s.decode('utf8')
    sys.stdout.write(s)


# Filled in by Visitor.visit_character(): character -> LaTeX code.
text_map = {}
math_map = {}


class Visitor:

    """Node visitor for contents of unicode.xml."""

    def visit_character(self, node):
        """Record the LaTeX equivalent(s) of one <character> element.

        Every <latex> child adds an entry to `math_map` (wrapped in
        dollar signs) when the element's ``mode`` attribute is
        ``math``, and to `text_map` (wrapped in braces) otherwise.

        Raises KeyError if the ``dec`` or ``mode`` attribute is missing
        and UnicodeEncodeError if the LaTeX code is not pure ASCII.
        """
        for n in node.childNodes:
            if n.nodeName != 'latex':
                continue
            code = node.attributes['dec'].value
            if '-' in code:
                # I don't know what this means, but we probably
                # don't need it....
                # NOTE(review): presumably a sequence of several code
                # points -- confirm against unicode.xml.
                continue
            if int(code) < 128:
                # Wrong (maps "-" to "$-$", which is too wide) and
                # unnecessary (maps "a" to "{a}").
                continue
            if not n.childNodes:
                # An empty <latex/> element carries no mapping.
                continue
            raw = n.childNodes[0].nodeValue
            # Only checks that the code is pure ASCII; the text itself
            # stays a str so that "%" formatting below does not embed
            # a bytes repr.
            raw.encode('ascii')
            latex_code = raw.strip()
            if node.attributes['mode'].value == 'math':
                math_map[chr(int(code))] = '$%s$' % latex_code
            else:
                text_map[chr(int(code))] = '{%s}' % latex_code


def call_visitor(node, visitor=None):
    """Walk *node* depth-first, dispatching to *visitor*.

    For each node, ``visitor.visit_<name>(node)`` is called before its
    children are walked and ``visitor.depart_<name>(node)`` afterwards,
    whenever the visitor defines such a method.  ``<name>`` is ``Text``
    for text nodes and otherwise the node name with ``#`` replaced by
    ``_``.

    *visitor* defaults to a fresh `Visitor` instance.
    """
    if visitor is None:
        visitor = Visitor()
    if isinstance(node, minidom.Text):
        name = 'Text'
    else:
        name = node.nodeName.replace('#', '_')
    visit = getattr(visitor, 'visit_' + name, None)
    if visit is not None:
        visit(node)
    for child in node.childNodes:
        # Hand the same visitor down; otherwise a caller-supplied
        # visitor would only ever see the root node.
        call_visitor(child, visitor)
    depart = getattr(visitor, 'depart_' + name, None)
    if depart is not None:
        depart(node)


def build_unicode_map(document):
    """Return the combined character -> LaTeX mapping for *document*.

    `text_map` and `math_map` are cleared and refilled as a side
    effect.  The result contains the text entries plus dollar-enclosed
    math entries for those chars for which no text entry exists.
    """
    text_map.clear()
    math_map.clear()
    call_visitor(document)
    # Copy so that merging the text entries leaves math_map untouched.
    unicode_map = dict(math_map)
    unicode_map.update(text_map)
    return unicode_map


# Comment header of the generated module.  The keywords are assembled
# with "%" so that they do not appear literally in this file.
HEADER_LINES = (
    '# Author: Felix Wiemann',
    '# Contact: Felix_Wiemann@ososo.de',
    '# Revision: $%s$' % 'Revision',
    '# Date: $%s$' % 'Date',
    '# Copyright: This file has been placed in the public domain.',
    '',
    '# This is a mapping of Unicode characters to LaTeX equivalents.',
    '# The information has been extracted from',
    '# <http://www.w3.org/2003/entities/xml/unicode.xml>, written by',
    '# David Carlisle and Sebastian Rahtz.',
    '#',
    '# The extraction has been done by the "create_unimap.py" script',
    '# located at <http://docutils.sf.net/tools/dev/create_unimap.py>.',
    '',
)


def format_module(unicode_map):
    """Return the source text of the generated module for *unicode_map*."""
    lines = list(HEADER_LINES)
    lines.append('unicode_map = %s' % pprint.pformat(unicode_map, indent=0))
    return '\n'.join(lines) + '\n'


def main():
    """Convert unicode.xml on stdin into a Python module on stdout."""
    # Read bytes where possible so the parser can honour the encoding
    # given in the XML declaration.
    source = getattr(sys.stdin, 'buffer', sys.stdin)
    unicode_map = build_unicode_map(minidom.parse(source))
    # The formatted mapping may contain non-ASCII characters; emit
    # UTF-8 regardless of the locale.
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(encoding='utf-8')
    w(format_module(unicode_map))


if __name__ == '__main__':
    main()