summaryrefslogtreecommitdiff
path: root/docutils/tools/dev
diff options
context:
space:
mode:
authorwiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2006-01-09 20:44:25 +0000
committerwiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2006-01-09 20:44:25 +0000
commitd77fdfef70e08114f57cbef5d91707df8717ea9f (patch)
tree49444e3486c0c333cb7b33dfa721296c08ee4ece /docutils/tools/dev
parent53cd16ca6ca5f638cbe5956988e88f9339e355cf (diff)
parent3993c4097756e9885bcfbd07cb1cc1e4e95e50e4 (diff)
downloaddocutils-0.4.tar.gz
Release 0.4: tagging released revisiondocutils-0.4
git-svn-id: http://svn.code.sf.net/p/docutils/code/tags/docutils-0.4@4268 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/tools/dev')
-rw-r--r--docutils/tools/dev/README.txt1
-rwxr-xr-xdocutils/tools/dev/create_unimap.py82
-rwxr-xr-xdocutils/tools/dev/profile_docutils.py41
-rwxr-xr-xdocutils/tools/dev/unicode2rstsubs.py204
4 files changed, 0 insertions, 328 deletions
diff --git a/docutils/tools/dev/README.txt b/docutils/tools/dev/README.txt
deleted file mode 100644
index ca9e99ee8..000000000
--- a/docutils/tools/dev/README.txt
+++ /dev/null
@@ -1 +0,0 @@
-Tools for developers.
diff --git a/docutils/tools/dev/create_unimap.py b/docutils/tools/dev/create_unimap.py
deleted file mode 100755
index 1d1a2f8a0..000000000
--- a/docutils/tools/dev/create_unimap.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/usr/bin/env python
-
-# Author: Felix Wiemann
-# Contact: Felix_Wiemann@ososo.de
-# Revision: $Revision$
-# Date: $Date$
-# Copyright: This file has been placed in the public domain.
-
-# Call: create_unimap.py < unicode.xml > unicode_latex.py
-#
-# Get unicode.xml from
-# <http://www.w3.org/2003/entities/xml/unicode.xml>.
-
-from xml.dom import minidom
-import sys
-import pprint
-
-def w(s):
- if isinstance(s, unicode):
- s = s.encode('utf8')
- sys.stdout.write(s)
-
-text_map = {}
-math_map = {}
-
-class Visitor:
-
- """Node visitor for contents of unicode.xml."""
-
- def visit_character(self, node):
- for n in node.childNodes:
- if n.nodeName == 'latex':
- code = node.attributes['dec'].value
- if '-' in code:
- # I don't know what this means, but we probably
- # don't need it....
- continue
- if int(code) < 128:
- # Wrong (maps "-" to "$-$", which is too wide) and
- # unnecessary (maps "a" to "{a}").
- continue
- latex_code = n.childNodes[0].nodeValue.encode('ascii').strip()
- if node.attributes['mode'].value == 'math':
- math_map[unichr(int(code))] = '$%s$' % latex_code
- else:
- text_map[unichr(int(code))] = '{%s}' % latex_code
-
-def call_visitor(node, visitor=Visitor()):
- if isinstance(node, minidom.Text):
- name = 'Text'
- else:
- name = node.nodeName.replace('#', '_')
- if hasattr(visitor, 'visit_' + name):
- getattr(visitor, 'visit_' + name)(node)
- for child in node.childNodes:
- call_visitor(child)
- if hasattr(visitor, 'depart_' + name):
- getattr(visitor, 'depart_' + name)(node)
-
-document = minidom.parse(sys.stdin)
-call_visitor(document)
-
-unicode_map = math_map
-unicode_map.update(text_map)
-# Now unicode_map contains the text entries plus dollar-enclosed math
-# entries for those chars for which no text entry exists.
-
-print '# Author: Felix Wiemann'
-print '# Contact: Felix_Wiemann@ososo.de'
-print '# Revision: $%s$' % 'Revision'
-print '# Date: $%s$' % 'Date'
-print '# Copyright: This file has been placed in the public domain.'
-print
-print '# This is a mapping of Unicode characters to LaTeX equivalents.'
-print '# The information has been extracted from'
-print '# <http://www.w3.org/2003/entities/xml/unicode.xml>, written by'
-print '# David Carlisle and Sebastian Rahtz.'
-print '#'
-print '# The extraction has been done by the "create_unimap.py" script'
-print '# located at <http://docutils.sf.net/tools/dev/create_unimap.py>.'
-print
-print 'unicode_map = %s' % pprint.pformat(unicode_map, indent=0)
diff --git a/docutils/tools/dev/profile_docutils.py b/docutils/tools/dev/profile_docutils.py
deleted file mode 100755
index 1f79c655e..000000000
--- a/docutils/tools/dev/profile_docutils.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/python -i
-
-# Author: Felix Wiemann
-# Contact: Felix_Wiemann@ososo.de
-# Revision: $Revision$
-# Date: $Date$
-# Copyright: This script has been placed in the public domain.
-
-import os.path
-import docutils.core
-import hotshot.stats
-
-print 'Profiler started.'
-
-os.chdir(os.path.join(os.path.dirname(docutils.__file__), '..'))
-
-print 'Profiling...'
-
-prof = hotshot.Profile('docutils.prof')
-prof.runcall(docutils.core.publish_file, source_path='HISTORY.txt',
- destination_path='prof.HISTORY.html', writer_name='html')
-prof.close()
-
-print 'Loading statistics...'
-
-print """
-stats = hotshot.stats.load('docutils.prof')
-stats.strip_dirs()
-stats.sort_stats('time') # 'cumulative'; 'calls'
-stats.print_stats(40)
-"""
-
-stats = hotshot.stats.load('docutils.prof')
-stats.strip_dirs()
-stats.sort_stats('time')
-stats.print_stats(40)
-
-try:
- execfile(os.environ['PYTHONSTARTUP'])
-except:
- pass
diff --git a/docutils/tools/dev/unicode2rstsubs.py b/docutils/tools/dev/unicode2rstsubs.py
deleted file mode 100755
index abc85e48b..000000000
--- a/docutils/tools/dev/unicode2rstsubs.py
+++ /dev/null
@@ -1,204 +0,0 @@
-#! /usr/bin/env python
-
-# Author: David Goodger
-# Contact: goodger@python.org
-# Revision: $Revision$
-# Date: $Date$
-# Copyright: This program has been placed in the public domain.
-
-"""
-unicode2rstsubs.py -- produce character entity files (reStructuredText
-substitutions) from the W3C master unicode.xml file.
-
-This program extracts character entity and entity set information from a
-unicode.xml file and produces multiple reStructuredText files (in the current
-directory) containing substitutions. Entity sets are from ISO 8879 & ISO
-9573-13 (combined), MathML, and HTML4. One or two files are produced for each
-entity set; a second file with a "-wide.txt" suffix is produced if there are
-wide-Unicode characters in the set.
-
-The input file, unicode.xml, is maintained as part of the MathML 2
-Recommendation XML source, and is available from
-<http://www.w3.org/2003/entities/xml/>.
-"""
-
-import sys
-import os
-import optparse
-import re
-from xml.parsers.expat import ParserCreate
-
-
-usage_msg = """Usage: %s [unicode.xml]"""
-
-def usage(prog, status=0, msg=None):
- print >>sys.stderr, usage_msg % prog
- if msg:
- print >>sys.stderr, msg
- sys.exit(status)
-
-def main(argv=None):
- if argv is None:
- argv = sys.argv
- if len(argv) == 2:
- inpath = argv[1]
- elif len(argv) > 2:
- usage(argv[0], 2,
- 'Too many arguments (%s): only 1 expected.' % (len(argv) - 1))
- else:
- inpath = 'unicode.xml'
- if not os.path.isfile(inpath):
- usage(argv[0], 1, 'No such file: "%s".' % inpath)
- infile = open(inpath)
- process(infile)
-
-def process(infile):
- grouper = CharacterEntitySetExtractor(infile)
- grouper.group()
- grouper.write_sets()
-
-
-class CharacterEntitySetExtractor:
-
- """
- Extracts character entity information from unicode.xml file, groups it by
- entity set, and writes out reStructuredText substitution files.
- """
-
- unwanted_entity_sets = ['stix', # unknown, buggy set
- 'predefined']
-
- header = """\
-.. This data file has been placed in the public domain.
-.. Derived from the Unicode character mappings available from
- <http://www.w3.org/2003/entities/xml/>.
- Processed by unicode2rstsubs.py, part of Docutils:
- <http://docutils.sourceforge.net>.
-"""
-
- def __init__(self, infile):
- self.infile = infile
- """Input unicode.xml file."""
-
- self.parser = self.setup_parser()
- """XML parser."""
-
- self.elements = []
- """Stack of element names. Last is current element."""
-
- self.sets = {}
- """Mapping of charent set name to set dict."""
-
- self.charid = None
- """Current character's "id" attribute value."""
-
- self.descriptions = {}
- """Mapping of character ID to description."""
-
- def setup_parser(self):
- parser = ParserCreate()
- parser.StartElementHandler = self.StartElementHandler
- parser.EndElementHandler = self.EndElementHandler
- parser.CharacterDataHandler = self.CharacterDataHandler
- return parser
-
- def group(self):
- self.parser.ParseFile(self.infile)
-
- def StartElementHandler(self, name, attributes):
- self.elements.append(name)
- handler = name + '_start'
- if hasattr(self, handler):
- getattr(self, handler)(name, attributes)
-
- def EndElementHandler(self, name):
- assert self.elements[-1] == name, \
- 'unknown end-tag %r (%r)' % (name, self.element)
- self.elements.pop()
- handler = name + '_end'
- if hasattr(self, handler):
- getattr(self, handler)(name)
-
- def CharacterDataHandler(self, data):
- handler = self.elements[-1] + '_data'
- if hasattr(self, handler):
- getattr(self, handler)(data)
-
- def character_start(self, name, attributes):
- self.charid = attributes['id']
-
- def entity_start(self, name, attributes):
- set = self.entity_set_name(attributes['set'])
- if not set:
- return
- if not self.sets.has_key(set):
- print 'bad set: %r' % set
- return
- entity = attributes['id']
- assert (not self.sets[set].has_key(entity)
- or self.sets[set][entity] == self.charid), \
- ('sets[%r][%r] == %r (!= %r)'
- % (set, entity, self.sets[set][entity], self.charid))
- self.sets[set][entity] = self.charid
-
- def description_data(self, data):
- self.descriptions.setdefault(self.charid, '')
- self.descriptions[self.charid] += data
-
- entity_set_name_pat = re.compile(r'[0-9-]*(.+)$')
- """Pattern to strip ISO numbers off the beginning of set names."""
-
- def entity_set_name(self, name):
- """
- Return lowercased and standard-number-free entity set name.
- Return ``None`` for unwanted entity sets.
- """
- match = self.entity_set_name_pat.match(name)
- name = match.group(1).lower()
- if name in self.unwanted_entity_sets:
- return None
- self.sets.setdefault(name, {})
- return name
-
- def write_sets(self):
- sets = self.sets.keys()
- sets.sort()
- for set_name in sets:
- self.write_set(set_name)
-
- def write_set(self, set_name, wide=None):
- if wide:
- outname = set_name + '-wide.txt'
- else:
- outname = set_name + '.txt'
- outfile = open(outname, 'w')
- print 'writing file "%s"' % outname
- print >>outfile, self.header
- set = self.sets[set_name]
- entities = [(e.lower(), e) for e in set.keys()]
- entities.sort()
- longest = 0
- for _, entity_name in entities:
- longest = max(longest, len(entity_name))
- has_wide = None
- for _, entity_name in entities:
- has_wide = self.write_entity(
- set, set_name, entity_name, outfile, longest, wide) or has_wide
- if has_wide and not wide:
- self.write_set(set_name, 1)
-
- def write_entity(self, set, set_name, entity_name, outfile, longest,
- wide=None):
- charid = set[entity_name]
- if not wide:
- for code in charid[1:].split('-'):
- if int(code, 16) > 0xFFFF:
- return 1 # wide-Unicode character
- codes = ' '.join(['U+%s' % code for code in charid[1:].split('-')])
- print >>outfile, ('.. %-*s unicode:: %s .. %s'
- % (longest + 2, '|' + entity_name + '|',
- codes, self.descriptions[charid]))
-
-
-if __name__ == '__main__':
- sys.exit(main())