diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2011-12-20 14:14:21 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2011-12-20 14:14:21 +0000 |
commit | 954b32789e031834acca7e28bbb35f310051d095 (patch) | |
tree | 4129b67e382a2dc0f9ffcbe6df8ba971f8e3aa65 /docutils/parsers/rst | |
parent | 87805e2274e7ec5c4ab1aaf3f2e6ea8b37e459b6 (diff) | |
download | docutils-954b32789e031834acca7e28bbb35f310051d095.tar.gz |
docutils.utils is now a package (providing a place for sub-modules)
.. important:: docutils/math, docutils/error_reporting.py, and
docutils/urischemes.py will move to the utils package in the next
release, too. Code importing these modules needs to adapt.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7267 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/parsers/rst')
-rw-r--r-- | docutils/parsers/rst/directives/body.py | 2 | ||||
-rw-r--r-- | docutils/parsers/rst/punctuation_chars.py | 211 | ||||
-rw-r--r-- | docutils/parsers/rst/roles.py | 2 | ||||
-rw-r--r-- | docutils/parsers/rst/states.py | 5 |
4 files changed, 5 insertions, 215 deletions
diff --git a/docutils/parsers/rst/directives/body.py b/docutils/parsers/rst/directives/body.py index 8f6d4360c..4ff9fdc05 100644 --- a/docutils/parsers/rst/directives/body.py +++ b/docutils/parsers/rst/directives/body.py @@ -16,7 +16,7 @@ from docutils import nodes from docutils.parsers.rst import Directive from docutils.parsers.rst import directives from docutils.parsers.rst.roles import set_classes -from docutils.parsers.code_analyzer import Lexer, LexerError, NumberLines +from docutils.utils.code_analyzer import Lexer, LexerError, NumberLines class BasePseudoSection(Directive): diff --git a/docutils/parsers/rst/punctuation_chars.py b/docutils/parsers/rst/punctuation_chars.py deleted file mode 100644 index b8dbe2b43..000000000 --- a/docutils/parsers/rst/punctuation_chars.py +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf8 -*- -# :Copyright: © 2011 Günter Milde. -# :License: Released under the terms of the `2-Clause BSD license`_, in short: -# -# Copying and distribution of this file, with or without modification, -# are permitted in any medium without royalty provided the copyright -# notice and this notice are preserved. -# This file is offered as-is, without any warranty. -# -# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause - -# :Id: $Id$ - -import sys, re -import unicodedata - -# punctuation characters around inline markup -# =========================================== -# -# This module provides the lists of characters for the implementation of -# the `inline markup recognition rules`_ in the reStructuredText parser -# (states.py) -# -# .. _inline markup recognition rules: -# ../../../docs/ref/rst/restructuredtext.html#inline-markup - -# Docutils punctuation category sample strings -# -------------------------------------------- -# -# The sample strings are generated by punctuation_samples() and put here -# literal to avoid the time-consuming generation with every Docutils -# run. 
Running this file as a standalone module checks the definitions below -# against a re-calculation. - -openers = ur"""\"\'\(\<\[\{༺༼᚛⁅⁽₍〈❨❪❬❮❰❲❴⟅⟦⟨⟪⟬⟮⦃⦅⦇⦉⦋⦍⦏⦑⦓⦕⦗⧘⧚⧼⸢⸤⸦⸨〈《「『【〔〖〘〚〝〝﴾︗︵︷︹︻︽︿﹁﹃﹇﹙﹛﹝([{⦅「«‘“‹⸂⸄⸉⸌⸜⸠‚„»’”›⸃⸅⸊⸍⸝⸡‛‟""" -closers = ur"""\"\'\)\>\]\}༻༽᚜⁆⁾₎〉❩❫❭❯❱❳❵⟆⟧⟩⟫⟭⟯⦄⦆⦈⦊⦌⦎⦐⦒⦔⦖⦘⧙⧛⧽⸣⸥⸧⸩〉》」』】〕〗〙〛〞〟﴿︘︶︸︺︼︾﹀﹂﹄﹈﹚﹜﹞)]}⦆」»’”›⸃⸅⸊⸍⸝⸡‛‟«‘“‹⸂⸄⸉⸌⸜⸠‚„""" -delimiters = ur"\-\/\:֊־᐀᠆‐‑‒–—―⸗⸚〜〰゠︱︲﹘﹣-¡·¿;·՚՛՜՝՞՟։׀׃׆׳״؉؊،؍؛؞؟٪٫٬٭۔܀܁܂܃܄܅܆܇܈܉܊܋܌܍߷߸߹࠰࠱࠲࠳࠴࠵࠶࠷࠸࠹࠺࠻࠼࠽࠾।॥॰෴๏๚๛༄༅༆༇༈༉༊་༌།༎༏༐༑༒྅࿐࿑࿒࿓࿔၊။၌၍၎၏჻፡።፣፤፥፦፧፨᙭᙮᛫᛬᛭᜵᜶។៕៖៘៙៚᠀᠁᠂᠃᠄᠅᠇᠈᠉᠊᥄᥅᧞᧟᨞᨟᪠᪡᪢᪣᪤᪥᪦᪨᪩᪪᪫᪬᪭᭚᭛᭜᭝᭞᭟᭠᰻᰼᰽᰾᰿᱾᱿᳓‖‗†‡•‣․‥…‧‰‱′″‴‵‶‷‸※‼‽‾⁁⁂⁃⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁓⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞⳹⳺⳻⳼⳾⳿⸀⸁⸆⸇⸈⸋⸎⸏⸐⸑⸒⸓⸔⸕⸖⸘⸙⸛⸞⸟⸪⸫⸬⸭⸮⸰⸱、。〃〽・꓾꓿꘍꘎꘏꙳꙾꛲꛳꛴꛵꛶꛷꡴꡵꡶꡷꣎꣏꣸꣹꣺꤮꤯꥟꧁꧂꧃꧄꧅꧆꧇꧈꧉꧊꧋꧌꧍꧞꧟꩜꩝꩞꩟꫞꫟꯫︐︑︒︓︔︕︖︙︰﹅﹆﹉﹊﹋﹌﹐﹑﹒﹔﹕﹖﹗﹟﹠﹡﹨﹪﹫!"#%&'*,./:;?@\。、・𐄀𐄁𐎟𐏐𐡗𐤟𐤿𐩐𐩑𐩒𐩓𐩔𐩕𐩖𐩗𐩘𐩿𐬹𐬺𐬻𐬼𐬽𐬾𐬿𑂻𑂼𑂾𑂿𑃀𑃁𒑰𒑱𒑲𒑳" -closing_delimiters = ur"\.\,\;\!\?" - - -# Unicode punctuation character categories -# ---------------------------------------- - -unicode_punctuation_categories = { - # 'Pc': 'Connector', # not used in Docutils inline markup recognition - 'Pd': 'Dash', - 'Ps': 'Open', - 'Pe': 'Close', - 'Pi': 'Initial quote', # may behave like Ps or Pe depending on usage - 'Pf': 'Final quote', # may behave like Ps or Pe depending on usage - 'Po': 'Other' - } -"""Unicode character categories for punctuation""" - - -# generate character pattern strings -# ================================== - -def unicode_charlists(categories, cp_min=0, cp_max=None): - """Return dictionary of Unicode character lists. - - For each of the `catagories`, an item contains a list with all Unicode - characters with `cp_min` <= code-point <= `cp_max` that belong to the - category. (The default values check every code-point supported by Python.) 
- """ - # Determine highest code point with one of the given categories - # (may shorten the search time considerably if there are many - # categories with not too high characters): - if cp_max is None: - cp_max = max(x for x in xrange(sys.maxunicode + 1) - if unicodedata.category(unichr(x)) in categories) - # print cp_max # => 74867 for unicode_punctuation_categories - charlists = {} - for cat in categories: - charlists[cat] = [unichr(x) for x in xrange(cp_min, cp_max+1) - if unicodedata.category(unichr(x)) == cat] - return charlists - - -# Character categories in Docutils -# -------------------------------- - -def punctuation_samples(): - - """Docutils punctuation category sample strings. - - Return list of sample strings for the categories "Open", "Close", - "Delimiters" and "Closing-Delimiters" used in the `inline markup - recognition rules`_. - """ - - # Lists with characters in Unicode punctuation character categories - cp_min = 160 # ASCII chars have special rules for backwards compatibility - ucharlists = unicode_charlists(unicode_punctuation_categories, cp_min) - - # match opening/closing characters - # -------------------------------- - # Rearange the lists to ensure matching characters at the same - # index position. - - # low quotation marks are also used as closers (e.g. 
in Greek) - # move them to category Pi: - ucharlists['Ps'].remove(u'‚') # 201A SINGLE LOW-9 QUOTATION MARK - ucharlists['Ps'].remove(u'„') # 201E DOUBLE LOW-9 QUOTATION MARK - ucharlists['Pi'] += [u'‚', u'„'] - - ucharlists['Pi'].remove(u'‛') # 201B SINGLE HIGH-REVERSED-9 QUOTATION MARK - ucharlists['Pi'].remove(u'‟') # 201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK - ucharlists['Pf'] += [u'‛', u'‟'] - - # 301F LOW DOUBLE PRIME QUOTATION MARK misses the opening pendant: - ucharlists['Ps'].insert(ucharlists['Pe'].index(u'\u301f'), u'\u301d') - - # print u''.join(ucharlists['Ps']).encode('utf8') - # print u''.join(ucharlists['Pe']).encode('utf8') - # print u''.join(ucharlists['Pi']).encode('utf8') - # print u''.join(ucharlists['Pf']).encode('utf8') - - # The Docutils character categories - # --------------------------------- - # - # The categorization of ASCII chars is non-standard to reduce both - # false positives and need for escaping. (see `inline markup recognition - # rules`_) - - # matching, allowed before markup - openers = [re.escape('"\'(<[{')] - for cat in ('Ps', 'Pi', 'Pf'): - openers.extend(ucharlists[cat]) - - # matching, allowed after markup - closers = [re.escape('"\')>]}')] - for cat in ('Pe', 'Pf', 'Pi'): - closers.extend(ucharlists[cat]) - - # non-matching, allowed on both sides - delimiters = [re.escape('-/:')] - for cat in ('Pd', 'Po'): - delimiters.extend(ucharlists[cat]) - - # non-matching, after markup - closing_delimiters = [re.escape('.,;!?')] - - # # Test open/close matching: - # for i in range(min(len(openers),len(closers))): - # print '%4d %s %s' % (i, openers[i].encode('utf8'), - # closers[i].encode('utf8')) - - return [u''.join(chars) - for chars in (openers, closers, delimiters, closing_delimiters)] - - -# Matching open/close quotes -# -------------------------- - -# Rule (5) requires determination of matching open/close pairs. 
However, -# the pairing of open/close quotes is ambigue due to different typographic -# conventions in different languages. - -quote_pairs = {u'\xbb': u'\xbb', # Swedish - u'\u2018': u'\u201a', # Greek - u'\u2019': u'\u2019', # Swedish - u'\u201a': u'\u2018\u2019', # German, Polish - u'\u201c': u'\u201e', # German - u'\u201e': u'\u201c\u201d', - u'\u201d': u'\u201d', # Swedish - u'\u203a': u'\u203a', # Swedish - } - -def match_chars(c1, c2): - try: - i = openers.index(c1) - except ValueError: # c1 not in openers - return False - return c2 == closers[i] or c2 in quote_pairs.get(c1, '') - - - - -# print results -# ============= - -if __name__ == '__main__': - - # (re) create and compare the samples: - (o, c, d, cd) = punctuation_samples() - if o != openers: - print '- openers = ur"""%s"""' % openers.encode('utf8') - print '+ openers = ur"""%s"""' % o.encode('utf8') - if c != closers: - print '- closers = ur"""%s"""' % closers.encode('utf8') - print '+ closers = ur"""%s"""' % c.encode('utf8') - if d != delimiters: - print '- delimiters = ur"%s"' % delimiters.encode('utf8') - print '+ delimiters = ur"%s"' % d.encode('utf8') - if cd != closing_delimiters: - print '- closing_delimiters = ur"%s"' % closing_delimiters.encode('utf8') - print '+ closing_delimiters = ur"%s"' % cd.encode('utf8') - - # # test prints - # print 'openers = ', repr(openers) - # print 'closers = ', repr(closers) - # print 'delimiters = ', repr(delimiters) - # print 'closing_delimiters = ', repr(closing_delimiters) - - # ucharlists = unicode_charlists(unicode_punctuation_categories) - # for cat, chars in ucharlists.items(): - # # print cat, chars - # # compact output (visible with a comprehensive font): - # print (u":%s: %s" % (cat, u''.join(chars))).encode('utf8') diff --git a/docutils/parsers/rst/roles.py b/docutils/parsers/rst/roles.py index 1a8b0bf75..9b510fd30 100644 --- a/docutils/parsers/rst/roles.py +++ b/docutils/parsers/rst/roles.py @@ -75,7 +75,7 @@ __docformat__ = 'reStructuredText' from 
docutils import nodes, utils from docutils.parsers.rst import directives from docutils.parsers.rst.languages import en as _fallback_language_module -from docutils.parsers.code_analyzer import Lexer, LexerError +from docutils.utils.code_analyzer import Lexer, LexerError DEFAULT_INTERPRETED_ROLE = 'title-reference' """ diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 556fac783..8bb1558f1 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -107,16 +107,17 @@ import sys import re import roman from types import FunctionType, MethodType + from docutils import nodes, statemachine, utils, urischemes from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.nodes import fully_normalize_name as normalize_name from docutils.nodes import whitespace_normalize_name -from docutils.utils import escape2null, unescape, column_width import docutils.parsers.rst from docutils.parsers.rst import directives, languages, tableparser, roles from docutils.parsers.rst.languages import en as _fallback_language_module -from docutils.parsers.rst import punctuation_chars +from docutils.utils import escape2null, unescape, column_width +from docutils.utils import punctuation_chars class MarkupError(DataError): pass class UnknownInterpretedRoleError(DataError): pass |