diff options
| author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2011-05-04 20:51:59 +0000 |
|---|---|---|
| committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2011-05-04 20:51:59 +0000 |
| commit | 99da4e158382bb0ac361d6dcd89b9595c7bac674 (patch) | |
| tree | 6815e56a9e64eb07738d9820b0a6c9f9ff5a801a | |
| parent | 2018351fcc57a4be20a5a1065a6d4bd5b0b8c72b (diff) | |
| download | docutils-99da4e158382bb0ac361d6dcd89b9595c7bac674.tar.gz | |
More robust guess of input/output encoding.
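The locale encoding is stored as `frontend.locale_encoding`. It is used
as the second choice when decoding command-line arguments (after
`sys.stdin.encoding`) and for `default_error_encoding` (after
`sys.stderr.encoding`); only if it is also unavailable does the code fall
back to "ascii".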
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@7023 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
| -rw-r--r-- | docutils/docs/dev/todo.txt | 39 | ||||
| -rw-r--r-- | docutils/docutils/core.py | 9 | ||||
| -rw-r--r-- | docutils/docutils/frontend.py | 21 | ||||
| -rw-r--r-- | docutils/docutils/utils.py | 8 |
4 files changed, 48 insertions, 29 deletions
diff --git a/docutils/docs/dev/todo.txt b/docutils/docs/dev/todo.txt index 331a53ff3..dbc66b710 100644 --- a/docutils/docs/dev/todo.txt +++ b/docutils/docs/dev/todo.txt @@ -72,9 +72,26 @@ for inclusion in the Python standard library. General ======= -* We don't have consistent (or no) encoding handling for command line - arguments. See - <http://thread.gmane.org/gmane.text.docutils.user/2890/focus=2957>. +* Encoding of command line arguments can only be guessed: + + * try UTF-8/strict first, then try the locale's encoding with + strict error handling, then ASCII/replace? + + UTF-8 is almost 100% safe to try first; false positives are rare, + The locale's encoding with strict error handling may be a + reasonable compromise, but any error would indicate that the + locale's encoding is inappropriate. The only safe fallback is + ASCII/replace. + + * Do not decode argv before option parsing but individual string + values? + + +1 Allows for separate command-line vs. filesystem encodings, + respectively to keep file names encoded. + +1 Allows to configure command-line encoding in a config file, + -1 More complicated. + + Cf. <http://thread.gmane.org/gmane.text.docutils.user/2890/focus=2957>. * Improve handling on Windows: @@ -1232,14 +1249,14 @@ Which equation environments should be supported by the math directive? + numbered: `equation` + unnumbered: `equation*` -* multiline (test for ``\\`` outside of a nested environment +* multiline (test for ``\\`` outside of a nested environment (e.g. `array` or `cases`) + numbered: `align` (number every line) - + (To give one common number to all lines, put them in a `split` environment. Docutils then places it in an `equation` environment.) - + + unnumbered: `align*` + Sphinx math also supports `gather` (checking for blank lines in @@ -1284,10 +1301,10 @@ MathML_ latex_math_ is the base for the current latex2mathml_ module used with ``--math-output=MathML``. - + * Write a new converter based on: - - * a generic tokenizer (see e.g. 
a `latex-codec recipe`_, + + * a generic tokenizer (see e.g. a `latex-codec recipe`_, `updated latex-codec`_, ) * the Unicode-Char <-> LaTeX mappings database unimathsymbols_ @@ -1303,9 +1320,9 @@ MathML_ .. _ttm: http://hutchinson.belmont.ma.us/tth/mml/ .. _Steve’s LATEX-to-MathML translator: http://www.gold-saucer.org/mathml/greasemonkey/dist/display-latex - .. _latex-codec recipe: + .. _latex-codec recipe: http://code.activestate.com/recipes/252124-latex-codec/ - .. _updated latex-codec: + .. _updated latex-codec: http://mirror.ctan.org/biblio/bibtex/utils/mab2bib/latex.py .. _unimathsymbols: http://milde.users.sourceforge.net/LUCR/Math/ diff --git a/docutils/docutils/core.py b/docutils/docutils/core.py index 21f8d54be..0484a153f 100644 --- a/docutils/docutils/core.py +++ b/docutils/docutils/core.py @@ -22,13 +22,6 @@ from docutils.frontend import OptionParser from docutils.transforms import Transformer import docutils.readers.doctree -try: - import locale - argv_encoding = locale.getpreferredencoding() -except: - argv_encoding = 'ascii' - - class Publisher: """ @@ -156,6 +149,8 @@ class Publisher: option_parser = self.setup_option_parser( usage, description, settings_spec, config_section, **defaults) if argv is None: + argv_encoding = (sys.stdin.encoding or frontend.locale_encoding + or 'ascii') argv = [a.decode(argv_encoding) for a in sys.argv[1:]] self.settings = option_parser.parse_args(argv) diff --git a/docutils/docutils/frontend.py b/docutils/docutils/frontend.py index 00c340a87..819a168e3 100644 --- a/docutils/docutils/frontend.py +++ b/docutils/docutils/frontend.py @@ -39,6 +39,21 @@ import docutils.nodes import optparse from optparse import SUPPRESS_HELP +# Guess the locale's encoding. 
+# If no valid guess can be made, locale_encoding is set to `None`: +try: + import locale # module missing in Jython +except ImportError: + locale_encoding = None +else: + locale_encoding = locale.getlocale()[1] or locale.getdefaultlocale()[1] + # locale.getpreferredencoding([do_setlocale=True|False]) + # has side-effects | might return a wrong guess. + # (cf. Update 1 in http://stackoverflow.com/questions/4082645/using-python-2-xs-locale-module-to-format-numbers-and-currency) + try: + codecs.lookup(locale_encoding) + except LookupError: + locale_encoding = None def store_multiple(option, opt, value, parser, *args, **kwargs): """ @@ -313,10 +328,8 @@ class OptionParser(optparse.OptionParser, docutils.SettingsSpec): '0': 0, 'off': 0, 'no': 0, 'false': 0, '': 0} """Lookup table for boolean configuration file settings.""" - try: - default_error_encoding = sys.stderr.encoding or 'ascii' - except AttributeError: - default_error_encoding = 'ascii' + default_error_encoding = getattr(sys.stderr, 'encoding', + None) or locale_encoding or 'ascii' default_error_encoding_error_handler = 'backslashreplace' diff --git a/docutils/docutils/utils.py b/docutils/docutils/utils.py index f1320868c..a5d75734c 100644 --- a/docutils/docutils/utils.py +++ b/docutils/docutils/utils.py @@ -116,13 +116,7 @@ class Reporter: self.stream = stream """Where warning output is sent.""" - if encoding is None: - try: - encoding = stream.encoding - except AttributeError: - pass - - self.encoding = encoding or 'ascii' + self.encoding = encoding or getattr(stream, 'encoding', 'ascii') """The output character encoding.""" self.observers = [] |
