diff options
| author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2010-09-17 21:38:29 +0000 |
|---|---|---|
| committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2010-09-17 21:38:29 +0000 |
| commit | c346595d2e87faf2db2ebc096b1eec0bc260de8a (patch) | |
| tree | ccd1be8b591a95857ad614bdf1230b9b48e1740e /docutils | |
| parent | a8a145d40efbb9f0280dcd99d408c281d7c38149 (diff) | |
| download | docutils-c346595d2e87faf2db2ebc096b1eec0bc260de8a.tar.gz | |
Handle language codes according to BCP 47
(Best Current Practice: Tags for Identifying Languages).
Normalize language tags: convert to lowercase, replace "-" with "_", and treat subtags as optional.
E.g., given "de-AT-1901", look for the language modules
"de_at_1901", "de_at", "de_1901", and "de" (in that order).
TODO: warn (instead of failing) if no module is found.
git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk/docutils@6423 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
| -rw-r--r-- | docutils/frontend.py | 2 | ||||
| -rw-r--r-- | docutils/languages/__init__.py | 19 | ||||
| -rw-r--r-- | docutils/parsers/rst/languages/__init__.py | 20 | ||||
| -rw-r--r-- | docutils/utils.py | 33 |
4 files changed, 59 insertions, 15 deletions
diff --git a/docutils/frontend.py b/docutils/frontend.py index c958ce844..82a221943 100644 --- a/docutils/frontend.py +++ b/docutils/frontend.py @@ -464,7 +464,7 @@ class OptionParser(optparse.OptionParser, docutils.SettingsSpec): ['--error-encoding-error-handler'], {'default': default_error_encoding_error_handler, 'validator': validate_encoding_error_handler}), - ('Specify the language (as 2-letter code). Default: en.', + ('Specify the language (as BCP 47 language tag). Default: en.', ['--language', '-l'], {'dest': 'language_code', 'default': 'en', 'metavar': '<name>'}), ('Write output file dependencies to <file>.', diff --git a/docutils/languages/__init__.py b/docutils/languages/__init__.py index 7d6521fc1..c61871b90 100644 --- a/docutils/languages/__init__.py +++ b/docutils/languages/__init__.py @@ -11,11 +11,20 @@ This package contains modules for language-dependent features of Docutils. __docformat__ = 'reStructuredText' +from docutils.utils import normalize_language_tag + _languages = {} def get_language(language_code): - if language_code in _languages: - return _languages[language_code] - module = __import__(language_code, globals(), locals()) - _languages[language_code] = module - return module + for tag in normalize_language_tag(language_code): + if tag in _languages: + return _languages[tag] + try: + module = __import__(tag, globals(), locals()) + except ImportError: + continue + _languages[tag] = module + return module + # TODO: use Docutils reporter, warn instead of fail + # warn ('language "%s" not supported' % tag) + raise diff --git a/docutils/parsers/rst/languages/__init__.py b/docutils/parsers/rst/languages/__init__.py index 962802245..53017d75e 100644 --- a/docutils/parsers/rst/languages/__init__.py +++ b/docutils/parsers/rst/languages/__init__.py @@ -12,14 +12,18 @@ reStructuredText. 
__docformat__ = 'reStructuredText' +from docutils.utils import normalize_language_tag + _languages = {} def get_language(language_code): - if language_code in _languages: - return _languages[language_code] - try: - module = __import__(language_code, globals(), locals()) - except ImportError: - return None - _languages[language_code] = module - return module + for tag in normalize_language_tag(language_code): + if tag in _languages: + return _languages[tag] + try: + module = __import__(tag, globals(), locals()) + except ImportError: + continue + _languages[tag] = module + return module + return None diff --git a/docutils/utils.py b/docutils/utils.py index 6dfb96dc8..12518d01a 100644 --- a/docutils/utils.py +++ b/docutils/utils.py @@ -525,7 +525,7 @@ def get_stylesheet_list(settings): else: sheets = [] # strip whitespace (frequently occuring in config files) - return [sheet.strip(u' \t\n\r') for sheet in sheets] + return [sheet.strip(u' \t\n') for sheet in sheets] def get_trim_footnote_ref_space(settings): """ @@ -609,6 +609,37 @@ def uniq(L): r.append(item) return r +# by Li Daobing http://code.activestate.com/recipes/190465/ +# since Python 2.6 there is also itertools.combinations() +def unique_combinations(items, n): + """Return r-length tuples, in sorted order, no repeated elements""" + if n==0: yield [] + else: + for i in xrange(len(items)-n+1): + for cc in unique_combinations(items[i+1:],n-1): + yield [items[i]]+cc + +def normalize_language_tag(tag): + """Return a list of normalized combinations for a `BCP 47` language tag. 
+ + Example: + + >>> normalize_language_tag('de-AT-1901') + ['de_at_1901', 'de_at', 'de_1901', 'de'] + """ + # normalize: + tag = tag.lower().replace('-','_') + # find all combinations of subtags + taglist = [] + base_tag= tag.split('_')[:1] + subtags = tag.split('_')[1:] + # print base_tag, subtags + for n in range(len(subtags), 0, -1): + for tags in unique_combinations(subtags, n): + # print tags + taglist.append('_'.join(base_tag + tags)) + taglist += base_tag + return taglist class DependencyList: |
