summaryrefslogtreecommitdiff
path: root/docutils
diff options
context:
space:
mode:
authormilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-09-17 21:38:29 +0000
committermilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-09-17 21:38:29 +0000
commitc346595d2e87faf2db2ebc096b1eec0bc260de8a (patch)
treeccd1be8b591a95857ad614bdf1230b9b48e1740e /docutils
parenta8a145d40efbb9f0280dcd99d408c281d7c38149 (diff)
downloaddocutils-c346595d2e87faf2db2ebc096b1eec0bc260de8a.tar.gz
Handle language codes according to BCP 47
(Best Current Practice: Tags for Identifying Languages). Normalize language tags: downcase, - to _, optional subtags. E.g., given "de-AT-1901", look for language modules "de_at_1901", "de_at", "de_1901", and "de". TODO: warn (instead of fail), if no module found. git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk/docutils@6423 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
-rw-r--r--docutils/frontend.py2
-rw-r--r--docutils/languages/__init__.py19
-rw-r--r--docutils/parsers/rst/languages/__init__.py20
-rw-r--r--docutils/utils.py33
4 files changed, 59 insertions, 15 deletions
diff --git a/docutils/frontend.py b/docutils/frontend.py
index c958ce844..82a221943 100644
--- a/docutils/frontend.py
+++ b/docutils/frontend.py
@@ -464,7 +464,7 @@ class OptionParser(optparse.OptionParser, docutils.SettingsSpec):
['--error-encoding-error-handler'],
{'default': default_error_encoding_error_handler,
'validator': validate_encoding_error_handler}),
- ('Specify the language (as 2-letter code). Default: en.',
+ ('Specify the language (as BCP 47 language tag). Default: en.',
['--language', '-l'], {'dest': 'language_code', 'default': 'en',
'metavar': '<name>'}),
('Write output file dependencies to <file>.',
diff --git a/docutils/languages/__init__.py b/docutils/languages/__init__.py
index 7d6521fc1..c61871b90 100644
--- a/docutils/languages/__init__.py
+++ b/docutils/languages/__init__.py
@@ -11,11 +11,20 @@ This package contains modules for language-dependent features of Docutils.
__docformat__ = 'reStructuredText'
+from docutils.utils import normalize_language_tag
+
_languages = {}
def get_language(language_code):
-    if language_code in _languages:
-        return _languages[language_code]
-    module = __import__(language_code, globals(), locals())
-    _languages[language_code] = module
-    return module
+    """Return the language module matching `language_code`.
+
+    The candidate module names produced by `normalize_language_tag()`
+    are tried in the order given; the first one that is already cached
+    in `_languages` or that can be imported is returned (and cached
+    under its normalized tag).
+
+    Raises `ImportError` if none of the candidates can be imported.
+    """
+    for tag in normalize_language_tag(language_code):
+        if tag in _languages:
+            return _languages[tag]
+        try:
+            module = __import__(tag, globals(), locals())
+        except ImportError:
+            # No module for this candidate: fall back to the next one.
+            continue
+        _languages[tag] = module
+        return module
+    # TODO: use Docutils reporter, warn instead of fail
+    # Raise explicitly: a bare ``raise`` here is outside any ``except``
+    # clause and only works through Python 2's lingering exception state.
+    raise ImportError('language "%s" not supported' % language_code)
diff --git a/docutils/parsers/rst/languages/__init__.py b/docutils/parsers/rst/languages/__init__.py
index 962802245..53017d75e 100644
--- a/docutils/parsers/rst/languages/__init__.py
+++ b/docutils/parsers/rst/languages/__init__.py
@@ -12,14 +12,18 @@ reStructuredText.
__docformat__ = 'reStructuredText'
+from docutils.utils import normalize_language_tag
+
_languages = {}
def get_language(language_code):
-    if language_code in _languages:
-        return _languages[language_code]
-    try:
-        module = __import__(language_code, globals(), locals())
-    except ImportError:
-        return None
-    _languages[language_code] = module
-    return module
+    """Return the language module matching `language_code`, or None.
+
+    The candidate module names produced by `normalize_language_tag()`
+    are tried in the order given.  Successfully imported modules are
+    cached in `_languages` under their normalized tag.
+    """
+    for candidate in normalize_language_tag(language_code):
+        if candidate not in _languages:
+            try:
+                _languages[candidate] = __import__(candidate,
+                                                   globals(), locals())
+            except ImportError:
+                # Nothing importable under this name: try the next one.
+                continue
+        return _languages[candidate]
+    return None
diff --git a/docutils/utils.py b/docutils/utils.py
index 6dfb96dc8..12518d01a 100644
--- a/docutils/utils.py
+++ b/docutils/utils.py
@@ -525,7 +525,7 @@ def get_stylesheet_list(settings):
else:
sheets = []
# strip whitespace (frequently occuring in config files)
- return [sheet.strip(u' \t\n\r') for sheet in sheets]
+ return [sheet.strip(u' \t\n') for sheet in sheets]
def get_trim_footnote_ref_space(settings):
"""
@@ -609,6 +609,37 @@ def uniq(L):
r.append(item)
return r
+# by Li Daobing http://code.activestate.com/recipes/190465/
+# since Python 2.6 there is also itertools.combinations()
+def unique_combinations(items, n):
+    """Yield every `n`-element combination of `items` as a list.
+
+    Each combination keeps the elements in the order they have in
+    `items`; combinations are generated in order of element position.
+    For `n` == 0 a single empty list is yielded; if `n` exceeds
+    ``len(items)`` nothing is yielded.  `items` must support ``len()``
+    and slicing.
+    """
+    if n == 0:
+        yield []
+    else:
+        # ``range`` instead of the Python-2-only ``xrange``: same
+        # results, and the function also runs under Python 3.
+        for i in range(len(items) - n + 1):
+            # Combine items[i] with every (n-1)-combination of the
+            # elements that follow it.
+            for cc in unique_combinations(items[i+1:], n-1):
+                yield [items[i]] + cc
+
+def normalize_language_tag(tag):
+    """Return a list of normalized combinations for a `BCP 47` language tag.
+
+    The tag is lower-cased and hyphens are replaced by underscores.
+    The result combines the first subtag with every order-preserving
+    selection of the remaining subtags, longest selections first, and
+    ends with the first subtag on its own.
+
+    Example:
+
+    >>> normalize_language_tag('de-AT-1901')
+    ['de_at_1901', 'de_at', 'de_1901', 'de']
+    """
+    # normalize and split once: first subtag vs. the optional rest
+    parts = tag.lower().replace('-', '_').split('_')
+    primary, optional = parts[:1], parts[1:]
+    candidates = []
+    # count down from "all optional subtags" to "exactly one"
+    size = len(optional)
+    while size > 0:
+        for chosen in unique_combinations(optional, size):
+            candidates.append('_'.join(primary + chosen))
+        size -= 1
+    # the bare first subtag is always the last fallback
+    candidates.extend(primary)
+    return candidates
class DependencyList: