diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2012-11-23 01:18:49 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2012-11-23 01:18:49 +0000 |
commit | b6bd7db711d2f1da640021b8819a2fbdfffceae0 (patch) | |
tree | c568ecc5ef63da2fedff64b8242e77b5d5383ea5 /docutils | |
parent | 0304e2ae239c44db70bcb85234a188749eba1b87 (diff) | |
download | docutils-b6bd7db711d2f1da640021b8819a2fbdfffceae0.tar.gz |
normalize_language_tag() now returns `BCP 47`_ conformant tags.
Subtags are separated by ``-``, not ``_``.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7538 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
-rw-r--r-- | docutils/languages/__init__.py | 1 | ||||
-rw-r--r-- | docutils/parsers/rst/languages/__init__.py | 1 | ||||
-rw-r--r-- | docutils/utils/__init__.py | 19 | ||||
-rw-r--r-- | docutils/writers/latex2e/__init__.py | 41 | ||||
-rw-r--r-- | docutils/writers/xetex/__init__.py | 27 |
5 files changed, 51 insertions, 38 deletions
diff --git a/docutils/languages/__init__.py b/docutils/languages/__init__.py index c3d6c05a6..47d896851 100644 --- a/docutils/languages/__init__.py +++ b/docutils/languages/__init__.py @@ -27,6 +27,7 @@ def get_language(language_code, reporter=None): """ # TODO: use a dummy module returning emtpy strings?, configurable? for tag in normalize_language_tag(language_code): + tag = tag.replace('-','_') # '-' not valid in module names if tag in _languages: return _languages[tag] try: diff --git a/docutils/parsers/rst/languages/__init__.py b/docutils/parsers/rst/languages/__init__.py index 9730efcd0..c52989a4d 100644 --- a/docutils/parsers/rst/languages/__init__.py +++ b/docutils/parsers/rst/languages/__init__.py @@ -22,6 +22,7 @@ _languages = {} def get_language(language_code): for tag in normalize_language_tag(language_code): + tag = tag.replace('-','_') # '-' not valid in module names if tag in _languages: return _languages[tag] try: diff --git a/docutils/utils/__init__.py b/docutils/utils/__init__.py index ac2ad6edf..1aead4884 100644 --- a/docutils/utils/__init__.py +++ b/docutils/utils/__init__.py @@ -12,6 +12,7 @@ __docformat__ = 'reStructuredText' import sys import os import os.path +import re import warnings import unicodedata from docutils import ApplicationError, DataError @@ -642,20 +643,20 @@ def normalize_language_tag(tag): Example: - >>> normalize_language_tag('de-AT-1901') - ['de_at_1901', 'de_at', 'de_1901', 'de'] + >>> normalize_language_tag('de_AT-1901') + ['de-at-1901', 'de-at', 'de-1901', 'de'] """ # normalize: - tag = tag.lower().replace('-','_') - # find all combinations of subtags + tag = tag.lower().replace('_','-') + # split (except singletons, which mark the following tag as non-standard): + tag = re.sub(r'-([a-zA-Z0-9])-', r'-\1_', tag) taglist = [] - base_tag= tag.split('_')[:1] - subtags = tag.split('_')[1:] - # print base_tag, subtags + subtags = [subtag.replace('_', '-') for subtag in tag.split('-')] + base_tag = [subtags.pop(0)] + # find 
all combinations of subtags for n in range(len(subtags), 0, -1): for tags in unique_combinations(subtags, n): - # print tags - taglist.append('_'.join(base_tag + tags)) + taglist.append('-'.join(base_tag+tags)) taglist += base_tag return taglist diff --git a/docutils/writers/latex2e/__init__.py b/docutils/writers/latex2e/__init__.py index a01a9c4d0..864cc9e13 100644 --- a/docutils/writers/latex2e/__init__.py +++ b/docutils/writers/latex2e/__init__.py @@ -293,18 +293,18 @@ class Babel(object): 'cy': 'welsh', 'da': 'danish', 'de': 'ngerman', # new spelling (de_1996) - 'de_1901': 'german', # old spelling - 'de_at': 'naustrian', - 'de_at_1901': 'austrian', + 'de-1901': 'german', # old spelling + 'de-AT': 'naustrian', + 'de-AT-1901': 'austrian', 'dsb': 'lowersorbian', 'el': 'greek', # monotonic (el-monoton) - 'el_polyton': 'polutonikogreek', + 'el-polyton': 'polutonikogreek', 'en': 'english', # TeX' default language - 'en_au': 'australian', - 'en_ca': 'canadian', - 'en_gb': 'british', - 'en_nz': 'newzealand', - 'en_us': 'american', + 'en-AU': 'australian', + 'en-CA': 'canadian', + 'en-GB': 'british', + 'en-NZ': 'newzealand', + 'en-US': 'american', 'eo': 'esperanto', # '^' is active 'es': 'spanish', 'et': 'estonian', @@ -312,10 +312,10 @@ class Babel(object): # 'fa': 'farsi', 'fi': 'finnish', 'fr': 'french', - 'fr_ca': 'canadien', + 'fr-CA': 'canadien', 'ga': 'irish', # Irish Gaelic # 'grc': # Ancient Greek - 'grc_ibycus': 'ibycus', # Ibycus encoding + 'grc-ibycus': 'ibycus', # Ibycus encoding 'gl': 'galician', 'he': 'hebrew', 'hr': 'croatian', @@ -338,24 +338,27 @@ class Babel(object): 'no': 'norsk', # Norwegian Bokmal 'pl': 'polish', 'pt': 'portuges', - 'pt_br': 'brazil', + 'pt-BR': 'brazil', 'ro': 'romanian', 'ru': 'russian', # '"' is active 'se': 'samin', # North Sami - # sh-cyrl: Serbo-Croatian, Cyrillic script - 'sh-latn': 'serbian', # Serbo-Croatian, Latin script + # sh-Cyrl: Serbo-Croatian, Cyrillic script + 'sh-Latn': 'serbian', # Serbo-Croatian, Latin script 
'sk': 'slovak', 'sl': 'slovene', 'sq': 'albanian', - # 'sr-cyrl': Serbian, Cyrillic script (sr-cyrl) - 'sr-latn': 'serbian', # Serbian, Latin script, " active. + # 'sr-Cyrl': Serbian, Cyrillic script (sr-cyrl) + 'sr-Latn': 'serbian', # Serbian, Latin script, " active. 'sv': 'swedish', # 'th': 'thai', 'tr': 'turkish', 'uk': 'ukrainian', 'vi': 'vietnam', - # zh-latn: Chinese Pinyin + # zh-Latn: Chinese Pinyin } + # normalize (downcase) keys + language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()]) + warn_msg = 'Language "%s" not supported by LaTeX (babel)' def __init__(self, language_code, reporter=None): @@ -1595,8 +1598,12 @@ class LaTeXTranslator(nodes.NodeVisitor): self.out.append( '%\n\\begin{list}{}{}\n' ) else: self.out.append( '%\n\\begin{itemize}\n' ) + # if node['classes']: + # self.visit_inline(node) def depart_bullet_list(self, node): + # if node['classes']: + # self.depart_inline(node) if self.is_toc_list: self.out.append( '\n\\end{list}\n' ) else: diff --git a/docutils/writers/xetex/__init__.py b/docutils/writers/xetex/__init__.py index 8c5016e2f..150cb49ce 100644 --- a/docutils/writers/xetex/__init__.py +++ b/docutils/writers/xetex/__init__.py @@ -76,30 +76,33 @@ class Babel(latex2e.Babel): # code Polyglossia-name comment 'cop': 'coptic', 'de': 'german', # new spelling (de_1996) - 'de_1901': 'ogerman', # old spelling + 'de-1901': 'ogerman', # old spelling 'dv': 'divehi', # Maldivian 'dsb': 'lsorbian', - 'el_polyton': 'polygreek', + 'el-polyton': 'polygreek', 'fa': 'farsi', 'grc': 'ancientgreek', 'hsb': 'usorbian', - 'sh-cyrl': 'serbian', # Serbo-Croatian, Cyrillic script - 'sh-latn': 'croatian', # Serbo-Croatian, Latin script + 'sh-Cyrl': 'serbian', # Serbo-Croatian, Cyrillic script + 'sh-Latn': 'croatian', # Serbo-Croatian, Latin script 'sq': 'albanian', - 'sr': 'serbian', # Cyrillic script (sr-cyrl) + 'sr': 'serbian', # Cyrillic script (sr-Cyrl) 'th': 'thai', 'vi': 'vietnamese', - # zh-latn: ??? # Chinese Pinyin + # zh-Latn: ??? 
# Chinese Pinyin }) + # normalize (downcase) keys + language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()]) + # Languages without Polyglossia support: for key in ('af', # 'afrikaans', - 'de_at', # 'naustrian', - 'de_at_1901', # 'austrian', - 'fr_ca', # 'canadien', - 'grc_ibycus', # 'ibycus', (Greek Ibycus encoding) - 'sr-latn', # 'serbian script=latin' + 'de-AT', # 'naustrian', + 'de-AT-1901', # 'austrian', + 'fr-CA', # 'canadien', + 'grc-ibycus', # 'ibycus', (Greek Ibycus encoding) + 'sr-Latn', # 'serbian script=latin' ): - del(language_codes[key]) + del(language_codes[key.lower()]) def __init__(self, language_code, reporter): self.language_code = language_code |