summary | refs | log | tree | commit | diff
path: root/docutils
diff options
context:
space:
mode:
author: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2012-11-23 01:18:49 +0000
committer: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2012-11-23 01:18:49 +0000
commit: b6bd7db711d2f1da640021b8819a2fbdfffceae0 (patch)
tree: c568ecc5ef63da2fedff64b8242e77b5d5383ea5 /docutils
parent: 0304e2ae239c44db70bcb85234a188749eba1b87 (diff)
download: docutils-b6bd7db711d2f1da640021b8819a2fbdfffceae0.tar.gz
normalize_language_tag() now returns `BCP 47`_ conformant tags.
Subtags are separated by ``-``, not ``_``.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7538 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
-rw-r--r-- docutils/languages/__init__.py 1
-rw-r--r-- docutils/parsers/rst/languages/__init__.py 1
-rw-r--r-- docutils/utils/__init__.py 19
-rw-r--r-- docutils/writers/latex2e/__init__.py 41
-rw-r--r-- docutils/writers/xetex/__init__.py 27
5 files changed, 51 insertions, 38 deletions
diff --git a/docutils/languages/__init__.py b/docutils/languages/__init__.py
index c3d6c05a6..47d896851 100644
--- a/docutils/languages/__init__.py
+++ b/docutils/languages/__init__.py
@@ -27,6 +27,7 @@ def get_language(language_code, reporter=None):
"""
# TODO: use a dummy module returning emtpy strings?, configurable?
for tag in normalize_language_tag(language_code):
+ tag = tag.replace('-','_') # '-' not valid in module names
if tag in _languages:
return _languages[tag]
try:
diff --git a/docutils/parsers/rst/languages/__init__.py b/docutils/parsers/rst/languages/__init__.py
index 9730efcd0..c52989a4d 100644
--- a/docutils/parsers/rst/languages/__init__.py
+++ b/docutils/parsers/rst/languages/__init__.py
@@ -22,6 +22,7 @@ _languages = {}
def get_language(language_code):
for tag in normalize_language_tag(language_code):
+ tag = tag.replace('-','_') # '-' not valid in module names
if tag in _languages:
return _languages[tag]
try:
diff --git a/docutils/utils/__init__.py b/docutils/utils/__init__.py
index ac2ad6edf..1aead4884 100644
--- a/docutils/utils/__init__.py
+++ b/docutils/utils/__init__.py
@@ -12,6 +12,7 @@ __docformat__ = 'reStructuredText'
import sys
import os
import os.path
+import re
import warnings
import unicodedata
from docutils import ApplicationError, DataError
@@ -642,20 +643,20 @@ def normalize_language_tag(tag):
Example:
- >>> normalize_language_tag('de-AT-1901')
- ['de_at_1901', 'de_at', 'de_1901', 'de']
+ >>> normalize_language_tag('de_AT-1901')
+ ['de-at-1901', 'de-at', 'de-1901', 'de']
"""
# normalize:
- tag = tag.lower().replace('-','_')
- # find all combinations of subtags
+ tag = tag.lower().replace('_','-')
+ # split (except singletons, which mark the following tag as non-standard):
+ tag = re.sub(r'-([a-zA-Z0-9])-', r'-\1_', tag)
taglist = []
- base_tag= tag.split('_')[:1]
- subtags = tag.split('_')[1:]
- # print base_tag, subtags
+ subtags = [subtag.replace('_', '-') for subtag in tag.split('-')]
+ base_tag = [subtags.pop(0)]
+ # find all combinations of subtags
for n in range(len(subtags), 0, -1):
for tags in unique_combinations(subtags, n):
- # print tags
- taglist.append('_'.join(base_tag + tags))
+ taglist.append('-'.join(base_tag+tags))
taglist += base_tag
return taglist
diff --git a/docutils/writers/latex2e/__init__.py b/docutils/writers/latex2e/__init__.py
index a01a9c4d0..864cc9e13 100644
--- a/docutils/writers/latex2e/__init__.py
+++ b/docutils/writers/latex2e/__init__.py
@@ -293,18 +293,18 @@ class Babel(object):
'cy': 'welsh',
'da': 'danish',
'de': 'ngerman', # new spelling (de_1996)
- 'de_1901': 'german', # old spelling
- 'de_at': 'naustrian',
- 'de_at_1901': 'austrian',
+ 'de-1901': 'german', # old spelling
+ 'de-AT': 'naustrian',
+ 'de-AT-1901': 'austrian',
'dsb': 'lowersorbian',
'el': 'greek', # monotonic (el-monoton)
- 'el_polyton': 'polutonikogreek',
+ 'el-polyton': 'polutonikogreek',
'en': 'english', # TeX' default language
- 'en_au': 'australian',
- 'en_ca': 'canadian',
- 'en_gb': 'british',
- 'en_nz': 'newzealand',
- 'en_us': 'american',
+ 'en-AU': 'australian',
+ 'en-CA': 'canadian',
+ 'en-GB': 'british',
+ 'en-NZ': 'newzealand',
+ 'en-US': 'american',
'eo': 'esperanto', # '^' is active
'es': 'spanish',
'et': 'estonian',
@@ -312,10 +312,10 @@ class Babel(object):
# 'fa': 'farsi',
'fi': 'finnish',
'fr': 'french',
- 'fr_ca': 'canadien',
+ 'fr-CA': 'canadien',
'ga': 'irish', # Irish Gaelic
# 'grc': # Ancient Greek
- 'grc_ibycus': 'ibycus', # Ibycus encoding
+ 'grc-ibycus': 'ibycus', # Ibycus encoding
'gl': 'galician',
'he': 'hebrew',
'hr': 'croatian',
@@ -338,24 +338,27 @@ class Babel(object):
'no': 'norsk', # Norwegian Bokmal
'pl': 'polish',
'pt': 'portuges',
- 'pt_br': 'brazil',
+ 'pt-BR': 'brazil',
'ro': 'romanian',
'ru': 'russian', # '"' is active
'se': 'samin', # North Sami
- # sh-cyrl: Serbo-Croatian, Cyrillic script
- 'sh-latn': 'serbian', # Serbo-Croatian, Latin script
+ # sh-Cyrl: Serbo-Croatian, Cyrillic script
+ 'sh-Latn': 'serbian', # Serbo-Croatian, Latin script
'sk': 'slovak',
'sl': 'slovene',
'sq': 'albanian',
- # 'sr-cyrl': Serbian, Cyrillic script (sr-cyrl)
- 'sr-latn': 'serbian', # Serbian, Latin script, " active.
+ # 'sr-Cyrl': Serbian, Cyrillic script (sr-cyrl)
+ 'sr-Latn': 'serbian', # Serbian, Latin script, " active.
'sv': 'swedish',
# 'th': 'thai',
'tr': 'turkish',
'uk': 'ukrainian',
'vi': 'vietnam',
- # zh-latn: Chinese Pinyin
+ # zh-Latn: Chinese Pinyin
}
+ # normalize (downcase) keys
+ language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()])
+
warn_msg = 'Language "%s" not supported by LaTeX (babel)'
def __init__(self, language_code, reporter=None):
@@ -1595,8 +1598,12 @@ class LaTeXTranslator(nodes.NodeVisitor):
self.out.append( '%\n\\begin{list}{}{}\n' )
else:
self.out.append( '%\n\\begin{itemize}\n' )
+ # if node['classes']:
+ # self.visit_inline(node)
def depart_bullet_list(self, node):
+ # if node['classes']:
+ # self.depart_inline(node)
if self.is_toc_list:
self.out.append( '\n\\end{list}\n' )
else:
diff --git a/docutils/writers/xetex/__init__.py b/docutils/writers/xetex/__init__.py
index 8c5016e2f..150cb49ce 100644
--- a/docutils/writers/xetex/__init__.py
+++ b/docutils/writers/xetex/__init__.py
@@ -76,30 +76,33 @@ class Babel(latex2e.Babel):
# code Polyglossia-name comment
'cop': 'coptic',
'de': 'german', # new spelling (de_1996)
- 'de_1901': 'ogerman', # old spelling
+ 'de-1901': 'ogerman', # old spelling
'dv': 'divehi', # Maldivian
'dsb': 'lsorbian',
- 'el_polyton': 'polygreek',
+ 'el-polyton': 'polygreek',
'fa': 'farsi',
'grc': 'ancientgreek',
'hsb': 'usorbian',
- 'sh-cyrl': 'serbian', # Serbo-Croatian, Cyrillic script
- 'sh-latn': 'croatian', # Serbo-Croatian, Latin script
+ 'sh-Cyrl': 'serbian', # Serbo-Croatian, Cyrillic script
+ 'sh-Latn': 'croatian', # Serbo-Croatian, Latin script
'sq': 'albanian',
- 'sr': 'serbian', # Cyrillic script (sr-cyrl)
+ 'sr': 'serbian', # Cyrillic script (sr-Cyrl)
'th': 'thai',
'vi': 'vietnamese',
- # zh-latn: ??? # Chinese Pinyin
+ # zh-Latn: ??? # Chinese Pinyin
})
+ # normalize (downcase) keys
+ language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()])
+
# Languages without Polyglossia support:
for key in ('af', # 'afrikaans',
- 'de_at', # 'naustrian',
- 'de_at_1901', # 'austrian',
- 'fr_ca', # 'canadien',
- 'grc_ibycus', # 'ibycus', (Greek Ibycus encoding)
- 'sr-latn', # 'serbian script=latin'
+ 'de-AT', # 'naustrian',
+ 'de-AT-1901', # 'austrian',
+ 'fr-CA', # 'canadien',
+ 'grc-ibycus', # 'ibycus', (Greek Ibycus encoding)
+ 'sr-Latn', # 'serbian script=latin'
):
- del(language_codes[key])
+ del(language_codes[key.lower()])
def __init__(self, language_code, reporter):
self.language_code = language_code