diff options
author | Armin Ronacher <armin.ronacher@active-4.com> | 2013-11-20 19:30:22 +0000 |
---|---|---|
committer | Armin Ronacher <armin.ronacher@active-4.com> | 2013-11-20 19:30:22 +0000 |
commit | 774047ae71b3b16fb1db2b5010b31175ce61f8a5 (patch) | |
tree | 590730322a0b67371a67e092fe31f48657565e33 | |
parent | 7edfab980eb89e06ee97c576b0778283f1c40707 (diff) | |
download | babel-feature/cldr24.tar.gz |
Started work on supporting CLDR 24 (branch: feature/cldr24)
-rw-r--r-- | babel/numbers.py | 2 |
-rw-r--r-- | babel/plural.py | 100 |
-rwxr-xr-x | scripts/download_import_cldr.py | 6 |
-rwxr-xr-x | scripts/import_cldr.py | 5 |
-rw-r--r-- | tests/test_numbers.py | 4 |
5 files changed, 94 insertions, 23 deletions
diff --git a/babel/numbers.py b/babel/numbers.py index 2f7fe16..c2c782e 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -35,7 +35,7 @@ def get_currency_name(currency, count=None, locale=LC_NUMERIC): >>> get_currency_name('USD', locale='en_US') u'US Dollar' - + .. versionadded:: 0.9.4 :param currency: the currency code diff --git a/babel/plural.py b/babel/plural.py index 144a0dc..983f9fb 100644 --- a/babel/plural.py +++ b/babel/plural.py @@ -10,6 +10,7 @@ """ import re +import decimal _plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other') @@ -50,6 +51,9 @@ class PluralRule(object): found = set() self.abstract = [] for key, expr in sorted(list(rules)): + # Other only contains samples + if key == 'other': + continue if key not in _plural_tags: raise ValueError('unknown tag %r' % key) elif key in found: @@ -102,7 +106,22 @@ class PluralRule(object): def __call__(self, n): if not hasattr(self, '_func'): self._func = to_python(self) - return self._func(n) + if isinstance(n, float): + int_n = int(n) + if int_n == n: + n = int_n + else: + n = decimal.Decimal(n) + if isinstance(n, decimal.Decimal): + str_n = str(n).lstrip('+-') + trailing = '.' in str_n and str_n.split('.', 1)[1] or '' + v = len(trailing) + w = len(trailing.rstrip('0')) + f = int(trailing or 0) + t = int(trailing.rstrip('0') or 0) + else: + v = w = f = t = 0 + return self._func(abs(n), v, w, f, t) def to_javascript(rule): @@ -153,7 +172,10 @@ def to_python(rule): 'MOD': cldr_modulo } to_python = _PythonCompiler().compile - result = ['def evaluate(n):'] + result = [ + 'def evaluate(n, v=0, w=0, f=0, t=0):', + ' i = int(n)', + ] for tag, ast in PluralRule.parse(rule).abstract: # the str() call is to coerce the tag to the native string. It's # a limited ascii restricted set of tags anyways so that is fine. @@ -259,16 +281,24 @@ class _Parser(object): tree of tuples. It implements the following grammar:: condition = and_condition ('or' and_condition)* + ('@integer' samples)? 
+ ('@decimal' samples)? and_condition = relation ('and' relation)* - relation = is_relation | in_relation | within_relation | 'n' <EOL> + relation = is_relation | in_relation | within_relation is_relation = expr 'is' ('not')? value - in_relation = expr ('not')? 'in' range_list + in_relation = expr (('not')? 'in' | '=' | '!=') range_list within_relation = expr ('not')? 'within' range_list - expr = 'n' ('mod' value)? + expr = operand (('mod' | '%') value)? + operand = 'n' | 'i' | 'f' | 't' | 'v' | 'w' range_list = (range | value) (',' range_list)* value = digit+ digit = 0|1|2|3|4|5|6|7|8|9 range = value'..'value + samples = sampleRange (',' sampleRange)* (',' ('…'|'...'))? + sampleRange = decimalValue '~' decimalValue + decimalValue = value ('.' value)? + + (Samples are currently entirely ignored) - Whitespace can occur between or around any of the above tokens. - Rules should be mutually exclusive; for a given numeric value, only one @@ -283,14 +313,15 @@ class _Parser(object): _rules = [ (None, re.compile(r'\s+(?u)')), - ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|n)\b')), + ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[nivwft])\b')), ('value', re.compile(r'\d+')), - ('comma', re.compile(r',')), + ('symbol', re.compile(r'%|,|!=|=')), ('ellipsis', re.compile(r'\.\.')) ] + _vars = tuple('nivwft') def __init__(self, string): - string = string.lower() + string = string.lower().split('@')[0] result = [] pos = 0 end = len(string) @@ -352,30 +383,50 @@ class _Parser(object): if self.skip('word', 'within'): method = 'within' else: - self.expect('word', 'in', term="'within' or 'in'") + if not self.skip('word', 'in'): + if negated: + raise RuleError('Cannot negate operator based rules.') + return self.newfangled_relation(left) rv = 'relation', (method, left, self.range_list()) if negated: rv = 'not', (rv,) return rv + def newfangled_relation(self, left): + if self.skip('symbol', '='): + negated = False + elif self.skip('symbol', '!='): + negated = True + 
else: + raise RuleError('Expected "=" or "!=" or legacy relation') + rv = 'relation', ('in', left, self.range_list()) + if negated: + rv = 'not', (rv,) + return rv + def range_or_value(self): left = self.value() if self.skip('ellipsis'): - return((left, self.value())) + return (left, self.value()) else: - return((left, left)) + return (left, left) def range_list(self): range_list = [self.range_or_value()] - while self.skip('comma'): + while self.skip('symbol', ','): range_list.append(self.range_or_value()) return 'range_list', range_list def expr(self): - self.expect('word', 'n') + word = self.skip('word') + if word is None or word[1] not in self._vars: + raise RuleError('Expected identifier variable') + name = word[1] if self.skip('word', 'mod'): - return 'mod', (('n', ()), self.value()) - return 'n', () + return 'mod', ((name, ()), self.value()) + elif self.skip('symbol', '%'): + return 'mod', ((name, ()), self.value()) + return name, () def value(self): return 'value', (int(self.expect('value')[1]),) @@ -401,6 +452,11 @@ class _Compiler(object): return getattr(self, 'compile_' + op)(*args) compile_n = lambda x: 'n' + compile_i = lambda x: 'i' + compile_v = lambda x: 'v' + compile_w = lambda x: 'w' + compile_f = lambda x: 'f' + compile_t = lambda x: 't' compile_value = lambda x, v: str(v) compile_and = _binary_compiler('(%s && %s)') compile_or = _binary_compiler('(%s || %s)') @@ -455,18 +511,30 @@ class _GettextCompiler(_Compiler): class _JavaScriptCompiler(_GettextCompiler): """Compiles the expression to plain of JavaScript.""" + # XXX: presently javascript does not support any of the + # fraction support and basically only deals with integers. 
+ compile_i = lambda x: 'parseInt(n, 10)' + compile_v = lambda x: '0' + compile_w = lambda x: '0' + compile_f = lambda x: '0' + compile_t = lambda x: '0' + def compile_relation(self, method, expr, range_list): code = _GettextCompiler.compile_relation( self, method, expr, range_list) if method == 'in': expr = self.compile(expr) - code = '(parseInt(%s) == %s && %s)' % (expr, expr, code) + code = '(parseInt(%s, 10) == %s && %s)' % (expr, expr, code) return code class _UnicodeCompiler(_Compiler): """Returns a unicode pluralization rule again.""" + # XXX: this currently spits out the old syntax instead of the new + # one. We can change that, but it will break a whole bunch of stuff + # for users I suppose. + compile_is = _binary_compiler('%s is %s') compile_isnot = _binary_compiler('%s is not %s') compile_and = _binary_compiler('%s and %s') diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py index 9c82fc8..fe01053 100755 --- a/scripts/download_import_cldr.py +++ b/scripts/download_import_cldr.py @@ -13,9 +13,9 @@ except ImportError: from urllib import urlretrieve -URL = 'http://unicode.org/Public/cldr/23.1/core.zip' -FILENAME = 'core-23.1.zip' -FILESUM = 'd44ff35f9b9160becbb3a575468d8a5a' +URL = 'http://unicode.org/Public/cldr/24/core.zip' +FILENAME = 'core-24.zip' +FILESUM = 'cd2e8f31baf65c96bfc7e5377b3b793f' BLKSIZE = 131072 diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index 3a2f121..02a6532 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -186,6 +186,8 @@ def main(): # pass our parser anyways. 
if '-' in alias.attrib['type']: continue + if 'replacement' not in alias.attrib: + continue language_aliases[alias.attrib['type']] = alias.attrib['replacement'] # Territory aliases @@ -574,7 +576,8 @@ def main(): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in currency_formats: continue - pattern = text_type(elem.findtext('currencyFormat/pattern')) + pattern = text_type(elem.findtext( + 'currencyFormat[@type="standard"]/pattern')) currency_formats[elem.attrib.get('type')] = \ numbers.parse_pattern(pattern) diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 99e0d1b..fae6c71 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -175,11 +175,11 @@ class NumberParsingTestCase(unittest.TestCase): def test_get_currency_name(): - assert numbers.get_currency_name('USD', 'en_US') == u'US dollars' + assert numbers.get_currency_name('USD', locale='en_US') == u'US Dollar' def test_get_currency_symbol(): - assert numbers.get_currency_symbol('USD', 'en_US') == u'$' + assert numbers.get_currency_symbol('USD', locale='en_US') == u'$' def test_get_territory_currencies(): |