Started work on supporting CLDR 24 [feature/cldr24]

author: Armin Ronacher <armin.ronacher@active-4.com> 2013-11-20 19:30:22 +0000
committer: Armin Ronacher <armin.ronacher@active-4.com> 2013-11-20 19:30:22 +0000
commit: 774047ae71b3b16fb1db2b5010b31175ce61f8a5 (patch)
tree: 590730322a0b67371a67e092fe31f48657565e33
parent: 7edfab980eb89e06ee97c576b0778283f1c40707 (diff)
download: babel-feature/cldr24.tar.gz
5 files changed, 94 insertions, 23 deletions
diff --git a/babel/numbers.py b/babel/numbers.py
index 2f7fe16..c2c782e 100644
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -35,7 +35,7 @@ def get_currency_name(currency, count=None, locale=LC_NUMERIC):
 
     >>> get_currency_name('USD', locale='en_US')
     u'US Dollar'
-    
+
     .. versionadded:: 0.9.4
 
     :param currency: the currency code
diff --git a/babel/plural.py b/babel/plural.py
index 144a0dc..983f9fb 100644
--- a/babel/plural.py
+++ b/babel/plural.py
@@ -10,6 +10,7 @@
 """
 
 import re
+import decimal
 
 
 _plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
@@ -50,6 +51,9 @@ class PluralRule(object):
         found = set()
         self.abstract = []
         for key, expr in sorted(list(rules)):
+            # Other only contains samples
+            if key == 'other':
+                continue
             if key not in _plural_tags:
                 raise ValueError('unknown tag %r' % key)
             elif key in found:
@@ -102,7 +106,22 @@ class PluralRule(object):
     def __call__(self, n):
         if not hasattr(self, '_func'):
             self._func = to_python(self)
-        return self._func(n)
+        if isinstance(n, float):
+            int_n = int(n)
+            if int_n == n:
+                n = int_n
+            else:
+                n = decimal.Decimal(n)
+        if isinstance(n, decimal.Decimal):
+            str_n = str(n).lstrip('+-')
+            trailing = '.' in str_n and str_n.split('.', 1)[1] or ''
+            v = len(trailing)
+            w = len(trailing.rstrip('0'))
+            f = int(trailing or 0)
+            t = int(trailing.rstrip('0') or 0)
+        else:
+            v = w = f = t = 0
+        return self._func(abs(n), v, w, f, t)
 
 
 def to_javascript(rule):
@@ -153,7 +172,10 @@ def to_python(rule):
         'MOD':      cldr_modulo
     }
     to_python = _PythonCompiler().compile
-    result = ['def evaluate(n):']
+    result = [
+        'def evaluate(n, v=0, w=0, f=0, t=0):',
+        ' i = int(n)',
+    ]
     for tag, ast in PluralRule.parse(rule).abstract:
         # the str() call is to coerce the tag to the native string.  It's
         # a limited ascii restricted set of tags anyways so that is fine.
@@ -259,16 +281,24 @@ class _Parser(object):
     tree of tuples. It implements the following grammar::
 
         condition     = and_condition ('or' and_condition)*
+                        ('@integer' samples)?
+                        ('@decimal' samples)?
         and_condition = relation ('and' relation)*
-        relation      = is_relation | in_relation | within_relation | 'n' <EOL>
+        relation      = is_relation | in_relation | within_relation
         is_relation   = expr 'is' ('not')? value
-        in_relation   = expr ('not')? 'in' range_list
+        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
         within_relation = expr ('not')? 'within' range_list
-        expr          = 'n' ('mod' value)?
+        expr          = operand (('mod' | '%') value)?
+        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
         range_list    = (range | value) (',' range_list)*
         value         = digit+
         digit         = 0|1|2|3|4|5|6|7|8|9
         range         = value'..'value
+        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
+        sampleRange   = decimalValue '~' decimalValue
+        decimalValue  = value ('.' value)?
+
+    (Samples are currently entirely ignored)
 
     - Whitespace can occur between or around any of the above tokens.
     - Rules should be mutually exclusive; for a given numeric value, only one
@@ -283,14 +313,15 @@ class _Parser(object):
 
     _rules = [
         (None, re.compile(r'\s+(?u)')),
-        ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|n)\b')),
+        ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[nivwft])\b')),
         ('value', re.compile(r'\d+')),
-        ('comma', re.compile(r',')),
+        ('symbol', re.compile(r'%|,|!=|=')),
         ('ellipsis', re.compile(r'\.\.'))
     ]
+    _vars = tuple('nivwft')
 
     def __init__(self, string):
-        string = string.lower()
+        string = string.lower().split('@')[0]
         result = []
         pos = 0
         end = len(string)
@@ -352,30 +383,50 @@ class _Parser(object):
         if self.skip('word', 'within'):
             method = 'within'
         else:
-            self.expect('word', 'in', term="'within' or 'in'")
+            if not self.skip('word', 'in'):
+                if negated:
+                    raise RuleError('Cannot negate operator based rules.')
+                return self.newfangled_relation(left)
         rv = 'relation', (method, left, self.range_list())
         if negated:
             rv = 'not', (rv,)
         return rv
 
+    def newfangled_relation(self, left):
+        if self.skip('symbol', '='):
+            negated = False
+        elif self.skip('symbol', '!='):
+            negated = True
+        else:
+            raise RuleError('Expected "=" or "!=" or legacy relation')
+        rv = 'relation', ('in', left, self.range_list())
+        if negated:
+            rv = 'not', (rv,)
+        return rv
+
     def range_or_value(self):
         left = self.value()
         if self.skip('ellipsis'):
-            return((left, self.value()))
+            return (left, self.value())
         else:
-            return((left, left))
+            return (left, left)
 
     def range_list(self):
         range_list = [self.range_or_value()]
-        while self.skip('comma'):
+        while self.skip('symbol', ','):
             range_list.append(self.range_or_value())
         return 'range_list', range_list
 
     def expr(self):
-        self.expect('word', 'n')
+        word = self.skip('word')
+        if word is None or word[1] not in self._vars:
+            raise RuleError('Expected identifier variable')
+        name = word[1]
         if self.skip('word', 'mod'):
-            return 'mod', (('n', ()), self.value())
-        return 'n', ()
+            return 'mod', ((name, ()), self.value())
+        elif self.skip('symbol', '%'):
+            return 'mod', ((name, ()), self.value())
+        return name, ()
 
     def value(self):
         return 'value', (int(self.expect('value')[1]),)
@@ -401,6 +452,11 @@ class _Compiler(object):
         return getattr(self, 'compile_' + op)(*args)
 
     compile_n = lambda x: 'n'
+    compile_i = lambda x: 'i'
+    compile_v = lambda x: 'v'
+    compile_w = lambda x: 'w'
+    compile_f = lambda x: 'f'
+    compile_t = lambda x: 't'
     compile_value = lambda x, v: str(v)
     compile_and = _binary_compiler('(%s && %s)')
     compile_or = _binary_compiler('(%s || %s)')
@@ -455,18 +511,30 @@ class _GettextCompiler(_Compiler):
 class _JavaScriptCompiler(_GettextCompiler):
     """Compiles the expression to plain of JavaScript."""
 
+    # XXX: presently javascript does not support any of the
+    # fraction support and basically only deals with integers.
+    compile_i = lambda x: 'parseInt(n, 10)'
+    compile_v = lambda x: '0'
+    compile_w = lambda x: '0'
+    compile_f = lambda x: '0'
+    compile_t = lambda x: '0'
+
     def compile_relation(self, method, expr, range_list):
         code = _GettextCompiler.compile_relation(
             self, method, expr, range_list)
         if method == 'in':
             expr = self.compile(expr)
-            code = '(parseInt(%s) == %s && %s)' % (expr, expr, code)
+            code = '(parseInt(%s, 10) == %s && %s)' % (expr, expr, code)
         return code
 
 
 class _UnicodeCompiler(_Compiler):
     """Returns a unicode pluralization rule again."""
 
+    # XXX: this currently spits out the old syntax instead of the new
+    # one.  We can change that, but it will break a whole bunch of stuff
+    # for users I suppose.
+
     compile_is = _binary_compiler('%s is %s')
     compile_isnot = _binary_compiler('%s is not %s')
     compile_and = _binary_compiler('%s and %s')
diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py
index 9c82fc8..fe01053 100755
--- a/scripts/download_import_cldr.py
+++ b/scripts/download_import_cldr.py
@@ -13,9 +13,9 @@ except ImportError:
     from urllib import urlretrieve
 
 
-URL = 'http://unicode.org/Public/cldr/23.1/core.zip'
-FILENAME = 'core-23.1.zip'
-FILESUM = 'd44ff35f9b9160becbb3a575468d8a5a'
+URL = 'http://unicode.org/Public/cldr/24/core.zip'
+FILENAME = 'core-24.zip'
+FILESUM = 'cd2e8f31baf65c96bfc7e5377b3b793f'
 BLKSIZE = 131072
 
 
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index 3a2f121..02a6532 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -186,6 +186,8 @@ def main():
             # pass our parser anyways.
             if '-' in alias.attrib['type']:
                 continue
+            if 'replacement' not in alias.attrib:
+                continue
             language_aliases[alias.attrib['type']] = alias.attrib['replacement']
 
         # Territory aliases
@@ -574,7 +576,8 @@ def main():
             if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                     and elem.attrib.get('type') in currency_formats:
                 continue
-            pattern = text_type(elem.findtext('currencyFormat/pattern'))
+            pattern = text_type(elem.findtext(
+                'currencyFormat[@type="standard"]/pattern'))
             currency_formats[elem.attrib.get('type')] = \
                 numbers.parse_pattern(pattern)
 
diff --git a/tests/test_numbers.py b/tests/test_numbers.py
index 99e0d1b..fae6c71 100644
--- a/tests/test_numbers.py
+++ b/tests/test_numbers.py
@@ -175,11 +175,11 @@ class NumberParsingTestCase(unittest.TestCase):
 
 
 def test_get_currency_name():
-    assert numbers.get_currency_name('USD', 'en_US') == u'US dollars'
+    assert numbers.get_currency_name('USD', locale='en_US') == u'US Dollar'
 
 
 def test_get_currency_symbol():
-    assert numbers.get_currency_symbol('USD', 'en_US') == u'$'
+    assert numbers.get_currency_symbol('USD', locale='en_US') == u'$'
 
 
 def test_get_territory_currencies():
author	Armin Ronacher <armin.ronacher@active-4.com>	2013-11-20 19:30:22 +0000
committer	Armin Ronacher <armin.ronacher@active-4.com>	2013-11-20 19:30:22 +0000
commit	774047ae71b3b16fb1db2b5010b31175ce61f8a5 (patch)
tree	590730322a0b67371a67e092fe31f48657565e33
parent	7edfab980eb89e06ee97c576b0778283f1c40707 (diff)
download	babel-feature/cldr24.tar.gz